All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations
@ 2019-10-31  0:17 ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-10-31  0:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

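Move kernel_queue_init_v9() below the static helpers it references so
that their forward declarations are no longer needed. This is cleaner.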

Change-Id: I8cdecad387d8c547a088c6050f77385ee1135be1
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 9a4bafb2e175..3b5ca2b1d7a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -26,18 +26,6 @@
 #include "kfd_pm4_headers_ai.h"
 #include "kfd_pm4_opcodes.h"
 
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v9(struct kernel_queue *kq);
-static void submit_packet_v9(struct kernel_queue *kq);
-
-void kernel_queue_init_v9(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_v9;
-	ops->uninitialize = uninitialize_v9;
-	ops->submit_packet = submit_packet_v9;
-}
-
 static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
 			enum kfd_queue_type type, unsigned int queue_size)
 {
@@ -67,6 +55,13 @@ static void submit_packet_v9(struct kernel_queue *kq)
 				kq->pending_wptr64);
 }
 
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+	ops->initialize = initialize_v9;
+	ops->uninitialize = uninitialize_v9;
+	ops->submit_packet = submit_packet_v9;
+}
+
 static int pm_map_process_v9(struct packet_manager *pm,
 		uint32_t *buffer, struct qcm_process_device *qpd)
 {
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations
@ 2019-10-31  0:17 ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-10-31  0:17 UTC (permalink / raw)
  To: amd-gfx; +Cc: Zhao, Yong

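Move kernel_queue_init_v9() below the static helpers it references so
that their forward declarations are no longer needed. This is cleaner.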

Change-Id: I8cdecad387d8c547a088c6050f77385ee1135be1
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 9a4bafb2e175..3b5ca2b1d7a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -26,18 +26,6 @@
 #include "kfd_pm4_headers_ai.h"
 #include "kfd_pm4_opcodes.h"
 
-static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v9(struct kernel_queue *kq);
-static void submit_packet_v9(struct kernel_queue *kq);
-
-void kernel_queue_init_v9(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_v9;
-	ops->uninitialize = uninitialize_v9;
-	ops->submit_packet = submit_packet_v9;
-}
-
 static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
 			enum kfd_queue_type type, unsigned int queue_size)
 {
@@ -67,6 +55,13 @@ static void submit_packet_v9(struct kernel_queue *kq)
 				kq->pending_wptr64);
 }
 
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+	ops->initialize = initialize_v9;
+	ops->uninitialize = uninitialize_v9;
+	ops->submit_packet = submit_packet_v9;
+}
+
 static int pm_map_process_v9(struct packet_manager *pm,
 		uint32_t *buffer, struct qcm_process_device *qpd)
 {
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-10-31  0:17     ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-10-31  0:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

release_mem is not used at all on GFX9 and GFX10, so delete the v9 and
v10 implementations and set the release_mem hook to NULL with a
release_mem_size of 0.

Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
 2 files changed, 4 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
index aed32ab7102e..bfd6221acae9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
@@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }
 
-
-static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	WARN_ON(!buffer);
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
-}
-
 const struct packet_manager_funcs kfd_v10_pm_funcs = {
 	.map_process			= pm_map_process_v10,
 	.runlist			= pm_runlist_v10,
@@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
 	.map_queues			= pm_map_queues_v10,
 	.unmap_queues			= pm_unmap_queues_v10,
 	.query_status			= pm_query_status_v10,
-	.release_mem			= pm_release_mem_v10,
+	.release_mem			= NULL,
 	.map_process_size		= sizeof(struct pm4_mes_map_process),
 	.runlist_size			= sizeof(struct pm4_mes_runlist),
 	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
 	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
 	.query_status_size		= sizeof(struct pm4_mes_query_status),
-	.release_mem_size		= sizeof(struct pm4_mec_release_mem)
+	.release_mem_size		= 0,
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 3b5ca2b1d7a6..f0e4910a8865 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }
 
-
-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return 0;
-}
-
 const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_process		= pm_map_process_v9,
 	.runlist		= pm_runlist_v9,
@@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_queues		= pm_map_queues_v9,
 	.unmap_queues		= pm_unmap_queues_v9,
 	.query_status		= pm_query_status_v9,
-	.release_mem		= pm_release_mem_v9,
+	.release_mem		= NULL,
 	.map_process_size	= sizeof(struct pm4_mes_map_process),
 	.runlist_size		= sizeof(struct pm4_mes_runlist),
 	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
 	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
 	.query_status_size	= sizeof(struct pm4_mes_query_status),
-	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
+	.release_mem_size	= 0,
 };
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-10-31  0:17     ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-10-31  0:17 UTC (permalink / raw)
  To: amd-gfx; +Cc: Zhao, Yong

release_mem is not used at all on GFX9 and GFX10, so delete the v9 and
v10 implementations and set the release_mem hook to NULL with a
release_mem_size of 0.

Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
 2 files changed, 4 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
index aed32ab7102e..bfd6221acae9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
@@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }
 
-
-static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	WARN_ON(!buffer);
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
-}
-
 const struct packet_manager_funcs kfd_v10_pm_funcs = {
 	.map_process			= pm_map_process_v10,
 	.runlist			= pm_runlist_v10,
@@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
 	.map_queues			= pm_map_queues_v10,
 	.unmap_queues			= pm_unmap_queues_v10,
 	.query_status			= pm_query_status_v10,
-	.release_mem			= pm_release_mem_v10,
+	.release_mem			= NULL,
 	.map_process_size		= sizeof(struct pm4_mes_map_process),
 	.runlist_size			= sizeof(struct pm4_mes_runlist),
 	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
 	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
 	.query_status_size		= sizeof(struct pm4_mes_query_status),
-	.release_mem_size		= sizeof(struct pm4_mec_release_mem)
+	.release_mem_size		= 0,
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 3b5ca2b1d7a6..f0e4910a8865 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }
 
-
-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
-
-	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
-					sizeof(struct pm4_mec_release_mem));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
-
-	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return 0;
-}
-
 const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_process		= pm_map_process_v9,
 	.runlist		= pm_runlist_v9,
@@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.map_queues		= pm_map_queues_v9,
 	.unmap_queues		= pm_unmap_queues_v9,
 	.query_status		= pm_query_status_v9,
-	.release_mem		= pm_release_mem_v9,
+	.release_mem		= NULL,
 	.map_process_size	= sizeof(struct pm4_mes_map_process),
 	.runlist_size		= sizeof(struct pm4_mes_runlist),
 	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
 	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
 	.query_status_size	= sizeof(struct pm4_mes_query_status),
-	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
+	.release_mem_size	= 0,
 };
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10
@ 2019-10-31  0:17     ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-10-31  0:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

The kernel queue functions for v9 and v10 are the same except for
pm_map_process_v*, which differ slightly, so the v9 functions should be
reused for v10. This eliminates the need to reapply several patches that
were applied to v9 but not to v10, such as the bigger GWS and the support
for more than 2 SDMA engines, both of which were introduced on Arcturus.

Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/Makefile           |   1 -
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  49 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 -
 6 files changed, 44 insertions(+), 331 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..017a8b7156da 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -41,7 +41,6 @@ AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
-		$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
 		$(AMDKFD_PATH)/kfd_packet_manager.o \
 		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 11d244891393..0d966408ea87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
 	case CHIP_ARCTURUS:
-		kernel_queue_init_v9(&kq->ops_asic_specific);
-		break;
 	case CHIP_NAVI10:
 	case CHIP_NAVI12:
 	case CHIP_NAVI14:
-		kernel_queue_init_v10(&kq->ops_asic_specific);
+		kernel_queue_init_v9(&kq->ops_asic_specific);
 		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..a7116a939029 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -102,6 +102,5 @@ struct kernel_queue {
 void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
 void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
 
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index bfd6221acae9..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_v10;
-	ops->uninitialize = uninitialize_v10;
-	ops->submit_packet = submit_packet_v10;
-}
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size)
-{
-	int retval;
-
-	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
-	if (retval != 0)
-		return false;
-
-	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
-	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
-	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
-	return true;
-}
-
-static void uninitialize_v10(struct kernel_queue *kq)
-{
-	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_v10(struct kernel_queue *kq)
-{
-	*kq->wptr64_kernel = kq->pending_wptr64;
-	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr64);
-}
-
-static int pm_map_process_v10(struct packet_manager *pm,
-		uint32_t *buffer, struct qcm_process_device *qpd)
-{
-	struct pm4_mes_map_process *packet;
-	uint64_t vm_page_table_base_addr = qpd->page_table_base;
-
-	packet = (struct pm4_mes_map_process *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_mes_map_process));
-	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
-	packet->bitfields2.process_quantum = 1;
-	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields14.gds_size = qpd->gds_size;
-	packet->bitfields14.num_gws = qpd->num_gws;
-	packet->bitfields14.num_oac = qpd->num_oac;
-	packet->bitfields14.sdma_enable = 1;
-
-	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
-	packet->sh_mem_config = qpd->sh_mem_config;
-	packet->sh_mem_bases = qpd->sh_mem_bases;
-	if (qpd->tba_addr) {
-		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-		packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) |
-			upper_32_bits(qpd->tba_addr >> 8);
-		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
-	}
-
-	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
-	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
-	packet->vm_context_page_table_base_addr_lo32 =
-			lower_32_bits(vm_page_table_base_addr);
-	packet->vm_context_page_table_base_addr_hi32 =
-			upper_32_bits(vm_page_table_base_addr);
-
-	return 0;
-}
-
-static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
-	struct pm4_mes_runlist *packet;
-
-	int concurrent_proc_cnt = 0;
-	struct kfd_dev *kfd = pm->dqm->dev;
-
-	/* Determine the number of processes to map together to HW:
-	 * it can not exceed the number of VMIDs available to the
-	 * scheduler, and it is determined by the smaller of the number
-	 * of processes in the runlist and kfd module parameter
-	 * hws_max_conc_proc.
-	 * Note: the arbitration between the number of VMIDs and
-	 * hws_max_conc_proc has been done in
-	 * kgd2kfd_device_init().
-	 */
-	concurrent_proc_cnt = min(pm->dqm->processes_count,
-			kfd->max_proc_per_quantum);
-
-
-	packet = (struct pm4_mes_runlist *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
-	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_mes_runlist));
-
-	packet->bitfields4.ib_size = ib_size_in_dwords;
-	packet->bitfields4.chain = chain ? 1 : 0;
-	packet->bitfields4.offload_polling = 0;
-	packet->bitfields4.valid = 1;
-	packet->bitfields4.process_cnt = concurrent_proc_cnt;
-	packet->ordinal2 = lower_32_bits(ib);
-	packet->ib_base_hi = upper_32_bits(ib);
-
-	return 0;
-}
-
-static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
-		struct queue *q, bool is_static)
-{
-	struct pm4_mes_map_queues *packet;
-	bool use_static = is_static;
-
-	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
-					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
-	packet->bitfields2.engine_sel =
-		engine_sel__mes_map_queues__compute_vi;
-	packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_compute_vi;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-		if (use_static)
-			packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_latency_static_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.queue_type =
-			queue_type__mes_map_queues__debug_interface_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
-				engine_sel__mes_map_queues__sdma0_vi;
-		use_static = false; /* no static queues under SDMA */
-		break;
-	default:
-		WARN(1, "queue type %d\n", q->properties.type);
-		return -EINVAL;
-	}
-	packet->bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
-static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
-			enum kfd_queue_type type,
-			enum kfd_unmap_queues_filter filter,
-			uint32_t filter_param, bool reset,
-			unsigned int sdma_engine)
-{
-	struct pm4_mes_unmap_queues *packet;
-
-	packet = (struct pm4_mes_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_mes_unmap_queues));
-	switch (type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
-		break;
-	default:
-		WARN(1, "queue type %d\n", type);
-		break;
-	}
-
-	if (reset)
-		packet->bitfields2.action =
-			action__mes_unmap_queues__reset_queues;
-	else
-		packet->bitfields2.action =
-			action__mes_unmap_queues__preempt_queues;
-
-	switch (filter) {
-	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
-		packet->bitfields2.num_queues = 1;
-		packet->bitfields3b.doorbell_offset0 = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
-		packet->bitfields3a.pasid = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_queues;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
-		/* in this case, we do not preempt static queues */
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
-		break;
-	default:
-		WARN(1, "filter %d\n", filter);
-		break;
-	}
-
-	return 0;
-
-}
-
-static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t fence_address,	uint32_t fence_value)
-{
-	struct pm4_mes_query_status *packet;
-
-	packet = (struct pm4_mes_query_status *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
-
-
-	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_mes_query_status));
-
-	packet->bitfields2.context_id = 0;
-	packet->bitfields2.interrupt_sel =
-			interrupt_sel__mes_query_status__completion_status;
-	packet->bitfields2.command =
-			command__mes_query_status__fence_only_after_write_ack;
-
-	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
-	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
-	packet->data_hi = upper_32_bits((uint64_t)fence_value);
-	packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
-	return 0;
-}
-
-const struct packet_manager_funcs kfd_v10_pm_funcs = {
-	.map_process			= pm_map_process_v10,
-	.runlist			= pm_runlist_v10,
-	.set_resources			= pm_set_resources_vi,
-	.map_queues			= pm_map_queues_v10,
-	.unmap_queues			= pm_unmap_queues_v10,
-	.query_status			= pm_query_status_v10,
-	.release_mem			= NULL,
-	.map_process_size		= sizeof(struct pm4_mes_map_process),
-	.runlist_size			= sizeof(struct pm4_mes_runlist),
-	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
-	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
-	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
-	.query_status_size		= sizeof(struct pm4_mes_query_status),
-	.release_mem_size		= 0,
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index f0e4910a8865..d8f7343bfe71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops)
 	ops->submit_packet = submit_packet_v9;
 }
 
-static int pm_map_process_v9(struct packet_manager *pm,
-		uint32_t *buffer, struct qcm_process_device *qpd)
+static int pm_map_process_v9_base(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd,
+		unsigned int sq_shader_tba_hi_trap_en_shift)
 {
 	struct pm4_mes_map_process *packet;
 	uint64_t vm_page_table_base_addr = qpd->page_table_base;
@@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm,
 
 	packet->sh_mem_config = qpd->sh_mem_config;
 	packet->sh_mem_bases = qpd->sh_mem_bases;
-	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	if (qpd->tba_addr) {
+		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+		if (sq_shader_tba_hi_trap_en_shift) {
+			packet->sq_shader_tba_hi |=
+					1 << sq_shader_tba_hi_trap_en_shift;
+		}
+		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	}
 
 	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
 	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
@@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
 	return 0;
 }
 
+static int pm_map_process_v9(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd) {
+	return pm_map_process_v9_base(pm, buffer, qpd, 0);
+}
+
 static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
 			uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
@@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.query_status_size	= sizeof(struct pm4_mes_query_status),
 	.release_mem_size	= 0,
 };
+
+#include "gc/gc_10_1_0_sh_mask.h"
+
+static int pm_map_process_v10(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd) {
+	return pm_map_process_v9_base(pm, buffer, qpd,
+			SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);
+}
+
+const struct packet_manager_funcs kfd_v10_pm_funcs = {
+	.map_process		= pm_map_process_v10,
+	.runlist		= pm_runlist_v9,
+	.set_resources		= pm_set_resources_v9,
+	.map_queues		= pm_map_queues_v9,
+	.unmap_queues		= pm_unmap_queues_v9,
+	.query_status		= pm_query_status_v9,
+	.release_mem		= NULL,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= 0,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 62db4d20ed32..5127ddee24c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
-			struct scheduling_resources *res);
-
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10
@ 2019-10-31  0:17     ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-10-31  0:17 UTC (permalink / raw)
  To: amd-gfx; +Cc: Zhao, Yong

The kernel queue functions for v9 and v10 are the same except for
pm_map_process_v*, which differ slightly, so the v9 functions should be
reused for v10. This eliminates the need to reapply several patches that
were applied on v9 but not on v10, such as the bigger GWS and support for
more than 2 SDMA engines, which were introduced on Arcturus.

Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/Makefile           |   1 -
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  49 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 -
 6 files changed, 44 insertions(+), 331 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 48155060a57c..017a8b7156da 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -41,7 +41,6 @@ AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
 		$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
-		$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
 		$(AMDKFD_PATH)/kfd_packet_manager.o \
 		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
 		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 11d244891393..0d966408ea87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
 	case CHIP_ARCTURUS:
-		kernel_queue_init_v9(&kq->ops_asic_specific);
-		break;
 	case CHIP_NAVI10:
 	case CHIP_NAVI12:
 	case CHIP_NAVI14:
-		kernel_queue_init_v10(&kq->ops_asic_specific);
+		kernel_queue_init_v9(&kq->ops_asic_specific);
 		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 365fc674fea4..a7116a939029 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -102,6 +102,5 @@ struct kernel_queue {
 void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
 void kernel_queue_init_v9(struct kernel_queue_ops *ops);
-void kernel_queue_init_v10(struct kernel_queue_ops *ops);
 
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
deleted file mode 100644
index bfd6221acae9..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Copyright 2018 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers_ai.h"
-#include "kfd_pm4_opcodes.h"
-#include "gc/gc_10_1_0_sh_mask.h"
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_v10(struct kernel_queue *kq);
-static void submit_packet_v10(struct kernel_queue *kq);
-
-void kernel_queue_init_v10(struct kernel_queue_ops *ops)
-{
-	ops->initialize = initialize_v10;
-	ops->uninitialize = uninitialize_v10;
-	ops->submit_packet = submit_packet_v10;
-}
-
-static bool initialize_v10(struct kernel_queue *kq, struct kfd_dev *dev,
-			enum kfd_queue_type type, unsigned int queue_size)
-{
-	int retval;
-
-	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
-	if (retval != 0)
-		return false;
-
-	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
-	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
-
-	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
-
-	return true;
-}
-
-static void uninitialize_v10(struct kernel_queue *kq)
-{
-	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
-}
-
-static void submit_packet_v10(struct kernel_queue *kq)
-{
-	*kq->wptr64_kernel = kq->pending_wptr64;
-	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr64);
-}
-
-static int pm_map_process_v10(struct packet_manager *pm,
-		uint32_t *buffer, struct qcm_process_device *qpd)
-{
-	struct pm4_mes_map_process *packet;
-	uint64_t vm_page_table_base_addr = qpd->page_table_base;
-
-	packet = (struct pm4_mes_map_process *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_mes_map_process));
-	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
-	packet->bitfields2.process_quantum = 1;
-	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields14.gds_size = qpd->gds_size;
-	packet->bitfields14.num_gws = qpd->num_gws;
-	packet->bitfields14.num_oac = qpd->num_oac;
-	packet->bitfields14.sdma_enable = 1;
-
-	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
-	packet->sh_mem_config = qpd->sh_mem_config;
-	packet->sh_mem_bases = qpd->sh_mem_bases;
-	if (qpd->tba_addr) {
-		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-		packet->sq_shader_tba_hi = (1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) |
-			upper_32_bits(qpd->tba_addr >> 8);
-		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
-	}
-
-	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
-	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
-	packet->vm_context_page_table_base_addr_lo32 =
-			lower_32_bits(vm_page_table_base_addr);
-	packet->vm_context_page_table_base_addr_hi32 =
-			upper_32_bits(vm_page_table_base_addr);
-
-	return 0;
-}
-
-static int pm_runlist_v10(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
-	struct pm4_mes_runlist *packet;
-
-	int concurrent_proc_cnt = 0;
-	struct kfd_dev *kfd = pm->dqm->dev;
-
-	/* Determine the number of processes to map together to HW:
-	 * it can not exceed the number of VMIDs available to the
-	 * scheduler, and it is determined by the smaller of the number
-	 * of processes in the runlist and kfd module parameter
-	 * hws_max_conc_proc.
-	 * Note: the arbitration between the number of VMIDs and
-	 * hws_max_conc_proc has been done in
-	 * kgd2kfd_device_init().
-	 */
-	concurrent_proc_cnt = min(pm->dqm->processes_count,
-			kfd->max_proc_per_quantum);
-
-
-	packet = (struct pm4_mes_runlist *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
-	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_mes_runlist));
-
-	packet->bitfields4.ib_size = ib_size_in_dwords;
-	packet->bitfields4.chain = chain ? 1 : 0;
-	packet->bitfields4.offload_polling = 0;
-	packet->bitfields4.valid = 1;
-	packet->bitfields4.process_cnt = concurrent_proc_cnt;
-	packet->ordinal2 = lower_32_bits(ib);
-	packet->ib_base_hi = upper_32_bits(ib);
-
-	return 0;
-}
-
-static int pm_map_queues_v10(struct packet_manager *pm, uint32_t *buffer,
-		struct queue *q, bool is_static)
-{
-	struct pm4_mes_map_queues *packet;
-	bool use_static = is_static;
-
-	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
-					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
-	packet->bitfields2.engine_sel =
-		engine_sel__mes_map_queues__compute_vi;
-	packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_compute_vi;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-		if (use_static)
-			packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_latency_static_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.queue_type =
-			queue_type__mes_map_queues__debug_interface_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
-				engine_sel__mes_map_queues__sdma0_vi;
-		use_static = false; /* no static queues under SDMA */
-		break;
-	default:
-		WARN(1, "queue type %d\n", q->properties.type);
-		return -EINVAL;
-	}
-	packet->bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
-static int pm_unmap_queues_v10(struct packet_manager *pm, uint32_t *buffer,
-			enum kfd_queue_type type,
-			enum kfd_unmap_queues_filter filter,
-			uint32_t filter_param, bool reset,
-			unsigned int sdma_engine)
-{
-	struct pm4_mes_unmap_queues *packet;
-
-	packet = (struct pm4_mes_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-
-	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_mes_unmap_queues));
-	switch (type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-	case KFD_QUEUE_TYPE_SDMA_XGMI:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
-		break;
-	default:
-		WARN(1, "queue type %d\n", type);
-		break;
-	}
-
-	if (reset)
-		packet->bitfields2.action =
-			action__mes_unmap_queues__reset_queues;
-	else
-		packet->bitfields2.action =
-			action__mes_unmap_queues__preempt_queues;
-
-	switch (filter) {
-	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
-		packet->bitfields2.num_queues = 1;
-		packet->bitfields3b.doorbell_offset0 = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
-		packet->bitfields3a.pasid = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_queues;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
-		/* in this case, we do not preempt static queues */
-		packet->bitfields2.queue_sel =
-			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
-		break;
-	default:
-		WARN(1, "filter %d\n", filter);
-		break;
-	}
-
-	return 0;
-
-}
-
-static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t fence_address,	uint32_t fence_value)
-{
-	struct pm4_mes_query_status *packet;
-
-	packet = (struct pm4_mes_query_status *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
-
-
-	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_mes_query_status));
-
-	packet->bitfields2.context_id = 0;
-	packet->bitfields2.interrupt_sel =
-			interrupt_sel__mes_query_status__completion_status;
-	packet->bitfields2.command =
-			command__mes_query_status__fence_only_after_write_ack;
-
-	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
-	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
-	packet->data_hi = upper_32_bits((uint64_t)fence_value);
-	packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
-	return 0;
-}
-
-const struct packet_manager_funcs kfd_v10_pm_funcs = {
-	.map_process			= pm_map_process_v10,
-	.runlist			= pm_runlist_v10,
-	.set_resources			= pm_set_resources_vi,
-	.map_queues			= pm_map_queues_v10,
-	.unmap_queues			= pm_unmap_queues_v10,
-	.query_status			= pm_query_status_v10,
-	.release_mem			= NULL,
-	.map_process_size		= sizeof(struct pm4_mes_map_process),
-	.runlist_size			= sizeof(struct pm4_mes_runlist),
-	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
-	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
-	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
-	.query_status_size		= sizeof(struct pm4_mes_query_status),
-	.release_mem_size		= 0,
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index f0e4910a8865..d8f7343bfe71 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops)
 	ops->submit_packet = submit_packet_v9;
 }
 
-static int pm_map_process_v9(struct packet_manager *pm,
-		uint32_t *buffer, struct qcm_process_device *qpd)
+static int pm_map_process_v9_base(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd,
+		unsigned int sq_shader_tba_hi_trap_en_shift)
 {
 	struct pm4_mes_map_process *packet;
 	uint64_t vm_page_table_base_addr = qpd->page_table_base;
@@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm,
 
 	packet->sh_mem_config = qpd->sh_mem_config;
 	packet->sh_mem_bases = qpd->sh_mem_bases;
-	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
-	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
-	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	if (qpd->tba_addr) {
+		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+		if (sq_shader_tba_hi_trap_en_shift) {
+			packet->sq_shader_tba_hi |=
+					1 << sq_shader_tba_hi_trap_en_shift;
+		}
+		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	}
 
 	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
 	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
@@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
 	return 0;
 }
 
+static int pm_map_process_v9(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd) {
+	return pm_map_process_v9_base(pm, buffer, qpd, 0);
+}
+
 static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
 			uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
@@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
 	.query_status_size	= sizeof(struct pm4_mes_query_status),
 	.release_mem_size	= 0,
 };
+
+#include "gc/gc_10_1_0_sh_mask.h"
+
+static int pm_map_process_v10(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd) {
+	return pm_map_process_v9_base(pm, buffer, qpd,
+			SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);
+}
+
+const struct packet_manager_funcs kfd_v10_pm_funcs = {
+	.map_process		= pm_map_process_v10,
+	.runlist		= pm_runlist_v9,
+	.set_resources		= pm_set_resources_v9,
+	.map_queues		= pm_map_queues_v9,
+	.unmap_queues		= pm_unmap_queues_v9,
+	.query_status		= pm_query_status_v9,
+	.release_mem		= NULL,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= 0,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 62db4d20ed32..5127ddee24c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm);
 
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
-			struct scheduling_resources *res);
-
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07  4:45         ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07  4:45 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8 
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>   	return 0;
>   }
>   
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -	struct pm4_mec_release_mem *packet;
> -
> -	WARN_ON(!buffer);
> -
> -	packet = (struct pm4_mec_release_mem *)buffer;
> -	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -					sizeof(struct pm4_mec_release_mem));
> -
> -	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -	packet->bitfields2.tcl1_action_ena = 1;
> -	packet->bitfields2.tc_action_ena = 1;
> -	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -	packet->bitfields3.int_sel =
> -		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -	packet->address_hi = upper_32_bits(gpu_addr);
> -
> -	packet->data_lo = 0;
> -
> -	return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>   	.map_process			= pm_map_process_v10,
>   	.runlist			= pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>   	.map_queues			= pm_map_queues_v10,
>   	.unmap_queues			= pm_unmap_queues_v10,
>   	.query_status			= pm_query_status_v10,
> -	.release_mem			= pm_release_mem_v10,
> +	.release_mem			= NULL,
>   	.map_process_size		= sizeof(struct pm4_mes_map_process),
>   	.runlist_size			= sizeof(struct pm4_mes_runlist),
>   	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
>   	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
>   	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
>   	.query_status_size		= sizeof(struct pm4_mes_query_status),
> -	.release_mem_size		= sizeof(struct pm4_mec_release_mem)
> +	.release_mem_size		= 0,
>   };
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>   	return 0;
>   }
>   
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -	struct pm4_mec_release_mem *packet;
> -
> -	packet = (struct pm4_mec_release_mem *)buffer;
> -	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -					sizeof(struct pm4_mec_release_mem));
> -
> -	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -	packet->bitfields2.tcl1_action_ena = 1;
> -	packet->bitfields2.tc_action_ena = 1;
> -	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -	packet->bitfields3.int_sel =
> -		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -	packet->address_hi = upper_32_bits(gpu_addr);
> -
> -	packet->data_lo = 0;
> -
> -	return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.map_process		= pm_map_process_v9,
>   	.runlist		= pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.map_queues		= pm_map_queues_v9,
>   	.unmap_queues		= pm_unmap_queues_v9,
>   	.query_status		= pm_query_status_v9,
> -	.release_mem		= pm_release_mem_v9,
> +	.release_mem		= NULL,
>   	.map_process_size	= sizeof(struct pm4_mes_map_process),
>   	.runlist_size		= sizeof(struct pm4_mes_runlist),
>   	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
>   	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
>   	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
>   	.query_status_size	= sizeof(struct pm4_mes_query_status),
> -	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
> +	.release_mem_size	= 0,
>   };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07  4:45         ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07  4:45 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8 
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>   	return 0;
>   }
>   
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -	struct pm4_mec_release_mem *packet;
> -
> -	WARN_ON(!buffer);
> -
> -	packet = (struct pm4_mec_release_mem *)buffer;
> -	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -					sizeof(struct pm4_mec_release_mem));
> -
> -	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -	packet->bitfields2.tcl1_action_ena = 1;
> -	packet->bitfields2.tc_action_ena = 1;
> -	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -	packet->bitfields3.int_sel =
> -		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -	packet->address_hi = upper_32_bits(gpu_addr);
> -
> -	packet->data_lo = 0;
> -
> -	return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>   	.map_process			= pm_map_process_v10,
>   	.runlist			= pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>   	.map_queues			= pm_map_queues_v10,
>   	.unmap_queues			= pm_unmap_queues_v10,
>   	.query_status			= pm_query_status_v10,
> -	.release_mem			= pm_release_mem_v10,
> +	.release_mem			= NULL,
>   	.map_process_size		= sizeof(struct pm4_mes_map_process),
>   	.runlist_size			= sizeof(struct pm4_mes_runlist),
>   	.set_resources_size		= sizeof(struct pm4_mes_set_resources),
>   	.map_queues_size		= sizeof(struct pm4_mes_map_queues),
>   	.unmap_queues_size		= sizeof(struct pm4_mes_unmap_queues),
>   	.query_status_size		= sizeof(struct pm4_mes_query_status),
> -	.release_mem_size		= sizeof(struct pm4_mec_release_mem)
> +	.release_mem_size		= 0,
>   };
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>   	return 0;
>   }
>   
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -	struct pm4_mec_release_mem *packet;
> -
> -	packet = (struct pm4_mec_release_mem *)buffer;
> -	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -					sizeof(struct pm4_mec_release_mem));
> -
> -	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -	packet->bitfields2.tcl1_action_ena = 1;
> -	packet->bitfields2.tc_action_ena = 1;
> -	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -	packet->bitfields3.int_sel =
> -		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -	packet->address_hi = upper_32_bits(gpu_addr);
> -
> -	packet->data_lo = 0;
> -
> -	return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.map_process		= pm_map_process_v9,
>   	.runlist		= pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.map_queues		= pm_map_queues_v9,
>   	.unmap_queues		= pm_unmap_queues_v9,
>   	.query_status		= pm_query_status_v9,
> -	.release_mem		= pm_release_mem_v9,
> +	.release_mem		= NULL,
>   	.map_process_size	= sizeof(struct pm4_mes_map_process),
>   	.runlist_size		= sizeof(struct pm4_mes_runlist),
>   	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
>   	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
>   	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
>   	.query_status_size	= sizeof(struct pm4_mes_query_status),
> -	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
> +	.release_mem_size	= 0,
>   };
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations
@ 2019-11-07  4:45     ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07  4:45 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-10-30 20:17, Zhao, Yong wrote:
> This is cleaner.
>
> Change-Id: I8cdecad387d8c547a088c6050f77385ee1135be1
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 19 +++++++------------
>   1 file changed, 7 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 9a4bafb2e175..3b5ca2b1d7a6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -26,18 +26,6 @@
>   #include "kfd_pm4_headers_ai.h"
>   #include "kfd_pm4_opcodes.h"
>   
> -static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
> -			enum kfd_queue_type type, unsigned int queue_size);
> -static void uninitialize_v9(struct kernel_queue *kq);
> -static void submit_packet_v9(struct kernel_queue *kq);
> -
> -void kernel_queue_init_v9(struct kernel_queue_ops *ops)
> -{
> -	ops->initialize = initialize_v9;
> -	ops->uninitialize = uninitialize_v9;
> -	ops->submit_packet = submit_packet_v9;
> -}
> -
>   static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>   			enum kfd_queue_type type, unsigned int queue_size)
>   {
> @@ -67,6 +55,13 @@ static void submit_packet_v9(struct kernel_queue *kq)
>   				kq->pending_wptr64);
>   }
>   
> +void kernel_queue_init_v9(struct kernel_queue_ops *ops)
> +{
> +	ops->initialize = initialize_v9;
> +	ops->uninitialize = uninitialize_v9;
> +	ops->submit_packet = submit_packet_v9;
> +}
> +
>   static int pm_map_process_v9(struct packet_manager *pm,
>   		uint32_t *buffer, struct qcm_process_device *qpd)
>   {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations
@ 2019-11-07  4:45     ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07  4:45 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx

On 2019-10-30 20:17, Zhao, Yong wrote:
> This is cleaner.
>
> Change-Id: I8cdecad387d8c547a088c6050f77385ee1135be1
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 19 +++++++------------
>   1 file changed, 7 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 9a4bafb2e175..3b5ca2b1d7a6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -26,18 +26,6 @@
>   #include "kfd_pm4_headers_ai.h"
>   #include "kfd_pm4_opcodes.h"
>   
> -static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
> -			enum kfd_queue_type type, unsigned int queue_size);
> -static void uninitialize_v9(struct kernel_queue *kq);
> -static void submit_packet_v9(struct kernel_queue *kq);
> -
> -void kernel_queue_init_v9(struct kernel_queue_ops *ops)
> -{
> -	ops->initialize = initialize_v9;
> -	ops->uninitialize = uninitialize_v9;
> -	ops->submit_packet = submit_packet_v9;
> -}
> -
>   static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
>   			enum kfd_queue_type type, unsigned int queue_size)
>   {
> @@ -67,6 +55,13 @@ static void submit_packet_v9(struct kernel_queue *kq)
>   				kq->pending_wptr64);
>   }
>   
> +void kernel_queue_init_v9(struct kernel_queue_ops *ops)
> +{
> +	ops->initialize = initialize_v9;
> +	ops->uninitialize = uninitialize_v9;
> +	ops->submit_packet = submit_packet_v9;
> +}
> +
>   static int pm_map_process_v9(struct packet_manager *pm,
>   		uint32_t *buffer, struct qcm_process_device *qpd)
>   {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10
@ 2019-11-07  4:52         ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07  4:52 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-10-30 20:17, Zhao, Yong wrote:
> The kernel queue functions for v9 and v10 are the same except
> pm_map_process_v* which have small difference, so they should be reused.
> This eliminates the need of reapplying several patches which were
> applied on v9 but not on v10, such as bigger GWS and more than 2
> SDMA engine support which were introduced on Arcturus.

This looks reasonable in principle. See one suggestion inline to 
simplify it further.


>
> Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/Makefile           |   1 -
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  49 ++-
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 -
>   6 files changed, 44 insertions(+), 331 deletions(-)
>   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 48155060a57c..017a8b7156da 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -41,7 +41,6 @@ AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
> -		$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
>   		$(AMDKFD_PATH)/kfd_packet_manager.o \
>   		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
>   		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 11d244891393..0d966408ea87 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>   	case CHIP_RAVEN:
>   	case CHIP_RENOIR:
>   	case CHIP_ARCTURUS:
> -		kernel_queue_init_v9(&kq->ops_asic_specific);
> -		break;
>   	case CHIP_NAVI10:
>   	case CHIP_NAVI12:
>   	case CHIP_NAVI14:
> -		kernel_queue_init_v10(&kq->ops_asic_specific);
> +		kernel_queue_init_v9(&kq->ops_asic_specific);
>   		break;
>   	default:
>   		WARN(1, "Unexpected ASIC family %u",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 365fc674fea4..a7116a939029 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> @@ -102,6 +102,5 @@ struct kernel_queue {
>   void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>   void kernel_queue_init_vi(struct kernel_queue_ops *ops);
>   void kernel_queue_init_v9(struct kernel_queue_ops *ops);
> -void kernel_queue_init_v10(struct kernel_queue_ops *ops);
>   
>   #endif /* KFD_KERNEL_QUEUE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> deleted file mode 100644
> index bfd6221acae9..000000000000
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ /dev/null
[snip]
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index f0e4910a8865..d8f7343bfe71 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops)
>   	ops->submit_packet = submit_packet_v9;
>   }
>   
> -static int pm_map_process_v9(struct packet_manager *pm,
> -		uint32_t *buffer, struct qcm_process_device *qpd)
> +static int pm_map_process_v9_base(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd,
> +		unsigned int sq_shader_tba_hi_trap_en_shift)
>   {
>   	struct pm4_mes_map_process *packet;
>   	uint64_t vm_page_table_base_addr = qpd->page_table_base;
> @@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   
>   	packet->sh_mem_config = qpd->sh_mem_config;
>   	packet->sh_mem_bases = qpd->sh_mem_bases;
> -	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> -	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> -	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> -	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> +	if (qpd->tba_addr) {
> +		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> +		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> +		if (sq_shader_tba_hi_trap_en_shift) {
> +			packet->sq_shader_tba_hi |=
> +					1 << sq_shader_tba_hi_trap_en_shift;

If you pass in a mask instead of a shift, you don't need the
conditional, i.e.:

     packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8) |
                                sq_shader_tba_hi_trap_en_mask;

> +		}
> +		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> +		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> +	}
>   
>   	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
>   	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
> @@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   	return 0;
>   }
>   
> +static int pm_map_process_v9(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd) {
> +	return pm_map_process_v9_base(pm, buffer, qpd, 0);
> +}
> +
>   static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
>   			uint64_t ib, size_t ib_size_in_dwords, bool chain)
>   {
> @@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.query_status_size	= sizeof(struct pm4_mes_query_status),
>   	.release_mem_size	= 0,
>   };
> +
> +#include "gc/gc_10_1_0_sh_mask.h"
> +
> +static int pm_map_process_v10(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd) {
> +	return pm_map_process_v9_base(pm, buffer, qpd,
> +			SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);

With my suggestion above, pass in
(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) here.

Regards,
   Felix

> +}
> +
> +const struct packet_manager_funcs kfd_v10_pm_funcs = {
> +	.map_process		= pm_map_process_v10,
> +	.runlist		= pm_runlist_v9,
> +	.set_resources		= pm_set_resources_v9,
> +	.map_queues		= pm_map_queues_v9,
> +	.unmap_queues		= pm_unmap_queues_v9,
> +	.query_status		= pm_query_status_v9,
> +	.release_mem		= NULL,
> +	.map_process_size	= sizeof(struct pm4_mes_map_process),
> +	.runlist_size		= sizeof(struct pm4_mes_runlist),
> +	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
> +	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
> +	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
> +	.query_status_size	= sizeof(struct pm4_mes_query_status),
> +	.release_mem_size	= 0,
> +};
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 62db4d20ed32..5127ddee24c8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm);
>   
>   /* Following PM funcs can be shared among VI and AI */
>   unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
> -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
> -			struct scheduling_resources *res);
> -
>   
>   uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10
@ 2019-11-07  4:52         ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07  4:52 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx

On 2019-10-30 20:17, Zhao, Yong wrote:
> The kernel queue functions for v9 and v10 are the same except
> pm_map_process_v* which have small difference, so they should be reused.
> This eliminates the need of reapplying several patches which were
> applied on v9 but not on v10, such as bigger GWS and more than 2
> SDMA engine support which were introduced on Arcturus.

This looks reasonable in principle. See one suggestion inline to 
simplify it further.


>
> Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/Makefile           |   1 -
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  49 ++-
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 -
>   6 files changed, 44 insertions(+), 331 deletions(-)
>   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 48155060a57c..017a8b7156da 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -41,7 +41,6 @@ AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
> -		$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
>   		$(AMDKFD_PATH)/kfd_packet_manager.o \
>   		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
>   		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 11d244891393..0d966408ea87 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>   	case CHIP_RAVEN:
>   	case CHIP_RENOIR:
>   	case CHIP_ARCTURUS:
> -		kernel_queue_init_v9(&kq->ops_asic_specific);
> -		break;
>   	case CHIP_NAVI10:
>   	case CHIP_NAVI12:
>   	case CHIP_NAVI14:
> -		kernel_queue_init_v10(&kq->ops_asic_specific);
> +		kernel_queue_init_v9(&kq->ops_asic_specific);
>   		break;
>   	default:
>   		WARN(1, "Unexpected ASIC family %u",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 365fc674fea4..a7116a939029 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> @@ -102,6 +102,5 @@ struct kernel_queue {
>   void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>   void kernel_queue_init_vi(struct kernel_queue_ops *ops);
>   void kernel_queue_init_v9(struct kernel_queue_ops *ops);
> -void kernel_queue_init_v10(struct kernel_queue_ops *ops);
>   
>   #endif /* KFD_KERNEL_QUEUE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> deleted file mode 100644
> index bfd6221acae9..000000000000
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ /dev/null
[snip]
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index f0e4910a8865..d8f7343bfe71 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops)
>   	ops->submit_packet = submit_packet_v9;
>   }
>   
> -static int pm_map_process_v9(struct packet_manager *pm,
> -		uint32_t *buffer, struct qcm_process_device *qpd)
> +static int pm_map_process_v9_base(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd,
> +		unsigned int sq_shader_tba_hi_trap_en_shift)
>   {
>   	struct pm4_mes_map_process *packet;
>   	uint64_t vm_page_table_base_addr = qpd->page_table_base;
> @@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   
>   	packet->sh_mem_config = qpd->sh_mem_config;
>   	packet->sh_mem_bases = qpd->sh_mem_bases;
> -	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> -	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> -	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> -	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> +	if (qpd->tba_addr) {
> +		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> +		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> +		if (sq_shader_tba_hi_trap_en_shift) {
> +			packet->sq_shader_tba_hi |=
> +					1 << sq_shader_tba_hi_trap_en_shift;

If you pass in a mask instead of a shift, you don't need the
conditional, i.e.:

     packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8) |
                                sq_shader_tba_hi_trap_en_mask;

> +		}
> +		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> +		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> +	}
>   
>   	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
>   	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
> @@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   	return 0;
>   }
>   
> +static int pm_map_process_v9(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd) {
> +	return pm_map_process_v9_base(pm, buffer, qpd, 0);
> +}
> +
>   static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
>   			uint64_t ib, size_t ib_size_in_dwords, bool chain)
>   {
> @@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.query_status_size	= sizeof(struct pm4_mes_query_status),
>   	.release_mem_size	= 0,
>   };
> +
> +#include "gc/gc_10_1_0_sh_mask.h"
> +
> +static int pm_map_process_v10(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd) {
> +	return pm_map_process_v9_base(pm, buffer, qpd,
> +			SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);

With my suggestion above, pass in
(1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) here.

Regards,
   Felix

> +}
> +
> +const struct packet_manager_funcs kfd_v10_pm_funcs = {
> +	.map_process		= pm_map_process_v10,
> +	.runlist		= pm_runlist_v9,
> +	.set_resources		= pm_set_resources_v9,
> +	.map_queues		= pm_map_queues_v9,
> +	.unmap_queues		= pm_unmap_queues_v9,
> +	.query_status		= pm_query_status_v9,
> +	.release_mem		= NULL,
> +	.map_process_size	= sizeof(struct pm4_mes_map_process),
> +	.runlist_size		= sizeof(struct pm4_mes_runlist),
> +	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
> +	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
> +	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
> +	.query_status_size	= sizeof(struct pm4_mes_query_status),
> +	.release_mem_size	= 0,
> +};
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 62db4d20ed32..5127ddee24c8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm);
>   
>   /* Following PM funcs can be shared among VI and AI */
>   unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
> -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
> -			struct scheduling_resources *res);
> -
>   
>   uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 16:57             ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 16:57 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 6562 bytes --]

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 14223 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 16:57             ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 16:57 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 6471 bytes --]

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 14101 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 17:33                 ` Russell, Kent
  0 siblings, 0 replies; 36+ messages in thread
From: Russell, Kent @ 2019-11-07 17:33 UTC (permalink / raw)
  To: Zhao, Yong, Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 7887 bytes --]

I think that the versioning is getting a little confusing since we're using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we'll keep having these questions of "why is Hawaii GFX8", "why is Arcturus GFX9", etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org<mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org<mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 16989 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 17:33                 ` Russell, Kent
  0 siblings, 0 replies; 36+ messages in thread
From: Russell, Kent @ 2019-11-07 17:33 UTC (permalink / raw)
  To: Zhao, Yong, Kuehling, Felix, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 7600 bytes --]

I think that the versioning is getting a little confusing since we're using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we'll keep having these questions of "why is Hawaii GFX8", "why is Arcturus GFX9", etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 16749 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 17:39                     ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 17:39 UTC (permalink / raw)
  To: Russell, Kent, Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 7942 bytes --]

Hi Kent,

I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.

Yong

On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 17335 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 17:39                     ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 17:39 UTC (permalink / raw)
  To: Russell, Kent, Kuehling, Felix, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 7942 bytes --]

Hi Kent,

I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.

Yong

On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 17335 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 17:47                         ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 17:47 UTC (permalink / raw)
  To: Zhao, Yong, Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 8806 bytes --]

No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.

You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet-writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.

This is the price we pay for micro-optimizing minor code duplication.

Regards,
  Felix

On 2019-11-07 12:39, Zhao, Yong wrote:

Hi Kent,

I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.

Yong

On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 18453 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 17:47                         ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 17:47 UTC (permalink / raw)
  To: Zhao, Yong, Russell, Kent, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 8806 bytes --]

No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.

You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet-writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.

This is the price we pay for micro-optimizing minor code duplication.

Regards,
  Felix

On 2019-11-07 12:39, Zhao, Yong wrote:

Hi Kent,

I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.

Yong

On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 18453 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 18:17                             ` Russell, Kent
  0 siblings, 0 replies; 36+ messages in thread
From: Russell, Kent @ 2019-11-07 18:17 UTC (permalink / raw)
  To: Kuehling, Felix, Zhao, Yong, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 10446 bytes --]

That makes sense, Felix.

My big issue is that if I am looking for Hawaii, I go to gfx7, I don’t look in gfx8. And then I have to search the git history to see if it was a mistake, or intentional. As it stands, the consolidation of code is just going to make things more confusing in the end when it comes to functions like this where GFX versions are not respected, which I think is what Felix is trying to get at. If the GFX name is in the function, it should apply to all ASICs in that GFX family.

I understand that it can help to avoid issues where a bugfix goes into one gfx* file and is missed in the other (which might be the impetus for your cleanup efforts), but that’s mostly on the dev to do properly. Otherwise anyone who is even reasonably new to the code is going to be completely confused, which is something we want to avoid. Even those who are working on the code daily will be chasing ghosts looking in the “wrong” gfx* files/functions trying to figure out what’s happening.

All I want is clarity and consistency. If we’re using the GFX versions for naming, then let’s keep the functions and definitions correct, at the cost of duplicated code, because accuracy is key.  Might mean we have to undo some of Yong’s consolidation, but if the end result is correct and easily-understood code, then it’s worth it.

Kent

From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Thursday, November 7, 2019 12:47 PM
To: Zhao, Yong <Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii


No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.

You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet-writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.

This is the price we pay for micro-optimizing minor code duplication.

Regards,
  Felix
On 2019-11-07 12:39, Zhao, Yong wrote:

Hi Kent,

I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.

Yong
On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 20612 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* RE: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 18:17                             ` Russell, Kent
  0 siblings, 0 replies; 36+ messages in thread
From: Russell, Kent @ 2019-11-07 18:17 UTC (permalink / raw)
  To: Kuehling, Felix, Zhao, Yong, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 10446 bytes --]

That makes sense, Felix.

My big issue is that if I am looking for Hawaii, I go to gfx7, I don’t look in gfx8. And then I have to search the git history to see if it was a mistake, or intentional. As it stands, the consolidation of code is just going to make things more confusing in the end when it comes to functions like this where GFX versions are not respected, which I think is what Felix is trying to get at. If the GFX name is in the function, it should apply to all ASICs in that GFX family.

I understand that it can help to avoid issues where a bugfix goes into one gfx* file and is missed in the other (which might be the impetus for your cleanup efforts), but that’s mostly on the dev to do properly. Otherwise anyone who is even reasonably new to the code is going to be completely confused, which is something we want to avoid. Even those who are working on the code daily will be chasing ghosts looking in the “wrong” gfx* files/functions trying to figure out what’s happening.

All I want is clarity and consistency. If we’re using the GFX versions for naming, then let’s keep the functions and definitions correct, at the cost of duplicated code, because accuracy is key.  Might mean we have to undo some of Yong’s consolidation, but if the end result is correct and easily-understood code, then it’s worth it.

Kent

From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Thursday, November 7, 2019 12:47 PM
To: Zhao, Yong <Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii


No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.

You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.

This is the price we pay for micro-optimizing minor code duplication.

Regards,
  Felix
On 2019-11-07 12:39, Zhao, Yong wrote:

Hi Kent,

I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.

Yong
On 2019-11-07 12:33 p.m., Russell, Kent wrote:
I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.

Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.

Kent

From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
Sent: Thursday, November 7, 2019 11:57 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

Hi Felix,

That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.

Regards,
Yong
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>
Sent: Wednesday, November 6, 2019 11:45 PM
To: Zhao, Yong <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-10-30 20:17, Zhao, Yong wrote:
> release_mem won't be used at all on GFX9 and GFX10, so delete it.

Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
either. Why arbitrarily limit this change to GFXv9 and 10?

Regards,
   Felix

>
> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com<mailto:Yong.Zhao@amd.com>>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>   2 files changed, 4 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> index aed32ab7102e..bfd6221acae9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     WARN_ON(!buffer);
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> -}
> -
>   const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_process                    = pm_map_process_v10,
>        .runlist                        = pm_runlist_v10,
> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>        .map_queues                     = pm_map_queues_v10,
>        .unmap_queues                   = pm_unmap_queues_v10,
>        .query_status                   = pm_query_status_v10,
> -     .release_mem                    = pm_release_mem_v10,
> +     .release_mem                    = NULL,
>        .map_process_size               = sizeof(struct pm4_mes_map_process),
>        .runlist_size                   = sizeof(struct pm4_mes_runlist),
>        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size              = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size               = 0,
>   };
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index 3b5ca2b1d7a6..f0e4910a8865 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>        return 0;
>   }
>
> -
> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> -{
> -     struct pm4_mec_release_mem *packet;
> -
> -     packet = (struct pm4_mec_release_mem *)buffer;
> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> -
> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> -                                     sizeof(struct pm4_mec_release_mem));
> -
> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> -     packet->bitfields2.tcl1_action_ena = 1;
> -     packet->bitfields2.tc_action_ena = 1;
> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> -
> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> -     packet->bitfields3.int_sel =
> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> -
> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> -     packet->address_hi = upper_32_bits(gpu_addr);
> -
> -     packet->data_lo = 0;
> -
> -     return 0;
> -}
> -
>   const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_process            = pm_map_process_v9,
>        .runlist                = pm_runlist_v9,
> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>        .map_queues             = pm_map_queues_v9,
>        .unmap_queues           = pm_unmap_queues_v9,
>        .query_status           = pm_query_status_v9,
> -     .release_mem            = pm_release_mem_v9,
> +     .release_mem            = NULL,
>        .map_process_size       = sizeof(struct pm4_mes_map_process),
>        .runlist_size           = sizeof(struct pm4_mes_runlist),
>        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>        .query_status_size      = sizeof(struct pm4_mes_query_status),
> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> +     .release_mem_size       = 0,
>   };

[-- Attachment #1.2: Type: text/html, Size: 20612 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 18:32                             ` Alex Deucher
  0 siblings, 0 replies; 36+ messages in thread
From: Alex Deucher @ 2019-11-07 18:32 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: Zhao, Yong, Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 18:32                             ` Alex Deucher
  0 siblings, 0 replies; 36+ messages in thread
From: Alex Deucher @ 2019-11-07 18:32 UTC (permalink / raw)
  To: Kuehling, Felix; +Cc: Zhao, Yong, Russell, Kent, amd-gfx

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 18:54                                 ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 18:54 UTC (permalink / raw)
  To: Alex Deucher, Kuehling, Felix
  Cc: Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 12140 bytes --]

Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place. The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is that GFX9 is SOC15, where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty because it reuses v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate them by pre-SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org> wrote:
>
> No, please lets not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 21362 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 18:54                                 ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 18:54 UTC (permalink / raw)
  To: Alex Deucher, Kuehling, Felix; +Cc: Russell, Kent, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 11772 bytes --]

Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place. The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is that GFX9 is SOC15, where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty because it reuses v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate them by pre-SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling@amd.com>
Cc: Zhao, Yong <Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com> wrote:
>
> No, please lets not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 20970 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 19:12                                     ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 19:12 UTC (permalink / raw)
  To: Zhao, Yong, Alex Deucher
  Cc: Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 13997 bytes --]

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.


The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is that GFX9 is SOC15, where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty because it reuses v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate them by pre-SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org><mailto:Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org> wrote:
>
> No, please lets not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 25094 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 19:12                                     ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 19:12 UTC (permalink / raw)
  To: Zhao, Yong, Alex Deucher; +Cc: Russell, Kent, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 13255 bytes --]

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.


The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is because GFX9 is SOC15 where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher@gmail.com><mailto:alexdeucher@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 24304 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 19:40                                         ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 19:40 UTC (permalink / raw)
  To: Kuehling, Felix, Alex Deucher
  Cc: Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 14865 bytes --]

Hi Felix,

The code working fine is true except that all not new features after this duplication are broken. If I want to make all GFX10 feature complete, I have to either manually adapt several duplications to the GFX10 file or do this consolidation. From this perspective and ease of my work, it is a must.

Regards,
Yong

________________________________
From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is because GFX9 is SOC15 where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org><mailto:Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code in to
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-urvtwAKJhsc@public.gmane.orgm>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32mzQB+pC5nmwQ@public.gmane.orgedesktop.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 27977 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 19:40                                         ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 19:40 UTC (permalink / raw)
  To: Kuehling, Felix, Alex Deucher; +Cc: Russell, Kent, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 13974 bytes --]

Hi Felix,

It is true that the code works fine, except that all the new features added after this duplication are broken. If I want to make GFX10 feature complete, I have to either manually adapt several duplications to the GFX10 file or do this consolidation. From this perspective, and for the ease of my work, it is a must.

Regards,
Yong

________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <Yong.Zhao@amd.com>; Alex Deucher <alexdeucher@gmail.com>
Cc: Russell, Kent <Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is that GFX9 is SOC15, where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher@gmail.com><mailto:alexdeucher@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com> wrote:
>
> No, please lets not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c, amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 27068 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 20:31                                             ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 20:31 UTC (permalink / raw)
  To: Zhao, Yong, Alex Deucher
  Cc: Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 16210 bytes --]

On 2019-11-07 14:40, Zhao, Yong wrote:
Hi Felix,

It is true that the code works fine, except that all the new features added after this duplication are broken. If I want to make GFX10 feature complete, I have to either manually adapt several duplications to the GFX10 file or do this consolidation. From this perspective, and for the ease of my work, it is a must.

"A must" means there is no alternative. You already listed two alternatives yourself: "either manually adapt several duplications to the GFX10 file or do this consolidation."


In _your_ opinion, the consolidation means less work for _you_. That's _your_ point of view. The discussion in this code review pointed out other points of view. When you take all of them into account, you may reconsider what is less work overall, and what is easier to maintain.


I'm not opposing your change per se. But I'd like you to consider the whole picture, including the consequences of any design decisions you're making and imposing on anyone working on this code in the future. In this case I think it's a relatively minor issue and it may just come down to a matter of opinion that I don't feel terribly strongly about.


With that said, the change is

Reviewed-by: Felix Kuehling <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling@amd.com>


Regards,

  Felix


Regards,
Yong

________________________________
From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org><mailto:Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is that GFX9 is SOC15, where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org><mailto:Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org> wrote:
>
> No, please lets not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straight forward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c, amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 31340 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 20:31                                             ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 20:31 UTC (permalink / raw)
  To: Zhao, Yong, Alex Deucher; +Cc: Russell, Kent, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 15176 bytes --]

On 2019-11-07 14:40, Zhao, Yong wrote:
Hi Felix,

The code working fine is true except that all not new features after this duplication are broken. If I want to make all GFX10 feature complete, I have to either manually adapt several duplications to the GFX10 file or do this consolidation. From this perspective and ease of my work, it is a must.

"A must" means there is no alternative. You already listed two alternatives yourself: "either manually adapt several duplications to the GFX10 file or do this consolidation."


In _your_ opinion, the consolidation means less work for _you_. That's _your_ point of view. The discussion in this code review pointed out other points of view. When you take all of them into account, you may reconsider what is less work overall, and what is easier to maintain.


I'm not opposing your change per se. But I'd like you to consider the whole picture, including the consequences of any design decisions you're making and imposing on anyone working on this code in the future. In this case I think it's a relatively minor issue and it may just come down to a matter of opinion that I don't feel terribly strongly about.


With that said, the change is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>


Regards,

  Felix


Regards,
Yong

________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; Alex Deucher <alexdeucher@gmail.com><mailto:alexdeucher@gmail.com>
Cc: Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is because GFX9 is SOC15 where packet formats and doorbell size changed.  On the other hand, kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher@gmail.com><mailto:alexdeucher@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c, amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 30240 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 20:37                                                 ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 20:37 UTC (permalink / raw)
  To: Kuehling, Felix, Alex Deucher
  Cc: Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 16488 bytes --]

After considering Kent's concern, I actually took the consolidation to the next level where v9 and v10 have no divergence. With that, I think the "mustness" is stronger. Please check out the new patch.


Regards,

Yong


On 2019-11-07 3:31 p.m., Kuehling, Felix wrote:
On 2019-11-07 14:40, Zhao, Yong wrote:
Hi Felix,

The code working fine is true except that all not new features after this duplication are broken. If I want to make all GFX10 feature complete, I have to either manually adapt several duplications to the GFX10 file or do this consolidation. From this perspective and ease of my work, it is a must.

"A must" means there is no alternative. You already listed two alternatives yourself: "either manually adapt several duplications to the GFX10 file or do this consolidation."


In _your_ opinion, the consolidation means less work for _you_. That's _your_ point of view. The discussion in this code review pointed out other points of view. When you take all of them into account, you may reconsider what is less work overall, and what is easier to maintain.


I'm not opposing your change per se. But I'd like you to consider the whole picture, including the consequences of any design decisions you're making and imposing on anyone working on this code in the future. In this case I think it's a relatively minor issue and it may just come down to a matter of opinion that I don't feel terribly strongly about.


With that said, the change is

Reviewed-by: Felix Kuehling <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling@amd.com>


Regards,

  Felix


Regards,
Yong

________________________________
From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org><mailto:Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is because GFX9 is SOC15 where packet formats and doorbell size changed.  On the other hand, kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:alexdeucher-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org><mailto:Kent.Russell-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c, amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> <amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org><mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao-5C7GfCeVMHo@public.gmane.org><mailto:Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 32080 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 20:37                                                 ` Zhao, Yong
  0 siblings, 0 replies; 36+ messages in thread
From: Zhao, Yong @ 2019-11-07 20:37 UTC (permalink / raw)
  To: Kuehling, Felix, Alex Deucher; +Cc: Russell, Kent, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 15454 bytes --]

After considering Kent's concern, I actually took the consolidation to the next level where v9 and v10 have no divergence. With that, I think the "mustness" is stronger. Please check out the new patch.


Regards,

Yong


On 2019-11-07 3:31 p.m., Kuehling, Felix wrote:
On 2019-11-07 14:40, Zhao, Yong wrote:
Hi Felix,

It is true that the code works fine, except that all the new features added after this duplication are broken. If I want to make all GFX10 features complete, I have to either manually adapt several duplications to the GFX10 file or do this consolidation. From this perspective, and for the ease of my work, it is a must.

"A must" means there is no alternative. You already listed two alternatives yourself: "either manually adapt several duplications to the GFX10 file or do this consolidation."


In _your_ opinion, the consolidation means less work for _you_. That's _your_ point of view. The discussion in this code review pointed out other points of view. When you take all of them into account, you may reconsider what is less work overall, and what is easier to maintain.


I'm not opposing your change per se. But I'd like you to consider the whole picture, including the consequences of any design decisions you're making and imposing on anyone working on this code in the future. In this case I think it's a relatively minor issue and it may just come down to a matter of opinion that I don't feel terribly strongly about.


With that said, the change is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>


Regards,

  Felix


Regards,
Yong

________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Sent: Thursday, November 7, 2019 2:12 PM
To: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; Alex Deucher <alexdeucher@gmail.com><mailto:alexdeucher@gmail.com>
Cc: Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On 2019-11-07 13:54, Zhao, Yong wrote:
Hi Kent,

This consolidation is a must, because we should not have duplicated it in the first place.

The code is working fine with the duplication. You disagree with duplicating the code in the first place. But that's just your opinion. It's not a must in any objective sense.



The kernel queue functions by design are generic. The reason why GFX8 and GFX9 are different is because GFX9 is SOC15 where packet formats and doorbell size changed.  On the other hand, the kfd_kernel_queue_v7.c file is pretty much empty by reusing v8 functions, even though it is there. Furthermore, in my opinion kfd_kernel_queue_v7.c should be merged into its v8 counterpart. From GFX9 onwards, packet formats should stay the same. For kernel queues, we should be able to differentiate it by pre SOC15 or not, and I have an impression that MEC firmware agrees to maintain the kernel queue interface stable across generations most of the time.

OK, you're making assumptions about PM4 packets on future ASIC generations. It's true that the transition to SOC15 with 64-bit doorbells and read/write-pointers was particularly disruptive. Your assumption will hold until it gets broken by some other disruptive change.


For now, if you want clear naming, we could call the GFXv7/8 packet manager functions "pre-SOC15" or "legacy" and the GFXv9/10 and future functions "SOC15". This may work for a while. But I suspect at some point something is going to change and we'll need to create a new version for a newer ASIC generation. You already have a small taste of that with the different TBA-enable bit in the MAP_PROCESS packet in GFXv10.


Regards,

  Felix


Regards,
Yong
________________________________
From: Alex Deucher <alexdeucher@gmail.com><mailto:alexdeucher@gmail.com>
Sent: Thursday, November 7, 2019 1:32 PM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; Russell, Kent <Kent.Russell@amd.com><mailto:Kent.Russell@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii

On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com> wrote:
>
> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>
> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>
> This is the price we pay for micro-optimizing minor code duplication.

What we've tried to do in amdgpu is to break out shared code into
common helpers that are not IP specific and use that in each IP module
(e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
particular chunk of code across multiple generations.  E.g., the uvd
stuff is a good example.  We have shared generic uvd helpers that work
for most UVD IP versions, and then if we need an IP specific version,
we override that in the callbacks with a version specific one.  E.g.,
for the video decode engines we use the generic helpers for a number
of ring functions:

static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_ring_test_ring,
        .test_ib = amdgpu_uvd_ring_test_ib,
        .insert_nop = uvd_v7_0_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

while we override more of them for the video encode engines:

static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
...
    .test_ring = uvd_v7_0_enc_ring_test_ring,
        .test_ib = uvd_v7_0_enc_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = uvd_v7_0_enc_ring_insert_end,
        .pad_ib = amdgpu_ring_generic_pad_ib,
        .begin_use = amdgpu_uvd_ring_begin_use,
        .end_use = amdgpu_uvd_ring_end_use,
...
};

But still maintain IP specific components.

Alex

>
> Regards,
>   Felix
>
> On 2019-11-07 12:39, Zhao, Yong wrote:
>
> Hi Kent,
>
> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>
> Yong
>
> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>
> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>
>
>
> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>
>
>
> Kent
>
>
>
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org><mailto:amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
> Sent: Thursday, November 7, 2019 11:57 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> Hi Felix,
>
>
>
> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>
>
>
> Regards,
>
> Yong
>
> ________________________________
>
> From: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
> Sent: Wednesday, November 6, 2019 11:45 PM
> To: Zhao, Yong <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org><mailto:amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>
>
>
> On 2019-10-30 20:17, Zhao, Yong wrote:
> > release_mem won't be used at all on GFX9 and GFX10, so delete it.
>
> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
> either. Why arbitrarily limit this change to GFXv9 and 10?
>
> Regards,
>    Felix
>
> >
> > Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
> > Signed-off-by: Yong Zhao <Yong.Zhao@amd.com><mailto:Yong.Zhao@amd.com>
> > ---
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
> >   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
> >   2 files changed, 4 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > index aed32ab7102e..bfd6221acae9 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> > @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     WARN_ON(!buffer);
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_process                    = pm_map_process_v10,
> >        .runlist                        = pm_runlist_v10,
> > @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
> >        .map_queues                     = pm_map_queues_v10,
> >        .unmap_queues                   = pm_unmap_queues_v10,
> >        .query_status                   = pm_query_status_v10,
> > -     .release_mem                    = pm_release_mem_v10,
> > +     .release_mem                    = NULL,
> >        .map_process_size               = sizeof(struct pm4_mes_map_process),
> >        .runlist_size                   = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size             = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size                = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size              = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size               = 0,
> >   };
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > index 3b5ca2b1d7a6..f0e4910a8865 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> > @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
> >        return 0;
> >   }
> >
> > -
> > -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
> > -{
> > -     struct pm4_mec_release_mem *packet;
> > -
> > -     packet = (struct pm4_mec_release_mem *)buffer;
> > -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
> > -                                     sizeof(struct pm4_mec_release_mem));
> > -
> > -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
> > -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
> > -     packet->bitfields2.tcl1_action_ena = 1;
> > -     packet->bitfields2.tc_action_ena = 1;
> > -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
> > -
> > -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
> > -     packet->bitfields3.int_sel =
> > -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
> > -
> > -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
> > -     packet->address_hi = upper_32_bits(gpu_addr);
> > -
> > -     packet->data_lo = 0;
> > -
> > -     return 0;
> > -}
> > -
> >   const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_process            = pm_map_process_v9,
> >        .runlist                = pm_runlist_v9,
> > @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> >        .map_queues             = pm_map_queues_v9,
> >        .unmap_queues           = pm_unmap_queues_v9,
> >        .query_status           = pm_query_status_v9,
> > -     .release_mem            = pm_release_mem_v9,
> > +     .release_mem            = NULL,
> >        .map_process_size       = sizeof(struct pm4_mes_map_process),
> >        .runlist_size           = sizeof(struct pm4_mes_runlist),
> >        .set_resources_size     = sizeof(struct pm4_mes_set_resources),
> >        .map_queues_size        = sizeof(struct pm4_mes_map_queues),
> >        .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
> >        .query_status_size      = sizeof(struct pm4_mes_query_status),
> > -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
> > +     .release_mem_size       = 0,
> >   };
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 30980 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 20:42                                 ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 20:42 UTC (permalink / raw)
  To: Alex Deucher
  Cc: Zhao, Yong, Russell, Kent, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-11-07 13:32, Alex Deucher wrote:
> On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com> wrote:
>> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>>
>> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and burden on future maintenance.
>>
>> This is the price we pay for micro-optimizing minor code duplication.
> What we've tried to do in amdgpu is to break out shared code into
> common helpers that are not IP specific and use that in each IP module
> (e.g., amdgpu_uvd.c amdgpu_gfx.c, etc.).  Sometimes we can use a
> particular chunk of code across multiple generations.  E.g., the uvd
> stuff is a good example.  We have shared generic uvd helpers that work
> for most UVD IP versions, and then if we need an IP specific version,
> we override that in the callbacks with a version specific one.  E.g.,
> for the video decode engines we use the generic helpers for a number
> of ring functions:
>
> static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
> ...
>      .test_ring = uvd_v7_0_ring_test_ring,
>          .test_ib = amdgpu_uvd_ring_test_ib,
>          .insert_nop = uvd_v7_0_ring_insert_nop,
>          .pad_ib = amdgpu_ring_generic_pad_ib,
>          .begin_use = amdgpu_uvd_ring_begin_use,
>          .end_use = amdgpu_uvd_ring_end_use,
> ...
> };
>
> while we override more of them for the video encode engines:
>
> static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
> ...
>      .test_ring = uvd_v7_0_enc_ring_test_ring,
>          .test_ib = uvd_v7_0_enc_ring_test_ib,
>          .insert_nop = amdgpu_ring_insert_nop,
>      .insert_end = uvd_v7_0_enc_ring_insert_end,
>          .pad_ib = amdgpu_ring_generic_pad_ib,
>          .begin_use = amdgpu_uvd_ring_begin_use,
>          .end_use = amdgpu_uvd_ring_end_use,
> ...
> };
>
> But still maintain IP specific components.

Thanks Alex. In this case the common code is in kfd_packet_manager.c and 
the IP-version-specific code that writes the actual PM4 packets is in 
the kernel_queue_v*.c files. Yong is trying to merge the PM4 packet 
writing code for v9 and v10 because the packet formats are essentially 
unchanged. It makes the naming conventions in the code a bit meaningless 
because v9 now really means "v9 and v10". Apparently there is precedent 
for this, as we already did the same thing with v7 and v8, which I 
forgot about in my initial code review.

Regards,
   Felix


>
> Alex
>
>> Regards,
>>    Felix
>>
>> On 2019-11-07 12:39, Zhao, Yong wrote:
>>
>> Hi Kent,
>>
>> I can't agree more on this. Also, the same applies to the file names. Definitely we need to agree on the naming scheme before making it happen.
>>
>> Yong
>>
>> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>>
>> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>>
>>
>>
>> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>>
>>
>>
>> Kent
>>
>>
>>
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
>> Sent: Thursday, November 7, 2019 11:57 AM
>> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>>
>>
>>
>> Hi Felix,
>>
>>
>>
>> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>>
>>
>>
>> Regards,
>>
>> Yong
>>
>> ________________________________
>>
>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Sent: Wednesday, November 6, 2019 11:45 PM
>> To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
>> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>>
>>
>>
>> On 2019-10-30 20:17, Zhao, Yong wrote:
>>> release_mem won't be used at all on GFX9 and GFX10, so delete it.
>> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
>> either. Why arbitrarily limit this change to GFXv9 and 10?
>>
>> Regards,
>>     Felix
>>
>>> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
>>> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
>>> ---
>>>    .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>>>    .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>>>    2 files changed, 4 insertions(+), 64 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>>> index aed32ab7102e..bfd6221acae9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>>> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>>>         return 0;
>>>    }
>>>
>>> -
>>> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
>>> -{
>>> -     struct pm4_mec_release_mem *packet;
>>> -
>>> -     WARN_ON(!buffer);
>>> -
>>> -     packet = (struct pm4_mec_release_mem *)buffer;
>>> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
>>> -                                     sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
>>> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
>>> -     packet->bitfields2.tcl1_action_ena = 1;
>>> -     packet->bitfields2.tc_action_ena = 1;
>>> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
>>> -
>>> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
>>> -     packet->bitfields3.int_sel =
>>> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
>>> -
>>> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
>>> -     packet->address_hi = upper_32_bits(gpu_addr);
>>> -
>>> -     packet->data_lo = 0;
>>> -
>>> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
>>> -}
>>> -
>>>    const struct packet_manager_funcs kfd_v10_pm_funcs = {
>>>         .map_process                    = pm_map_process_v10,
>>>         .runlist                        = pm_runlist_v10,
>>> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>>>         .map_queues                     = pm_map_queues_v10,
>>>         .unmap_queues                   = pm_unmap_queues_v10,
>>>         .query_status                   = pm_query_status_v10,
>>> -     .release_mem                    = pm_release_mem_v10,
>>> +     .release_mem                    = NULL,
>>>         .map_process_size               = sizeof(struct pm4_mes_map_process),
>>>         .runlist_size                   = sizeof(struct pm4_mes_runlist),
>>>         .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>>>         .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>>>         .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>>>         .query_status_size              = sizeof(struct pm4_mes_query_status),
>>> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
>>> +     .release_mem_size               = 0,
>>>    };
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>>> index 3b5ca2b1d7a6..f0e4910a8865 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>>> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>>>         return 0;
>>>    }
>>>
>>> -
>>> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
>>> -{
>>> -     struct pm4_mec_release_mem *packet;
>>> -
>>> -     packet = (struct pm4_mec_release_mem *)buffer;
>>> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
>>> -                                     sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
>>> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
>>> -     packet->bitfields2.tcl1_action_ena = 1;
>>> -     packet->bitfields2.tc_action_ena = 1;
>>> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
>>> -
>>> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
>>> -     packet->bitfields3.int_sel =
>>> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
>>> -
>>> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
>>> -     packet->address_hi = upper_32_bits(gpu_addr);
>>> -
>>> -     packet->data_lo = 0;
>>> -
>>> -     return 0;
>>> -}
>>> -
>>>    const struct packet_manager_funcs kfd_v9_pm_funcs = {
>>>         .map_process            = pm_map_process_v9,
>>>         .runlist                = pm_runlist_v9,
>>> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>>>         .map_queues             = pm_map_queues_v9,
>>>         .unmap_queues           = pm_unmap_queues_v9,
>>>         .query_status           = pm_query_status_v9,
>>> -     .release_mem            = pm_release_mem_v9,
>>> +     .release_mem            = NULL,
>>>         .map_process_size       = sizeof(struct pm4_mes_map_process),
>>>         .runlist_size           = sizeof(struct pm4_mes_runlist),
>>>         .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>>>         .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>>>         .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>>>         .query_status_size      = sizeof(struct pm4_mes_query_status),
>>> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
>>> +     .release_mem_size       = 0,
>>>    };
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
@ 2019-11-07 20:42                                 ` Kuehling, Felix
  0 siblings, 0 replies; 36+ messages in thread
From: Kuehling, Felix @ 2019-11-07 20:42 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Zhao, Yong, Russell, Kent, amd-gfx

On 2019-11-07 13:32, Alex Deucher wrote:
> On Thu, Nov 7, 2019 at 12:47 PM Kuehling, Felix <Felix.Kuehling@amd.com> wrote:
>> No, please let's not add a new nomenclature for PM4 packet versions. GFX versions are agreed on between hardware, firmware, and software, and it's generally understood what they mean. If we add a new PM4 packet versioning scheme on our own, then this will add a lot of confusion when talking to firmware teams.
>>
>> You know, this would all be more straightforward if we accepted a little bit of code duplication and had packet writing functions per GFX version. You'll see this pattern a lot in the amdgpu driver where each IP version duplicates a bunch of code. In many cases you may be able to save a few lines of code by sharing functions between IP versions. But you'll add some confusion and a burden on future maintenance.
>>
>> This is the price we pay for micro-optimizing minor code duplication.
> What we've tried to do in amdgpu is to break out shared code into
> common helpers that are not IP-specific and use those in each IP module
> (e.g., amdgpu_uvd.c, amdgpu_gfx.c, etc.).  Sometimes we can use a
> particular chunk of code across multiple generations.  E.g., the uvd
> stuff is a good example.  We have shared generic uvd helpers that work
> for most UVD IP versions, and then if we need an IP specific version,
> we override that in the callbacks with a version specific one.  E.g.,
> for the video decode engines we use the generic helpers for a number
> of ring functions:
>
> static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
> ...
>      .test_ring = uvd_v7_0_ring_test_ring,
>          .test_ib = amdgpu_uvd_ring_test_ib,
>          .insert_nop = uvd_v7_0_ring_insert_nop,
>          .pad_ib = amdgpu_ring_generic_pad_ib,
>          .begin_use = amdgpu_uvd_ring_begin_use,
>          .end_use = amdgpu_uvd_ring_end_use,
> ...
> };
>
> while we override more of them for the video encode engines:
>
> static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
> ...
>      .test_ring = uvd_v7_0_enc_ring_test_ring,
>          .test_ib = uvd_v7_0_enc_ring_test_ib,
>          .insert_nop = amdgpu_ring_insert_nop,
>      .insert_end = uvd_v7_0_enc_ring_insert_end,
>          .pad_ib = amdgpu_ring_generic_pad_ib,
>          .begin_use = amdgpu_uvd_ring_begin_use,
>          .end_use = amdgpu_uvd_ring_end_use,
> ...
> };
>
> But still maintain IP specific components.

Thanks Alex. In this case the common code is in kfd_packet_manager.c and 
the IP-version-specific code that writes the actual PM4 packets is in 
the kfd_kernel_queue_v*.c files. Yong is trying to merge the PM4 packet 
writing code for v9 and v10 because the packet formats are essentially 
unchanged. It makes the naming conventions in the code a bit meaningless 
because v9 now really means "v9 and v10". Apparently there is precedent 
for this, as we already did the same thing with v7 and v8, which I 
forgot about in my initial code review.

Regards,
   Felix


>
> Alex
>
>> Regards,
>>    Felix
>>
>> On 2019-11-07 12:39, Zhao, Yong wrote:
>>
>> Hi Kent,
>>
>> I couldn't agree more. The same applies to the file names. We definitely need to agree on the naming scheme before making any changes.
>>
>> Yong
>>
>> On 2019-11-07 12:33 p.m., Russell, Kent wrote:
>>
>> I think that the versioning is getting a little confusing since we’re using the old GFX versions, but not really sticking to it due to the shareability of certain managers and shaders. Could we look into doing something like gen1 or gen2, or some other more ambiguous non-GFX-related versioning? Otherwise we’ll keep having these questions of “why is Hawaii GFX8”, “why is Arcturus GFX9”, etc. Then if things change, we just up the value concretely, instead of maybe doing a v11 if GFX11 changes things, and only GFX11 ASICs use those functions/variables.
>>
>>
>>
>> Obviously not high-priority, but maybe something to consider as you continue to consolidate and remove duplicate code.
>>
>>
>>
>> Kent
>>
>>
>>
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhao, Yong
>> Sent: Thursday, November 7, 2019 11:57 AM
>> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>>
>>
>>
>> Hi Felix,
>>
>>
>>
>> That's because v8 and v7 share the same packet_manager_funcs. In this case, it is better to keep it as it is.
>>
>>
>>
>> Regards,
>>
>> Yong
>>
>> ________________________________
>>
>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Sent: Wednesday, November 6, 2019 11:45 PM
>> To: Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
>> Subject: Re: [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii
>>
>>
>>
>> On 2019-10-30 20:17, Zhao, Yong wrote:
>>> release_mem won't be used at all on GFX9 and GFX10, so delete it.
>> Hawaii was GFXv7. So we're not using the release_mem packet on GFXv8
>> either. Why arbitrarily limit this change to GFXv9 and 10?
>>
>> Regards,
>>     Felix
>>
>>> Change-Id: I13787a8a29b83e7516c582a7401f2e14721edf5f
>>> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
>>> ---
>>>    .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 35 ++-----------------
>>>    .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 33 ++---------------
>>>    2 files changed, 4 insertions(+), 64 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>>> index aed32ab7102e..bfd6221acae9 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>>> @@ -298,37 +298,6 @@ static int pm_query_status_v10(struct packet_manager *pm, uint32_t *buffer,
>>>         return 0;
>>>    }
>>>
>>> -
>>> -static int pm_release_mem_v10(uint64_t gpu_addr, uint32_t *buffer)
>>> -{
>>> -     struct pm4_mec_release_mem *packet;
>>> -
>>> -     WARN_ON(!buffer);
>>> -
>>> -     packet = (struct pm4_mec_release_mem *)buffer;
>>> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
>>> -                                     sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
>>> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
>>> -     packet->bitfields2.tcl1_action_ena = 1;
>>> -     packet->bitfields2.tc_action_ena = 1;
>>> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
>>> -
>>> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
>>> -     packet->bitfields3.int_sel =
>>> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
>>> -
>>> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
>>> -     packet->address_hi = upper_32_bits(gpu_addr);
>>> -
>>> -     packet->data_lo = 0;
>>> -
>>> -     return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int);
>>> -}
>>> -
>>>    const struct packet_manager_funcs kfd_v10_pm_funcs = {
>>>         .map_process                    = pm_map_process_v10,
>>>         .runlist                        = pm_runlist_v10,
>>> @@ -336,13 +305,13 @@ const struct packet_manager_funcs kfd_v10_pm_funcs = {
>>>         .map_queues                     = pm_map_queues_v10,
>>>         .unmap_queues                   = pm_unmap_queues_v10,
>>>         .query_status                   = pm_query_status_v10,
>>> -     .release_mem                    = pm_release_mem_v10,
>>> +     .release_mem                    = NULL,
>>>         .map_process_size               = sizeof(struct pm4_mes_map_process),
>>>         .runlist_size                   = sizeof(struct pm4_mes_runlist),
>>>         .set_resources_size             = sizeof(struct pm4_mes_set_resources),
>>>         .map_queues_size                = sizeof(struct pm4_mes_map_queues),
>>>         .unmap_queues_size              = sizeof(struct pm4_mes_unmap_queues),
>>>         .query_status_size              = sizeof(struct pm4_mes_query_status),
>>> -     .release_mem_size               = sizeof(struct pm4_mec_release_mem)
>>> +     .release_mem_size               = 0,
>>>    };
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>>> index 3b5ca2b1d7a6..f0e4910a8865 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
>>> @@ -336,35 +336,6 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
>>>         return 0;
>>>    }
>>>
>>> -
>>> -static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
>>> -{
>>> -     struct pm4_mec_release_mem *packet;
>>> -
>>> -     packet = (struct pm4_mec_release_mem *)buffer;
>>> -     memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
>>> -                                     sizeof(struct pm4_mec_release_mem));
>>> -
>>> -     packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
>>> -     packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
>>> -     packet->bitfields2.tcl1_action_ena = 1;
>>> -     packet->bitfields2.tc_action_ena = 1;
>>> -     packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
>>> -
>>> -     packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
>>> -     packet->bitfields3.int_sel =
>>> -             int_sel__mec_release_mem__send_interrupt_after_write_confirm;
>>> -
>>> -     packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
>>> -     packet->address_hi = upper_32_bits(gpu_addr);
>>> -
>>> -     packet->data_lo = 0;
>>> -
>>> -     return 0;
>>> -}
>>> -
>>>    const struct packet_manager_funcs kfd_v9_pm_funcs = {
>>>         .map_process            = pm_map_process_v9,
>>>         .runlist                = pm_runlist_v9,
>>> @@ -372,12 +343,12 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>>>         .map_queues             = pm_map_queues_v9,
>>>         .unmap_queues           = pm_unmap_queues_v9,
>>>         .query_status           = pm_query_status_v9,
>>> -     .release_mem            = pm_release_mem_v9,
>>> +     .release_mem            = NULL,
>>>         .map_process_size       = sizeof(struct pm4_mes_map_process),
>>>         .runlist_size           = sizeof(struct pm4_mes_runlist),
>>>         .set_resources_size     = sizeof(struct pm4_mes_set_resources),
>>>         .map_queues_size        = sizeof(struct pm4_mes_map_queues),
>>>         .unmap_queues_size      = sizeof(struct pm4_mes_unmap_queues),
>>>         .query_status_size      = sizeof(struct pm4_mes_query_status),
>>> -     .release_mem_size       = sizeof(struct pm4_mec_release_mem)
>>> +     .release_mem_size       = 0,
>>>    };
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2019-11-07 20:42 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-31  0:17 [PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations Zhao, Yong
2019-10-31  0:17 ` Zhao, Yong
     [not found] ` <20191031001739.10764-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-10-31  0:17   ` [PATCH 2/3] drm/amdkfd: only keep release_mem function for Hawaii Zhao, Yong
2019-10-31  0:17     ` Zhao, Yong
     [not found]     ` <20191031001739.10764-2-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-11-07  4:45       ` Kuehling, Felix
2019-11-07  4:45         ` Kuehling, Felix
     [not found]         ` <31102345-20f7-1b0e-dd2a-f7984b253a07-5C7GfCeVMHo@public.gmane.org>
2019-11-07 16:57           ` Zhao, Yong
2019-11-07 16:57             ` Zhao, Yong
     [not found]             ` <DM6PR12MB2778C141CFEA0006DA2CEC6DF0780-lmeGfMZKVrFSet88YzIdmgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-07 17:33               ` Russell, Kent
2019-11-07 17:33                 ` Russell, Kent
     [not found]                 ` <CY4PR1201MB2533087B346C4619F461FF3185780-1iTaO6aE1DCmssYNuJz6EmrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2019-11-07 17:39                   ` Zhao, Yong
2019-11-07 17:39                     ` Zhao, Yong
     [not found]                     ` <41a6b535-ab7a-44f4-2899-f211f5ddc296-5C7GfCeVMHo@public.gmane.org>
2019-11-07 17:47                       ` Kuehling, Felix
2019-11-07 17:47                         ` Kuehling, Felix
     [not found]                         ` <85fd399c-5fa9-bdef-cdf6-433e80ba6407-5C7GfCeVMHo@public.gmane.org>
2019-11-07 18:17                           ` Russell, Kent
2019-11-07 18:17                             ` Russell, Kent
2019-11-07 18:32                           ` Alex Deucher
2019-11-07 18:32                             ` Alex Deucher
     [not found]                             ` <CADnq5_Og4AWKdYFpa8hvmd2XXjr1Wzb_a26gqsq1H6c7NkWmMA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-11-07 18:54                               ` Zhao, Yong
2019-11-07 18:54                                 ` Zhao, Yong
     [not found]                                 ` <DM6PR12MB2778007D58A5332F6A030DA3F0780-lmeGfMZKVrFSet88YzIdmgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-07 19:12                                   ` Kuehling, Felix
2019-11-07 19:12                                     ` Kuehling, Felix
     [not found]                                     ` <b4a76aeb-4d2c-c348-01bd-a40124930241-5C7GfCeVMHo@public.gmane.org>
2019-11-07 19:40                                       ` Zhao, Yong
2019-11-07 19:40                                         ` Zhao, Yong
     [not found]                                         ` <DM6PR12MB2778CD10620AEDD371AC088EF0780-lmeGfMZKVrFSet88YzIdmgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-07 20:31                                           ` Kuehling, Felix
2019-11-07 20:31                                             ` Kuehling, Felix
     [not found]                                             ` <6e021a24-eeb0-e1e1-d467-87ab48a6c7bc-5C7GfCeVMHo@public.gmane.org>
2019-11-07 20:37                                               ` Zhao, Yong
2019-11-07 20:37                                                 ` Zhao, Yong
2019-11-07 20:42                               ` Kuehling, Felix
2019-11-07 20:42                                 ` Kuehling, Felix
2019-10-31  0:17   ` [PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10 Zhao, Yong
2019-10-31  0:17     ` Zhao, Yong
     [not found]     ` <20191031001739.10764-3-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-11-07  4:52       ` Kuehling, Felix
2019-11-07  4:52         ` Kuehling, Felix
2019-11-07  4:45   ` [PATCH 1/3] drm/amdkfd: Adjust function sequences to avoid unnecessary declarations Kuehling, Felix
2019-11-07  4:45     ` Kuehling, Felix

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.