All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer
@ 2019-09-26 18:38 Zhao, Yong
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 18:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

GFX10 no longer requires the control stack to be placed right after the
mqd buffer, so move it back to the userspace-allocated CWSR buffer.

Change-Id: I446c9685549a09ac8846a42ee22d86cfb93fd98c
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 37 ++-----------------
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 29d50d6af9d7..e2fb76247f47 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
 		struct queue_properties *q)
 {
-	int retval;
-	struct kfd_mem_obj *mqd_mem_obj = NULL;
+	struct kfd_mem_obj *mqd_mem_obj;
 
-	/* From V9,  for CWSR, the control stack is located on the next page
-	 * boundary after the mqd, we will use the gtt allocation function
-	 * instead of sub-allocation function.
-	 */
-	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
-		if (!mqd_mem_obj)
-			return NULL;
-		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
-			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
-				ALIGN(sizeof(struct v10_compute_mqd), PAGE_SIZE),
-			&(mqd_mem_obj->gtt_mem),
-			&(mqd_mem_obj->gpu_addr),
-			(void *)&(mqd_mem_obj->cpu_ptr), true);
-	} else {
-		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
-				&mqd_mem_obj);
-	}
-
-	if (retval) {
-		kfree(mqd_mem_obj);
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
+			&mqd_mem_obj))
 		return NULL;
-	}
 
 	return mqd_mem_obj;
-
 }
 
 static void init_mqd(struct mqd_manager *mm, void **mqd,
@@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
 static void free_mqd(struct mqd_manager *mm, void *mqd,
 			struct kfd_mem_obj *mqd_mem_obj)
 {
-	struct kfd_dev *kfd = mm->dev;
-
-	if (mqd_mem_obj->gtt_mem) {
-		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
-		kfree(mqd_mem_obj);
-	} else {
-		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-	}
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }
 
 static bool is_occupied(struct mqd_manager *mm, void *mqd,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/6] drm/amdkfd: Delete unused defines
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
@ 2019-09-26 18:38   ` Zhao, Yong
  2019-09-26 18:38   ` [PATCH 3/6] drm/amdkfd: Use hex print format for pasid Zhao, Yong
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 18:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

They are not used anywhere.

Change-Id: Ieba4f57760f0c45f24e54629245cae419b8ff157
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 49cd946bf8b4..f8f8d6fe8b52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -52,9 +52,6 @@
 #include "gmc_v9_0.h"
 
 
-#define V9_PIPE_PER_MEC		(4)
-#define V9_QUEUES_PER_PIPE_MEC	(8)
-
 enum hqd_dequeue_request_type {
 	NO_ACTION = 0,
 	DRAIN_PIPE,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/6] drm/amdkfd: Use hex print format for pasid
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
  2019-09-26 18:38   ` [PATCH 2/6] drm/amdkfd: Delete unused defines Zhao, Yong
@ 2019-09-26 18:38   ` Zhao, Yong
  2019-09-26 18:38   ` [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver Zhao, Yong
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 18:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

Since KFD pasid starts from 0x8000 (32768 in decimal), it is better
perceived as a hex number. Meanwhile, change the pasid type from
unsigned int to uint16_t to be consistent throughout the code.

Change-Id: I565fe39f69e782749a697f18545775354c7a89f8
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 12 +++++------
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c       |  8 ++++----
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 12 +++++------
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c     |  8 ++++----
 drivers/gpu/drm/amd/amdkfd/kfd_events.c       | 12 +++++------
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c        |  6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 20 +++++++++----------
 .../amd/amdkfd/kfd_process_queue_manager.c    |  6 +++---
 10 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e5ff772862cd..106d45ae7c9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -301,7 +301,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 		goto err_bind_process;
 	}
 
-	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
+	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
 			p->pasid,
 			dev->id);
 
@@ -351,7 +351,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 	int retval;
 	struct kfd_ioctl_destroy_queue_args *args = data;
 
-	pr_debug("Destroying queue id %d for pasid %d\n",
+	pr_debug("Destroying queue id %d for pasid 0x%x\n",
 				args->queue_id,
 				p->pasid);
 
@@ -397,7 +397,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 	properties.queue_percent = args->queue_percentage;
 	properties.priority = args->queue_priority;
 
-	pr_debug("Updating queue id %d for pasid %d\n",
+	pr_debug("Updating queue id %d for pasid 0x%x\n",
 			args->queue_id, p->pasid);
 
 	mutex_lock(&p->mutex);
@@ -854,7 +854,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	struct kfd_process_device_apertures *pAperture;
 	struct kfd_process_device *pdd;
 
-	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
 	args->num_of_nodes = 0;
 
@@ -912,7 +912,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 	uint32_t nodes = 0;
 	int ret;
 
-	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
 	if (args->num_of_nodes == 0) {
 		/* Return number of nodes, so that user space can alloacate
@@ -3063,7 +3063,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	pr_debug("Process %d mapping mmio page\n"
+	pr_debug("pasid 0x%x mapping mmio page\n"
 		 "     target user address == 0x%08llX\n"
 		 "     physical address    == 0x%08llX\n"
 		 "     vm_flags            == 0x%04lX\n"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 3635e0b4b3b7..492951cad143 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -800,7 +800,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 				(dev->kgd, vmid)) {
 			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
 					(dev->kgd, vmid) == p->pasid) {
-				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
+				pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
 						vmid, p->pasid);
 				break;
 			}
@@ -808,7 +808,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 	}
 
 	if (vmid > last_vmid_to_scan) {
-		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
+		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
 		return -EFAULT;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 9d4af961c5d1..9bfa50633654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -96,7 +96,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
 long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
 	if (pmgr->pasid != 0) {
-		pr_debug("H/W debugger is already active using pasid %d\n",
+		pr_debug("H/W debugger is already active using pasid 0x%x\n",
 				pmgr->pasid);
 		return -EBUSY;
 	}
@@ -117,7 +117,7 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != p->pasid) {
-		pr_debug("H/W debugger is not registered by calling pasid %d\n",
+		pr_debug("H/W debugger is not registered by calling pasid 0x%x\n",
 				p->pasid);
 		return -EINVAL;
 	}
@@ -134,7 +134,7 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
 {
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != wac_info->process->pasid) {
-		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+		pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
 				wac_info->process->pasid);
 		return -EINVAL;
 	}
@@ -147,7 +147,7 @@ long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
 {
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != adw_info->process->pasid) {
-		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+		pr_debug("H/W debugger support was not registered for requester pasid 0x%x\n",
 				adw_info->process->pasid);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 3722431cce79..e7f0a32e0e44 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -630,7 +630,7 @@ static int suspend_single_queue(struct device_queue_manager *dqm,
 {
 	int retval = 0;
 
-	pr_debug("Suspending PASID %u queue [%i]\n",
+	pr_debug("Suspending PASID 0x%x queue [%i]\n",
 			pdd->process->pasid,
 			q->properties.queue_id);
 
@@ -667,7 +667,7 @@ static int resume_single_queue(struct device_queue_manager *dqm,
 	/* Retrieve PD base */
 	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
 
-	pr_debug("Restoring from suspend PASID %u queue [%i]\n",
+	pr_debug("Restoring from suspend PASID 0x%x queue [%i]\n",
 			    pdd->process->pasid,
 			    q->properties.queue_id);
 
@@ -697,7 +697,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 		goto out;
 
 	pdd = qpd_to_pdd(qpd);
-	pr_info_ratelimited("Evicting PASID %u queues\n",
+	pr_info_ratelimited("Evicting PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Mark all queues as evicted. Deactivate all active queues on
@@ -743,7 +743,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 		goto out;
 
 	pdd = qpd_to_pdd(qpd);
-	pr_info_ratelimited("Evicting PASID %u queues\n",
+	pr_info_ratelimited("Evicting PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Mark all queues as evicted. Deactivate all active queues on
@@ -794,7 +794,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 		goto out;
 	}
 
-	pr_info_ratelimited("Restoring PASID %u queues\n",
+	pr_info_ratelimited("Restoring PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Update PD Base in QPD */
@@ -870,7 +870,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 		goto out;
 	}
 
-	pr_info_ratelimited("Restoring PASID %u queues\n",
+	pr_info_ratelimited("Restoring PASID 0x%x queues\n",
 			    pdd->process->pasid);
 
 	/* Update PD Base in QPD */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 6ad0a7bc5175..5effcff09e9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -158,7 +158,7 @@ static vm_fault_t kfd_doorbell_vm_fault(struct vm_fault *vmf)
 	if (!pdd)
 		return VM_FAULT_SIGBUS;
 
-	pr_debug("Process %d doorbell vm page fault\n", pdd->process->pasid);
+	pr_debug("pasid 0x%x doorbell vm page fault\n", pdd->process->pasid);
 
 	kfd_process_remap_doorbells_locked(pdd->process);
 
@@ -220,12 +220,12 @@ int kfd_doorbell_remap(struct kfd_process_device *pdd)
 	vma = pdd->qpd.doorbell_vma;
 	size = kfd_doorbell_process_slice(pdd->dev);
 
-	pr_debug("Process %d remap doorbell 0x%lx\n", process->pasid,
+	pr_debug("pasid 0x%x remap doorbell 0x%lx\n", process->pasid,
 		vma->vm_start);
 
 	ret = vm_iomap_memory(vma, address, size);
 	if (ret)
-		pr_err("Process %d failed to remap doorbell 0x%lx\n",
+		pr_err("pasid 0x%x failed to remap doorbell 0x%lx\n",
 			process->pasid, vma->vm_start);
 
 out_unlock:
@@ -257,7 +257,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	pr_debug("Process %d mapping doorbell page\n"
+	pr_debug("pasid 0x%x mapping doorbell page\n"
 		 "     target user address == 0x%08llX\n"
 		 "     physical address    == 0x%08llX\n"
 		 "     vm_flags            == 0x%04lX\n"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 38a99ba91a88..6baf78c9245f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -852,8 +852,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 
 	if (type == KFD_EVENT_TYPE_MEMORY) {
 		dev_warn(kfd_device,
-			"Sending SIGSEGV to HSA Process with PID %d ",
-				p->lead_thread->pid);
+			"Sending SIGSEGV to process %d (pasid 0x%x)",
+				p->lead_thread->pid, p->pasid);
 		send_sig(SIGSEGV, p->lead_thread, 0);
 	}
 
@@ -861,13 +861,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 	if (send_signal) {
 		if (send_sigterm) {
 			dev_warn(kfd_device,
-				"Sending SIGTERM to HSA Process with PID %d ",
-					p->lead_thread->pid);
+				"Sending SIGTERM to process %d (pasid 0x%x)",
+					p->lead_thread->pid, p->pasid);
 			send_sig(SIGTERM, p->lead_thread, 0);
 		} else {
 			dev_err(kfd_device,
-				"HSA Process (PID %d) got unhandled exception",
-				p->lead_thread->pid);
+				"Process %d (pasid 0x%x) got unhandled exception",
+				p->lead_thread->pid, p->pasid);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 5f35df23fb18..193e2835bd4d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -160,7 +160,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
 	if (!p)
 		return;
 
-	pr_debug("Unbinding process %d from IOMMU\n", pasid);
+	pr_debug("Unbinding process 0x%x from IOMMU\n", pasid);
 
 	mutex_lock(kfd_get_dbgmgr_mutex());
 
@@ -194,7 +194,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 	struct kfd_dev *dev;
 
 	dev_warn_ratelimited(kfd_device,
-			"Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X",
+			"Invalid PPR device %x:%x.%x pasid 0x%x address 0x%lX flags 0x%X",
 			PCI_BUS_NUM(pdev->devfn),
 			PCI_SLOT(pdev->devfn),
 			PCI_FUNC(pdev->devfn),
@@ -235,7 +235,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
 		err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
 				p->lead_thread);
 		if (err < 0) {
-			pr_err("Unexpected pasid %d binding failure\n",
+			pr_err("Unexpected pasid 0x%x binding failure\n",
 					p->pasid);
 			mutex_unlock(&p->mutex);
 			break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 437f94309489..0d2c7fa1fa46 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -812,7 +812,7 @@ struct kfd_process {
 	/* Use for delayed freeing of kfd_process structure */
 	struct rcu_head	rcu;
 
-	unsigned int pasid;
+	uint16_t pasid;
 	unsigned int doorbell_index;
 
 	/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5d410c7d15a4..ce5f5d3c9d54 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -447,7 +447,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 
 	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
 				 per_device_list) {
-		pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
 				pdd->dev->id, p->pasid);
 
 		if (pdd->drm_file) {
@@ -1280,7 +1280,7 @@ static void evict_process_worker(struct work_struct *work)
 
 	p->last_evict_timestamp = get_jiffies_64();
 
-	pr_info("Started evicting pasid %d\n", p->pasid);
+	pr_info("Started evicting pasid 0x%x\n", p->pasid);
 	ret = kfd_process_evict_queues(p);
 	if (!ret) {
 		dma_fence_signal(p->ef);
@@ -1293,9 +1293,9 @@ static void evict_process_worker(struct work_struct *work)
 			pr_debug("Process %d queues idle, doorbell unmapped\n",
 				p->pasid);
 
-		pr_info("Finished evicting pasid %d\n", p->pasid);
+		pr_info("Finished evicting pasid 0x%x\n", p->pasid);
 	} else
-		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
+		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
 	trace_kfd_evict_process_worker_end(p, ret ? "Failed" : "Success");
 }
 
@@ -1312,7 +1312,7 @@ static void restore_process_worker(struct work_struct *work)
 	 */
 	p = container_of(dwork, struct kfd_process, restore_work);
 	trace_kfd_restore_process_worker_start(p);
-	pr_info("Started restoring pasid %d\n", p->pasid);
+	pr_info("Started restoring pasid 0x%x\n", p->pasid);
 
 	/* Setting last_restore_timestamp before successful restoration.
 	 * Otherwise this would have to be set by KGD (restore_process_bos)
@@ -1328,7 +1328,7 @@ static void restore_process_worker(struct work_struct *work)
 	ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
 						     &p->ef);
 	if (ret) {
-		pr_info("Failed to restore BOs of pasid %d, retry after %d ms\n",
+		pr_info("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
 			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
 
 		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
@@ -1343,9 +1343,9 @@ static void restore_process_worker(struct work_struct *work)
 	ret = kfd_process_restore_queues(p);
 	trace_kfd_restore_process_worker_end(p,	ret ? "Failed" : "Success");
 	if (!ret)
-		pr_info("Finished restoring pasid %d\n", p->pasid);
+		pr_info("Finished restoring pasid 0x%x\n", p->pasid);
 	else
-		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
 }
 
 void kfd_suspend_all_processes(void)
@@ -1359,7 +1359,7 @@ void kfd_suspend_all_processes(void)
 		cancel_delayed_work_sync(&p->restore_work);
 
 		if (kfd_process_evict_queues(p))
-			pr_err("Failed to suspend process %d\n", p->pasid);
+			pr_err("Failed to suspend process 0x%x\n", p->pasid);
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
 		p->ef = NULL;
@@ -1442,7 +1442,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
 	int idx = srcu_read_lock(&kfd_processes_srcu);
 
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-		seq_printf(m, "Process %d PASID %d:\n",
+		seq_printf(m, "Process %d PASID 0x%x:\n",
 			   p->lead_thread->tgid, p->pasid);
 
 		mutex_lock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index a594945097a3..227fb0ec8115 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -53,7 +53,7 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 	pr_debug("The new slot id %lu\n", found);
 
 	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
-		pr_info("Cannot open more queues for process with pasid %d\n",
+		pr_info("Cannot open more queues for process with pasid 0x%x\n",
 				pqm->process->pasid);
 		return -ENOMEM;
 	}
@@ -298,7 +298,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	}
 
 	if (retval != 0) {
-		pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+		pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
 			pqm->process->pasid, type, retval);
 		goto err_create_queue;
 	}
@@ -378,7 +378,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
 		if (retval) {
-			pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
 				pqm->process->pasid,
 				pqn->q->properties.queue_id, retval);
 			if (retval != -ETIME)
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
  2019-09-26 18:38   ` [PATCH 2/6] drm/amdkfd: Delete unused defines Zhao, Yong
  2019-09-26 18:38   ` [PATCH 3/6] drm/amdkfd: Use hex print format for pasid Zhao, Yong
@ 2019-09-26 18:38   ` Zhao, Yong
       [not found]     ` <20190926183826.18813-4-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
  2019-09-26 18:38   ` [PATCH 5/6] drm/amdkfd: Query vmid pasid mapping through stored info Zhao, Yong
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 18:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

This makes it possible to query the vmid-to-pasid mapping through software.

Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 34 +++++++++++++------
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e7f0a32e0e44..d006adefef55 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
 {
-	int bit, allocated_vmid;
+	int idx = -1, allocated_vmid, i;
 
-	if (dqm->vmid_bitmap == 0)
+	for (i = 0; i < dqm->dev->vm_info.vmid_num_kfd; i++) {
+		if (!dqm->vmid_pasid[i]) {
+			idx = i;
+			break;
+		}
+	}
+
+	if (idx < 0) {
+		pr_err("no more vmid to allocate\n");
 		return -ENOMEM;
+	}
+
+	dqm->vmid_pasid[idx] = q->process->pasid;
 
-	bit = ffs(dqm->vmid_bitmap) - 1;
-	dqm->vmid_bitmap &= ~(1 << bit);
+	allocated_vmid = idx + dqm->dev->vm_info.first_vmid_kfd;
+	pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
 
-	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
-	pr_debug("vmid allocation %d\n", allocated_vmid);
 	qpd->vmid = allocated_vmid;
 	q->properties.vmid = allocated_vmid;
 
-	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
 	program_sh_mem_settings(dqm, qpd);
 
 	/* qpd->page_table_base is set earlier when register_process()
@@ -278,7 +288,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
-	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+	int idx;
 
 	/* On GFX v7, CP doesn't flush TC at dequeue */
 	if (q->device->device_info->asic_family == CHIP_HAWAII)
@@ -290,7 +300,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
 
-	dqm->vmid_bitmap |= (1 << bit);
+	idx = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+	dqm->vmid_pasid[idx] = 0;
+
 	qpd->vmid = 0;
 	q->properties.vmid = 0;
 }
@@ -1017,7 +1029,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 				dqm->allocated_queues[pipe] |= 1 << queue;
 	}
 
-	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
+	dqm->vmid_pasid = kcalloc(dqm->dev->vm_info.vmid_num_kfd,
+			sizeof(uint16_t), GFP_KERNEL);
 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
 
@@ -1030,6 +1043,7 @@ static void uninitialize(struct device_queue_manager *dqm)
 
 	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
 
+	kfree(dqm->vmid_pasid);
 	kfree(dqm->allocated_queues);
 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
 		kfree(dqm->mqd_mgrs[i]);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index eed8f950b663..67b5e5fadd95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -188,7 +188,8 @@ struct device_queue_manager {
 	unsigned int		*allocated_queues;
 	uint64_t		sdma_bitmap;
 	uint64_t		xgmi_sdma_bitmap;
-	unsigned int		vmid_bitmap;
+	/* the pasid mapping for each kfd vmid */
+	uint16_t		*vmid_pasid;
 	uint64_t		pipelines_addr;
 	struct kfd_mem_obj	*pipeline_mem;
 	uint64_t		fence_gpu_addr;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/6] drm/amdkfd: Query vmid pasid mapping through stored info
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2019-09-26 18:38   ` [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver Zhao, Yong
@ 2019-09-26 18:38   ` Zhao, Yong
  2019-09-26 18:38   ` [PATCH 6/6] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid Zhao, Yong
  2019-09-26 19:05   ` [PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer Kuehling, Felix
  5 siblings, 0 replies; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 18:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

Because we record the mapping in software, we can look up the pasid for
a vmid using the stored mapping instead of reading it from the ATC
registers.

This also prepares for the defeatured ATC block in future ASICs.

Change-Id: I781cb9d30dc0cc93379908ff1cf8da798bb26f13
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c    | 12 ++++++++++++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h    |  3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c      |  3 +--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d006adefef55..57d33e887f43 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -220,6 +220,18 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
 	WARN_ON(!old);
 }
 
+uint16_t get_pasid_from_vmid_nocpsch(struct device_queue_manager *dqm,
+					uint16_t vmid)
+{
+	int idx = vmid - dqm->dev->vm_info.first_vmid_kfd;
+
+	uint16_t pasid = dqm->dev->dqm->vmid_pasid[idx];
+	if (!pasid)
+		pr_err("pasid is not queried correctly\n");
+
+	return pasid;
+}
+
 static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 67b5e5fadd95..9e8f6cde397e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -244,6 +244,9 @@ void set_queue_snapshot_entry(struct device_queue_manager *dqm,
 			      int flags,
 			      struct kfd_queue_snapshot_entry *qss_entry);
 
+uint16_t get_pasid_from_vmid_nocpsch(struct device_queue_manager *dqm,
+					uint16_t vmid);
+
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
 	return (pdd->lds_base >> 16) & 0xFF;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index ab8a695c4a3c..adb5bbab7160 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -58,8 +58,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 		memcpy(patched_ihre, ih_ring_entry,
 				dev->device_info->ih_ring_entry_size);
 
-		pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid(
-				dev->kgd, vmid);
+		pasid = get_pasid_from_vmid_nocpsch(dev->dqm, vmid);
 
 		/* Patch the pasid field */
 		patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/6] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2019-09-26 18:38   ` [PATCH 5/6] drm/amdkfd: Query vmid pasid mapping through stored info Zhao, Yong
@ 2019-09-26 18:38   ` Zhao, Yong
       [not found]     ` <20190926183826.18813-6-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
  2019-09-26 19:05   ` [PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer Kuehling, Felix
  5 siblings, 1 reply; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 18:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Zhao, Yong

get_atc_vmid_pasid_mapping_valid() is very similar to
get_atc_vmid_pasid_mapping_pasid(), so merge them into a new function,
get_atc_vmid_pasid_mapping_info(), to reduce the number of register
accesses.

Change-Id: I255ebf2629012400b07fe6a69c3d075cfd46612e
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  6 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 49 +++++++------------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 28 ++++-------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 32 ++++--------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 45 +++++++----------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  6 +--
 .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |  8 +--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       | 16 +++---
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  8 ++-
 9 files changed, 76 insertions(+), 122 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index eb6e8b232729..5e1bd6500fe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -279,10 +279,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
 	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
 	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
+	.get_atc_vmid_pasid_mapping_info =
+			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
 	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 09d50949c5b9..57ff698f51bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -100,10 +100,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-		uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-		uint8_t vmid);
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint64_t page_table_base);
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
@@ -157,10 +155,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_address_watch_execute,
 	.wave_control_execute = kgd_wave_control_execute,
 	.address_watch_get_offset = kgd_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			get_atc_vmid_pasid_mapping_valid,
+	.get_atc_vmid_pasid_mapping_info =
+			get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
 	.invalidate_tlbs = invalidate_tlbs,
@@ -772,26 +768,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	return 0;
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
@@ -823,6 +810,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	int vmid;
+	uint16_t queried_pasid;
+	bool ret;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 
 	if (amdgpu_emu_mode == 0 && ring->sched.ready)
@@ -831,13 +820,13 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 	for (vmid = 0; vmid < 16; vmid++) {
 		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
 			continue;
-		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
-			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
-				== pasid) {
-				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-						AMDGPU_GFXHUB_0, 0);
-				break;
-			}
+
+		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
+				&queried_pasid);
+		if (ret	&& queried_pasid == pasid) {
+			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+					AMDGPU_GFXHUB_0, 0);
+			break;
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index dd7548e9932b..ac811361246d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -136,9 +136,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-							uint8_t vmid);
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid);
 
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
@@ -189,8 +188,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_address_watch_execute,
 	.wave_control_execute = kgd_wave_control_execute,
 	.address_watch_get_offset = kgd_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
+	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -756,24 +754,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static void set_scratch_backing_va(struct kgd_dev *kgd,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index f12ac78707b4..b7f0d594ec7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -99,10 +99,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-		uint8_t vmid);
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-		uint8_t vmid);
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid);
 static void set_scratch_backing_va(struct kgd_dev *kgd,
 					uint64_t va, uint32_t vmid);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
@@ -151,10 +149,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_address_watch_execute,
 	.wave_control_execute = kgd_wave_control_execute,
 	.address_watch_get_offset = kgd_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			get_atc_vmid_pasid_mapping_valid,
+	.get_atc_vmid_pasid_mapping_info =
+			get_atc_vmid_pasid_mapping_info,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
@@ -677,24 +673,16 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	return 0;
 }
 
-static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static int kgd_address_watch_disable(struct kgd_dev *kgd)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index f8f8d6fe8b52..4d2cbbdc439b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -630,26 +630,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	return 0;
 }
 
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-							uint8_t vmid)
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid)
 {
-	uint32_t reg;
+	uint32_t value;
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
 		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-								uint8_t vmid)
-{
-	uint32_t reg;
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 
-	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-		     + vmid);
-	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
 }
 
 static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
@@ -684,6 +675,8 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	int vmid, i;
+	uint16_t queried_pasid;
+	bool ret;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	uint32_t flush_type = 0;
 
@@ -699,14 +692,14 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 	for (vmid = 0; vmid < 16; vmid++) {
 		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
 			continue;
-		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
-			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
-				== pasid) {
-				for (i = 0; i < adev->num_vmhubs; i++)
-					amdgpu_gmc_flush_gpu_tlb(adev, vmid,
-								i, flush_type);
-				break;
-			}
+
+		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
+				&queried_pasid);
+		if (ret && queried_pasid == pasid) {
+			for (i = 0; i < adev->num_vmhubs; i++)
+				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+							i, flush_type);
+			break;
 		}
 	}
 
@@ -1040,10 +1033,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
 	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
 	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
-	.get_atc_vmid_pasid_mapping_pasid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
-	.get_atc_vmid_pasid_mapping_valid =
-			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
+	.get_atc_vmid_pasid_mapping_info =
+			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.get_tile_config = kgd_gfx_v9_get_tile_config,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
 	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index c95c9fe3eb3d..fd4f504668cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -53,10 +53,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
 
-bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-		uint8_t vmid);
-uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-		uint8_t vmid);
+bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
+					uint8_t vmid, uint16_t *p_pasid);
 void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint64_t page_table_base);
 int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 177d1e5329a5..9f59ba93cfe0 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -33,7 +33,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
 	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
-	unsigned int vmid, pasid;
+	unsigned int vmid;
+	uint16_t pasid;
+	bool ret;
 
 	/* This workaround is due to HW/FW limitation on Hawaii that
 	 * VMID and PASID are not written into ih_ring_entry
@@ -48,13 +50,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 		*tmp_ihre = *ihre;
 
 		vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
-		pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
+		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
 
 		tmp_ihre->ring_id &= 0x000000ff;
 		tmp_ihre->ring_id |= vmid << 8;
 		tmp_ihre->ring_id |= pasid << 16;
 
-		return (pasid != 0) &&
+		return ret && (pasid != 0) &&
 			vmid >= dev->vm_info.first_vmid_kfd &&
 			vmid <= dev->vm_info.last_vmid_kfd;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 492951cad143..1eb0c2bedcd9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -775,6 +775,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 {
 	int status = 0;
 	unsigned int vmid;
+	uint16_t queried_pasid;
 	union SQ_CMD_BITS reg_sq_cmd;
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct kfd_process_device *pdd;
@@ -796,14 +797,13 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 	 */
 
 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
-		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
-				(dev->kgd, vmid)) {
-			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
-					(dev->kgd, vmid) == p->pasid) {
-				pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
-						vmid, p->pasid);
-				break;
-			}
+		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+				(dev->kgd, vmid, &queried_pasid);
+
+		if (status && queried_pasid == p->pasid) {
+			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+					vmid, p->pasid);
+			break;
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 9f593c2ee4e0..32a1a53012dd 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -300,12 +300,10 @@ struct kfd2kgd_calls {
 	uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset);
-	bool (*get_atc_vmid_pasid_mapping_valid)(
+	bool (*get_atc_vmid_pasid_mapping_info)(
 					struct kgd_dev *kgd,
-					uint8_t vmid);
-	uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
-					struct kgd_dev *kgd,
-					uint8_t vmid);
+					uint8_t vmid,
+					uint16_t *p_pasid);
 
 	/* No longer needed from GFXv9 onward. The scratch base address is
 	 * passed to the shader by the CP. It's the user mode driver's
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver
       [not found]     ` <20190926183826.18813-4-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
@ 2019-09-26 18:54       ` Kuehling, Felix
       [not found]         ` <cc578b05-a918-2c5f-d6ac-122f24d763ce-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 10+ messages in thread
From: Kuehling, Felix @ 2019-09-26 18:54 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Zhao, Yong

On 2019-09-26 2:38 p.m., Zhao, Yong wrote:
> This makes possible the vmid pasid mapping query through software.
>
> Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 34 +++++++++++++------
>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
>   2 files changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e7f0a32e0e44..d006adefef55 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>   			struct qcm_process_device *qpd,
>   			struct queue *q)
>   {
> -	int bit, allocated_vmid;
> +	int idx = -1, allocated_vmid, i;
>   
> -	if (dqm->vmid_bitmap == 0)
> +	for (i = 0; i < dqm->dev->vm_info.vmid_num_kfd; i++) {
> +		if (!dqm->vmid_pasid[i]) {
> +			idx = i;
> +			break;
> +		}
> +	}
> +
> +	if (idx < 0) {
> +		pr_err("no more vmid to allocate\n");
>   		return -ENOMEM;
> +	}
> +
> +	dqm->vmid_pasid[idx] = q->process->pasid;
>   
> -	bit = ffs(dqm->vmid_bitmap) - 1;
> -	dqm->vmid_bitmap &= ~(1 << bit);
> +	allocated_vmid = idx + dqm->dev->vm_info.first_vmid_kfd;
> +	pr_debug("vmid allocated: %d\n", allocated_vmid);
> +
> +	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
>   
> -	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
> -	pr_debug("vmid allocation %d\n", allocated_vmid);
>   	qpd->vmid = allocated_vmid;
>   	q->properties.vmid = allocated_vmid;
>   
> -	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
>   	program_sh_mem_settings(dqm, qpd);
>   
>   	/* qpd->page_table_base is set earlier when register_process()
> @@ -278,7 +288,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>   				struct qcm_process_device *qpd,
>   				struct queue *q)
>   {
> -	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
> +	int idx;
>   
>   	/* On GFX v7, CP doesn't flush TC at dequeue */
>   	if (q->device->device_info->asic_family == CHIP_HAWAII)
> @@ -290,7 +300,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>   	/* Release the vmid mapping */
>   	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>   
> -	dqm->vmid_bitmap |= (1 << bit);
> +	idx = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
> +	dqm->vmid_pasid[idx] = 0;
> +
>   	qpd->vmid = 0;
>   	q->properties.vmid = 0;
>   }
> @@ -1017,7 +1029,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
>   				dqm->allocated_queues[pipe] |= 1 << queue;
>   	}
>   
> -	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
> +	dqm->vmid_pasid = kcalloc(dqm->dev->vm_info.vmid_num_kfd,
> +			sizeof(uint16_t), GFP_KERNEL);

If you allocate this dynamically, you need to check the return value. 
But see below ...


>   	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
>   	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
>   
> @@ -1030,6 +1043,7 @@ static void uninitialize(struct device_queue_manager *dqm)
>   
>   	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
>   
> +	kfree(dqm->vmid_pasid);
>   	kfree(dqm->allocated_queues);
>   	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
>   		kfree(dqm->mqd_mgrs[i]);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index eed8f950b663..67b5e5fadd95 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -188,7 +188,8 @@ struct device_queue_manager {
>   	unsigned int		*allocated_queues;
>   	uint64_t		sdma_bitmap;
>   	uint64_t		xgmi_sdma_bitmap;
> -	unsigned int		vmid_bitmap;
> +	/* the pasid mapping for each kfd vmid */
> +	uint16_t		*vmid_pasid;

This could be a fixed-size array since the number of user mode VMIDs is 
limited to 15 by the HW. The size of the pointer alone is enough to 
store 4 PASIDs. Add overhead of kmalloc and you don't really save 
anything by allocating this dynamically. It only adds indirection, 
complexity (error handling) and the risk of memory leaks.

Regards,
   Felix


>   	uint64_t		pipelines_addr;
>   	struct kfd_mem_obj	*pipeline_mem;
>   	uint64_t		fence_gpu_addr;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver
       [not found]         ` <cc578b05-a918-2c5f-d6ac-122f24d763ce-5C7GfCeVMHo@public.gmane.org>
@ 2019-09-26 19:01           ` Zhao, Yong
  0 siblings, 0 replies; 10+ messages in thread
From: Zhao, Yong @ 2019-09-26 19:01 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I agree.

Yong

On 2019-09-26 2:54 p.m., Kuehling, Felix wrote:
> On 2019-09-26 2:38 p.m., Zhao, Yong wrote:
>> This makes possible the vmid pasid mapping query through software.
>>
>> Change-Id: Ib539aae277a227cc39f6469ae23c46c4d289b87b
>> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
>> ---
>>    .../drm/amd/amdkfd/kfd_device_queue_manager.c | 34 +++++++++++++------
>>    .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 +-
>>    2 files changed, 26 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index e7f0a32e0e44..d006adefef55 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -224,20 +224,30 @@ static int allocate_vmid(struct device_queue_manager *dqm,
>>    			struct qcm_process_device *qpd,
>>    			struct queue *q)
>>    {
>> -	int bit, allocated_vmid;
>> +	int idx = -1, allocated_vmid, i;
>>    
>> -	if (dqm->vmid_bitmap == 0)
>> +	for (i = 0; i < dqm->dev->vm_info.vmid_num_kfd; i++) {
>> +		if (!dqm->vmid_pasid[i]) {
>> +			idx = i;
>> +			break;
>> +		}
>> +	}
>> +
>> +	if (idx < 0) {
>> +		pr_err("no more vmid to allocate\n");
>>    		return -ENOMEM;
>> +	}
>> +
>> +	dqm->vmid_pasid[idx] = q->process->pasid;
>>    
>> -	bit = ffs(dqm->vmid_bitmap) - 1;
>> -	dqm->vmid_bitmap &= ~(1 << bit);
>> +	allocated_vmid = idx + dqm->dev->vm_info.first_vmid_kfd;
>> +	pr_debug("vmid allocated: %d\n", allocated_vmid);
>> +
>> +	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
>>    
>> -	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
>> -	pr_debug("vmid allocation %d\n", allocated_vmid);
>>    	qpd->vmid = allocated_vmid;
>>    	q->properties.vmid = allocated_vmid;
>>    
>> -	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
>>    	program_sh_mem_settings(dqm, qpd);
>>    
>>    	/* qpd->page_table_base is set earlier when register_process()
>> @@ -278,7 +288,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>>    				struct qcm_process_device *qpd,
>>    				struct queue *q)
>>    {
>> -	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
>> +	int idx;
>>    
>>    	/* On GFX v7, CP doesn't flush TC at dequeue */
>>    	if (q->device->device_info->asic_family == CHIP_HAWAII)
>> @@ -290,7 +300,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>>    	/* Release the vmid mapping */
>>    	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>>    
>> -	dqm->vmid_bitmap |= (1 << bit);
>> +	idx = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
>> +	dqm->vmid_pasid[idx] = 0;
>> +
>>    	qpd->vmid = 0;
>>    	q->properties.vmid = 0;
>>    }
>> @@ -1017,7 +1029,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
>>    				dqm->allocated_queues[pipe] |= 1 << queue;
>>    	}
>>    
>> -	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
>> +	dqm->vmid_pasid = kcalloc(dqm->dev->vm_info.vmid_num_kfd,
>> +			sizeof(uint16_t), GFP_KERNEL);
> If you allocate this dynamically, you need to check the return value.
> But see below ...
>
>
>>    	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
>>    	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
>>    
>> @@ -1030,6 +1043,7 @@ static void uninitialize(struct device_queue_manager *dqm)
>>    
>>    	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
>>    
>> +	kfree(dqm->vmid_pasid);
>>    	kfree(dqm->allocated_queues);
>>    	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
>>    		kfree(dqm->mqd_mgrs[i]);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> index eed8f950b663..67b5e5fadd95 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> @@ -188,7 +188,8 @@ struct device_queue_manager {
>>    	unsigned int		*allocated_queues;
>>    	uint64_t		sdma_bitmap;
>>    	uint64_t		xgmi_sdma_bitmap;
>> -	unsigned int		vmid_bitmap;
>> +	/* the pasid mapping for each kfd vmid */
>> +	uint16_t		*vmid_pasid;
> This could be a fixed-size array since the number of user mode VMIDs is
> limited to 15 by the HW. The size of the pointer alone is enough to
> store 4 PASIDs. Add overhead of kmalloc and you don't really save
> anything by allocating this dynamically. It only adds indirection,
> complexity (error handling) and the risk of memory leaks.
>
> Regards,
>     Felix
>
>
>>    	uint64_t		pipelines_addr;
>>    	struct kfd_mem_obj	*pipeline_mem;
>>    	uint64_t		fence_gpu_addr;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid
       [not found]     ` <20190926183826.18813-6-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
@ 2019-09-26 19:03       ` Kuehling, Felix
  0 siblings, 0 replies; 10+ messages in thread
From: Kuehling, Felix @ 2019-09-26 19:03 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2019-09-26 2:38 p.m., Zhao, Yong wrote:
> get_atc_vmid_pasid_mapping_valid() is very similar to
> get_atc_vmid_pasid_mapping_pasid(), so they can be merged into a new
> function get_atc_vmid_pasid_mapping_info() to reduce register access
> times.

Hmm, the most important part may actually not be the time saved, but 
getting the PASID and the valid bit atomically with a single read. That 
could fix some potential race conditions where the mapping changes 
between the two reads.

Add that to the patch description and the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


>
> Change-Id: I255ebf2629012400b07fe6a69c3d075cfd46612e
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  6 +--
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 49 +++++++------------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 28 ++++-------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 32 ++++--------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 45 +++++++----------
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  6 +--
>   .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |  8 +--
>   drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       | 16 +++---
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  8 ++-
>   9 files changed, 76 insertions(+), 122 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index eb6e8b232729..5e1bd6500fe2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -279,10 +279,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>   	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
>   	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
>   	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
> -	.get_atc_vmid_pasid_mapping_pasid =
> -			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
> -	.get_atc_vmid_pasid_mapping_valid =
> -			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
> +	.get_atc_vmid_pasid_mapping_info =
> +			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = kgd_gfx_v9_get_tile_config,
>   	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
>   	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 09d50949c5b9..57ff698f51bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -100,10 +100,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>   					unsigned int watch_point_id,
>   					unsigned int reg_offset);
>   
> -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -		uint8_t vmid);
> -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -		uint8_t vmid);
> +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid);
>   static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
>   		uint64_t page_table_base);
>   static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
> @@ -157,10 +155,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>   	.address_watch_execute = kgd_address_watch_execute,
>   	.wave_control_execute = kgd_wave_control_execute,
>   	.address_watch_get_offset = kgd_address_watch_get_offset,
> -	.get_atc_vmid_pasid_mapping_pasid =
> -			get_atc_vmid_pasid_mapping_pasid,
> -	.get_atc_vmid_pasid_mapping_valid =
> -			get_atc_vmid_pasid_mapping_valid,
> +	.get_atc_vmid_pasid_mapping_info =
> +			get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = amdgpu_amdkfd_get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
>   	.invalidate_tlbs = invalidate_tlbs,
> @@ -772,26 +768,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
>   	return 0;
>   }
>   
> -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -							uint8_t vmid)
> +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid)
>   {
> -	uint32_t reg;
> +	uint32_t value;
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>   
> -	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>   		     + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
> -}
> -
> -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -								uint8_t vmid)
> -{
> -	uint32_t reg;
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>   
> -	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> -		     + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
>   static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
> @@ -823,6 +810,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>   	int vmid;
> +	uint16_t queried_pasid;
> +	bool ret;
>   	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>   
>   	if (amdgpu_emu_mode == 0 && ring->sched.ready)
> @@ -831,13 +820,13 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   	for (vmid = 0; vmid < 16; vmid++) {
>   		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
>   			continue;
> -		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
> -			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
> -				== pasid) {
> -				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -						AMDGPU_GFXHUB_0, 0);
> -				break;
> -			}
> +
> +		ret = get_atc_vmid_pasid_mapping_info(kgd, vmid,
> +				&queried_pasid);
> +		if (ret	&& queried_pasid == pasid) {
> +			amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +					AMDGPU_GFXHUB_0, 0);
> +			break;
>   		}
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index dd7548e9932b..ac811361246d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -136,9 +136,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>   					unsigned int watch_point_id,
>   					unsigned int reg_offset);
>   
> -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
> -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -							uint8_t vmid);
> +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid);
>   
>   static void set_scratch_backing_va(struct kgd_dev *kgd,
>   					uint64_t va, uint32_t vmid);
> @@ -189,8 +188,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
>   	.address_watch_execute = kgd_address_watch_execute,
>   	.wave_control_execute = kgd_wave_control_execute,
>   	.address_watch_get_offset = kgd_address_watch_get_offset,
> -	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
> -	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
> +	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info,
>   	.set_scratch_backing_va = set_scratch_backing_va,
>   	.get_tile_config = get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> @@ -756,24 +754,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>   	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
>   }
>   
> -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -							uint8_t vmid)
> +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid)
>   {
> -	uint32_t reg;
> +	uint32_t value;
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>   
> -	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
> -}
> -
> -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -								uint8_t vmid)
> -{
> -	uint32_t reg;
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>   
> -	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
>   static void set_scratch_backing_va(struct kgd_dev *kgd,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index f12ac78707b4..b7f0d594ec7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -99,10 +99,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
>   					unsigned int watch_point_id,
>   					unsigned int reg_offset);
>   
> -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -		uint8_t vmid);
> -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -		uint8_t vmid);
> +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid);
>   static void set_scratch_backing_va(struct kgd_dev *kgd,
>   					uint64_t va, uint32_t vmid);
>   static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
> @@ -151,10 +149,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>   	.address_watch_execute = kgd_address_watch_execute,
>   	.wave_control_execute = kgd_wave_control_execute,
>   	.address_watch_get_offset = kgd_address_watch_get_offset,
> -	.get_atc_vmid_pasid_mapping_pasid =
> -			get_atc_vmid_pasid_mapping_pasid,
> -	.get_atc_vmid_pasid_mapping_valid =
> -			get_atc_vmid_pasid_mapping_valid,
> +	.get_atc_vmid_pasid_mapping_info =
> +			get_atc_vmid_pasid_mapping_info,
>   	.set_scratch_backing_va = set_scratch_backing_va,
>   	.get_tile_config = get_tile_config,
>   	.set_vm_context_page_table_base = set_vm_context_page_table_base,
> @@ -677,24 +673,16 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
>   	return 0;
>   }
>   
> -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -							uint8_t vmid)
> +static bool get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid)
>   {
> -	uint32_t reg;
> +	uint32_t value;
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>   
> -	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
> -}
> -
> -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -								uint8_t vmid)
> -{
> -	uint32_t reg;
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +	value = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>   
> -	reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
>   static int kgd_address_watch_disable(struct kgd_dev *kgd)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index f8f8d6fe8b52..4d2cbbdc439b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -630,26 +630,17 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
>   	return 0;
>   }
>   
> -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -							uint8_t vmid)
> +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid)
>   {
> -	uint32_t reg;
> +	uint32_t value;
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>   
> -	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> +	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
>   		     + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
> -}
> -
> -uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -								uint8_t vmid)
> -{
> -	uint32_t reg;
> -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
> +	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
>   
> -	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> -		     + vmid);
> -	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
>   }
>   
>   static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
> @@ -684,6 +675,8 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
>   	int vmid, i;
> +	uint16_t queried_pasid;
> +	bool ret;
>   	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>   	uint32_t flush_type = 0;
>   
> @@ -699,14 +692,14 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   	for (vmid = 0; vmid < 16; vmid++) {
>   		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
>   			continue;
> -		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
> -			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
> -				== pasid) {
> -				for (i = 0; i < adev->num_vmhubs; i++)
> -					amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> -								i, flush_type);
> -				break;
> -			}
> +
> +		ret = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(kgd, vmid,
> +				&queried_pasid);
> +		if (ret && queried_pasid == pasid) {
> +			for (i = 0; i < adev->num_vmhubs; i++)
> +				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> +							i, flush_type);
> +			break;
>   		}
>   	}
>   
> @@ -1040,10 +1033,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
>   	.address_watch_execute = kgd_gfx_v9_address_watch_execute,
>   	.wave_control_execute = kgd_gfx_v9_wave_control_execute,
>   	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
> -	.get_atc_vmid_pasid_mapping_pasid =
> -			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid,
> -	.get_atc_vmid_pasid_mapping_valid =
> -			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid,
> +	.get_atc_vmid_pasid_mapping_info =
> +			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.get_tile_config = kgd_gfx_v9_get_tile_config,
>   	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
>   	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index c95c9fe3eb3d..fd4f504668cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -53,10 +53,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,
>   					unsigned int watch_point_id,
>   					unsigned int reg_offset);
>   
> -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
> -		uint8_t vmid);
> -uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
> -		uint8_t vmid);
> +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
> +					uint8_t vmid, uint16_t *p_pasid);
>   void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
>   		uint64_t page_table_base);
>   int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
> diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
> index 177d1e5329a5..9f59ba93cfe0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
> +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
> @@ -33,7 +33,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
>   	const struct cik_ih_ring_entry *ihre =
>   			(const struct cik_ih_ring_entry *)ih_ring_entry;
>   	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
> -	unsigned int vmid, pasid;
> +	unsigned int vmid;
> +	uint16_t pasid;
> +	bool ret;
>   
>   	/* This workaround is due to HW/FW limitation on Hawaii that
>   	 * VMID and PASID are not written into ih_ring_entry
> @@ -48,13 +50,13 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
>   		*tmp_ihre = *ihre;
>   
>   		vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
> -		pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
> +		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);
>   
>   		tmp_ihre->ring_id &= 0x000000ff;
>   		tmp_ihre->ring_id |= vmid << 8;
>   		tmp_ihre->ring_id |= pasid << 16;
>   
> -		return (pasid != 0) &&
> +		return ret && (pasid != 0) &&
>   			vmid >= dev->vm_info.first_vmid_kfd &&
>   			vmid <= dev->vm_info.last_vmid_kfd;
>   	}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> index 492951cad143..1eb0c2bedcd9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> @@ -775,6 +775,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
>   {
>   	int status = 0;
>   	unsigned int vmid;
> +	uint16_t queried_pasid;
>   	union SQ_CMD_BITS reg_sq_cmd;
>   	union GRBM_GFX_INDEX_BITS reg_gfx_index;
>   	struct kfd_process_device *pdd;
> @@ -796,14 +797,13 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
>   	 */
>   
>   	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
> -		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
> -				(dev->kgd, vmid)) {
> -			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
> -					(dev->kgd, vmid) == p->pasid) {
> -				pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
> -						vmid, p->pasid);
> -				break;
> -			}
> +		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
> +				(dev->kgd, vmid, &queried_pasid);
> +
> +		if (status && queried_pasid == p->pasid) {
> +			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
> +					vmid, p->pasid);
> +			break;
>   		}
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 9f593c2ee4e0..32a1a53012dd 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -300,12 +300,10 @@ struct kfd2kgd_calls {
>   	uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd,
>   					unsigned int watch_point_id,
>   					unsigned int reg_offset);
> -	bool (*get_atc_vmid_pasid_mapping_valid)(
> +	bool (*get_atc_vmid_pasid_mapping_info)(
>   					struct kgd_dev *kgd,
> -					uint8_t vmid);
> -	uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
> -					struct kgd_dev *kgd,
> -					uint8_t vmid);
> +					uint8_t vmid,
> +					uint16_t *p_pasid);
>   
>   	/* No longer needed from GFXv9 onward. The scratch base address is
>   	 * passed to the shader by the CP. It's the user mode driver's
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer
       [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2019-09-26 18:38   ` [PATCH 6/6] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid Zhao, Yong
@ 2019-09-26 19:05   ` Kuehling, Felix
  5 siblings, 0 replies; 10+ messages in thread
From: Kuehling, Felix @ 2019-09-26 19:05 UTC (permalink / raw)
  To: Zhao, Yong, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Patches 1-3 and patch 5 are

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

See separate emails for patches 4 and 6.

On 2019-09-26 2:38 p.m., Zhao, Yong wrote:
> The GFX10 does not require the control stack to be right after mqd
> buffer any more, so move it back to userspace allocated CWSR buffer.
>
> Change-Id: I446c9685549a09ac8846a42ee22d86cfb93fd98c
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
> ---
>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 37 ++-----------------
>   1 file changed, 4 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
> index 29d50d6af9d7..e2fb76247f47 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
> @@ -69,35 +69,13 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
>   static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
>   		struct queue_properties *q)
>   {
> -	int retval;
> -	struct kfd_mem_obj *mqd_mem_obj = NULL;
> +	struct kfd_mem_obj *mqd_mem_obj;
>   
> -	/* From V9,  for CWSR, the control stack is located on the next page
> -	 * boundary after the mqd, we will use the gtt allocation function
> -	 * instead of sub-allocation function.
> -	 */
> -	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
> -		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
> -		if (!mqd_mem_obj)
> -			return NULL;
> -		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
> -			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
> -				ALIGN(sizeof(struct v10_compute_mqd), PAGE_SIZE),
> -			&(mqd_mem_obj->gtt_mem),
> -			&(mqd_mem_obj->gpu_addr),
> -			(void *)&(mqd_mem_obj->cpu_ptr), true);
> -	} else {
> -		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
> -				&mqd_mem_obj);
> -	}
> -
> -	if (retval) {
> -		kfree(mqd_mem_obj);
> +	if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
> +			&mqd_mem_obj))
>   		return NULL;
> -	}
>   
>   	return mqd_mem_obj;
> -
>   }
>   
>   static void init_mqd(struct mqd_manager *mm, void **mqd,
> @@ -250,14 +228,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
>   static void free_mqd(struct mqd_manager *mm, void *mqd,
>   			struct kfd_mem_obj *mqd_mem_obj)
>   {
> -	struct kfd_dev *kfd = mm->dev;
> -
> -	if (mqd_mem_obj->gtt_mem) {
> -		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
> -		kfree(mqd_mem_obj);
> -	} else {
> -		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
> -	}
> +	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
>   }
>   
>   static bool is_occupied(struct mqd_manager *mm, void *mqd,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-09-26 19:05 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-26 18:38 [PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer Zhao, Yong
     [not found] ` <20190926183826.18813-1-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-09-26 18:38   ` [PATCH 2/6] drm/amdkfd: Delete unused defines Zhao, Yong
2019-09-26 18:38   ` [PATCH 3/6] drm/amdkfd: Use hex print format for pasid Zhao, Yong
2019-09-26 18:38   ` [PATCH 4/6] drm/amdkfd: Record vmid pasid mapping in the driver Zhao, Yong
     [not found]     ` <20190926183826.18813-4-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-09-26 18:54       ` Kuehling, Felix
     [not found]         ` <cc578b05-a918-2c5f-d6ac-122f24d763ce-5C7GfCeVMHo@public.gmane.org>
2019-09-26 19:01           ` Zhao, Yong
2019-09-26 18:38   ` [PATCH 5/6] drm/amdkfd: Query vmid pasid mapping through stored info Zhao, Yong
2019-09-26 18:38   ` [PATCH 6/6] drm/amdkfd: Eliminate get_atc_vmid_pasid_mapping_valid Zhao, Yong
     [not found]     ` <20190926183826.18813-6-Yong.Zhao-5C7GfCeVMHo@public.gmane.org>
2019-09-26 19:03       ` Kuehling, Felix
2019-09-26 19:05   ` [PATCH 1/6] drm/amdkfd: Move the control stack on GFX10 to userspace buffer Kuehling, Felix

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.