All of lore.kernel.org
 help / color / mirror / Atom feed
From: Philip Yang <Philip.Yang@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Philip Yang <Philip.Yang@amd.com>, felix.kuehling@amd.com
Subject: [PATCH 3/5] drm/amdkfd: add page fault SMI event
Date: Fri, 14 Jan 2022 15:38:37 -0500	[thread overview]
Message-ID: <20220114203839.21707-4-Philip.Yang@amd.com> (raw)
In-Reply-To: <20220114203839.21707-1-Philip.Yang@amd.com>

After GPU page fault is recovered, output timestamp when fault is
received, duration to recover the fault, if migration or only
GPU page table is updated, fault address, read or write fault.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 41 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c        | 12 ++++--
 3 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index 5818ea8ad4ce..6ed3d85348d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -265,6 +265,47 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
 	add_event_to_kfifo(task_info.pid, dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
 }
 
+static bool kfd_smi_event_pid_duration(struct kfd_dev *dev, uint16_t pasid,
+				       pid_t *pid, uint64_t ts,
+				       uint64_t *duration)
+{
+	struct amdgpu_task_info task_info = {0};
+
+	if (list_empty(&dev->smi_clients))
+		return false;
+
+	amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
+	if (!task_info.pid) {
+		pr_debug("task is gone\n");
+		return false;
+	}
+	if (pid)
+		*pid = task_info.pid;
+	if (duration)
+		*duration = ktime_get_ns() - ts;
+	return true;
+}
+
+void kfd_smi_event_page_fault(struct kfd_dev *dev, uint16_t pasid,
+			      unsigned long address, bool migration,
+			      bool write_fault, uint64_t ts)
+{
+	char fifo_in[128];
+	uint64_t duration;
+	pid_t pid;
+	int len;
+
+	if (!kfd_smi_event_pid_duration(dev, pasid, &pid, ts, &duration))
+		return;
+
+	len = snprintf(fifo_in, sizeof(fifo_in), "%d ts=%lld duration=%lld"
+		       " pid=%d pfn=0x%lx write=%d migration=%d\n",
+		       KFD_SMI_EVENT_PAGE_FAULT, ts, duration, pid, address,
+		       write_fault, migration);
+
+	add_event_to_kfifo(pid, dev, KFD_SMI_EVENT_PAGE_FAULT, fifo_in, len);
+}
+
 int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
 {
 	struct kfd_smi_client *client;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
index bffd0c32b060..fa3a8fdad69f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -28,5 +28,8 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
 					     uint64_t throttle_bitmask);
 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
+void kfd_smi_event_page_fault(struct kfd_dev *dev, uint16_t pasid,
+			      unsigned long address, bool migration,
+			      bool write_fault, uint64_t ts);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 37b3191615b6..b81667162dc1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -32,6 +32,7 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 #include "kfd_migrate.h"
+#include "kfd_smi_events.h"
 
 #ifdef dev_fmt
 #undef dev_fmt
@@ -2657,11 +2658,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	struct svm_range_list *svms;
 	struct svm_range *prange;
 	struct kfd_process *p;
-	uint64_t timestamp;
+	uint64_t timestamp = ktime_get_ns();
 	int32_t best_loc;
 	int32_t gpuidx = MAX_GPU_INSTANCE;
 	bool write_locked = false;
 	struct vm_area_struct *vma;
+	bool migration = false;
 	int r = 0;
 
 	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2737,9 +2739,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out_unlock_range;
 	}
 
-	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
 	/* skip duplicate vm fault on different pages of same range */
-	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+	if ((ktime_to_us(timestamp) -  prange->validate_timestamp) <
+	    AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
 		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
 			 svms, prange->start, prange->last);
 		r = 0;
@@ -2776,6 +2778,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		 prange->actual_loc);
 
 	if (prange->actual_loc != best_loc) {
+		migration = true;
 		if (best_loc) {
 			r = svm_migrate_to_vram(prange, best_loc, mm);
 			if (r) {
@@ -2804,6 +2807,9 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
 			 r, svms, prange->start, prange->last);
 
+	kfd_smi_event_page_fault(adev->kfd.dev, p->pasid, addr, migration,
+				 write_fault, timestamp);
+
 out_unlock_range:
 	mutex_unlock(&prange->migrate_mutex);
 out_unlock_svms:
-- 
2.17.1


  parent reply	other threads:[~2022-01-14 20:39 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-14 20:38 [PATCH 0/5] HMM profiler interface Philip Yang
2022-01-14 20:38 ` [PATCH 1/5] drm/amdkfd: correct SMI event read size Philip Yang
2022-01-14 20:38 ` [PATCH 2/5] drm/amdkfd: enable per process SMI event Philip Yang
2022-01-14 22:07   ` Felix Kuehling
2022-01-14 20:38 ` Philip Yang [this message]
2022-01-14 22:37   ` [PATCH 3/5] drm/amdkfd: add page fault " Felix Kuehling
2022-01-17 16:16     ` philip yang
2022-01-14 20:38 ` [PATCH 4/5] drm/amdkfd: add migration " Philip Yang
2022-01-14 20:38 ` [PATCH 5/5] drm/amdkfd: add user queue eviction restore " Philip Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220114203839.21707-4-Philip.Yang@amd.com \
    --to=philip.yang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=felix.kuehling@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.