All of lore.kernel.org
 help / color / mirror / Atom feed
From: Philip Yang <Philip.Yang@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Philip Yang <Philip.Yang@amd.com>
Subject: [PATCH 3/4] drm/amdkfd: add sysfs counters for vm fault and migration
Date: Tue, 22 Jun 2021 09:32:12 -0400	[thread overview]
Message-ID: <20210622133213.21393-3-Philip.Yang@amd.com> (raw)
In-Reply-To: <20210622133213.21393-1-Philip.Yang@amd.com>

This is part of the SVM profiling API. Export per-process, per-GPU sysfs
counters for VM retry faults and for pages migrated into and out of GPU
VRAM.

The counters are not updated in parallel by the GPU retry fault handler
and the migration to VRAM/RAM paths. Use READ_ONCE when reading them to
avoid compiler optimization of the access.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |   9 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 151 ++++++++++++++++++-----
 2 files changed, 131 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6dc22fa1e555..3426743ed228 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -730,6 +730,15 @@ struct kfd_process_device {
 	 *  number of CU's a device has along with number of other competing processes
 	 */
 	struct attribute attr_cu_occupancy;
+
+	/* sysfs counters for GPU retry fault and page migration tracking */
+	struct kobject *kobj_counters;
+	struct attribute attr_faults;
+	struct attribute attr_page_in;
+	struct attribute attr_page_out;
+	uint64_t faults;
+	uint64_t page_in;
+	uint64_t page_out;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index cfc36fceac8a..21ec8a18cad2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -416,6 +416,29 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
 	return 0;
 }
 
+static ssize_t kfd_sysfs_counters_show(struct kobject *kobj,
+				       struct attribute *attr, char *buf)
+{
+	struct kfd_process_device *pdd;
+
+	if (!strcmp(attr->name, "faults")) {
+		pdd = container_of(attr, struct kfd_process_device,
+				   attr_faults);
+		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults));
+	}
+	if (!strcmp(attr->name, "page_in")) {
+		pdd = container_of(attr, struct kfd_process_device,
+				   attr_page_in);
+		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in));
+	}
+	if (!strcmp(attr->name, "page_out")) {
+		pdd = container_of(attr, struct kfd_process_device,
+				   attr_page_out);
+		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out));
+	}
+	return 0;
+}
+
 static struct attribute attr_queue_size = {
 	.name = "size",
 	.mode = KFD_SYSFS_FILE_MODE
@@ -456,6 +479,15 @@ static struct kobj_type procfs_stats_type = {
 	.release = kfd_procfs_kobj_release,
 };
 
+static const struct sysfs_ops sysfs_counters_ops = {
+	.show = kfd_sysfs_counters_show,
+};
+
+static struct kobj_type sysfs_counters_type = {
+	.sysfs_ops = &sysfs_counters_ops,
+	.release = kfd_procfs_kobj_release,
+};
+
 int kfd_procfs_add_queue(struct queue *q)
 {
 	struct kfd_process *proc;
@@ -544,6 +576,50 @@ static void kfd_procfs_add_sysfs_stats(struct kfd_process *p)
 	}
 }
 
+static void kfd_procfs_add_sysfs_counters(struct kfd_process *p)
+{
+	int ret = 0;
+	int i;
+	char counters_dir_filename[MAX_SYSFS_FILENAME_LEN];
+
+	if (!p || !p->kobj)
+		return;
+
+	/*
+	 * Create sysfs files for each GPU which supports SVM
+	 * - proc/<pid>/counters_<gpuid>/
+	 * - proc/<pid>/counters_<gpuid>/faults
+	 * - proc/<pid>/counters_<gpuid>/page_in
+	 * - proc/<pid>/counters_<gpuid>/page_out
+	 */
+	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		struct kobject *kobj_counters;
+
+		snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN,
+			"counters_%u", pdd->dev->id);
+		kobj_counters = kfd_alloc_struct(kobj_counters);
+		if (!kobj_counters)
+			return;
+
+		ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type,
+					   p->kobj, counters_dir_filename);
+		if (ret) {
+			pr_warn("Creating KFD proc/%s folder failed",
+				counters_dir_filename);
+			kobject_put(kobj_counters);
+			return;
+		}
+
+		pdd->kobj_counters = kobj_counters;
+		kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults,
+				      "faults");
+		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in,
+				      "page_in");
+		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out,
+				      "page_out");
+	}
+}
 
 static void kfd_procfs_add_sysfs_files(struct kfd_process *p)
 {
@@ -777,6 +853,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
 
 		kfd_procfs_add_sysfs_stats(process);
 		kfd_procfs_add_sysfs_files(process);
+		kfd_procfs_add_sysfs_counters(process);
 	}
 out:
 	if (!IS_ERR(process))
@@ -919,44 +996,60 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 	p->n_pdds = 0;
 }
 
-/* No process locking is needed in this function, because the process
- * is not findable any more. We must assume that no other thread is
- * using it any more, otherwise we couldn't safely free the process
- * structure in the end.
- */
-static void kfd_process_wq_release(struct work_struct *work)
+static void kfd_process_remove_sysfs(struct kfd_process *p)
 {
-	struct kfd_process *p = container_of(work, struct kfd_process,
-					     release_work);
+	struct kfd_process_device *pdd;
 	int i;
 
-	/* Remove the procfs files */
-	if (p->kobj) {
-		sysfs_remove_file(p->kobj, &p->attr_pasid);
-		kobject_del(p->kobj_queues);
-		kobject_put(p->kobj_queues);
-		p->kobj_queues = NULL;
+	if (!p->kobj)
+		return;
 
-		for (i = 0; i < p->n_pdds; i++) {
-			struct kfd_process_device *pdd = p->pdds[i];
+	sysfs_remove_file(p->kobj, &p->attr_pasid);
+	kobject_del(p->kobj_queues);
+	kobject_put(p->kobj_queues);
+	p->kobj_queues = NULL;
 
-			sysfs_remove_file(p->kobj, &pdd->attr_vram);
-			sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+	for (i = 0; i < p->n_pdds; i++) {
+		pdd = p->pdds[i];
 
-			sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
-			if (pdd->dev->kfd2kgd->get_cu_occupancy)
-				sysfs_remove_file(pdd->kobj_stats,
-						  &pdd->attr_cu_occupancy);
-			kobject_del(pdd->kobj_stats);
-			kobject_put(pdd->kobj_stats);
-			pdd->kobj_stats = NULL;
-		}
+		sysfs_remove_file(p->kobj, &pdd->attr_vram);
+		sysfs_remove_file(p->kobj, &pdd->attr_sdma);
 
-		kobject_del(p->kobj);
-		kobject_put(p->kobj);
-		p->kobj = NULL;
+		sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
+		if (pdd->dev->kfd2kgd->get_cu_occupancy)
+			sysfs_remove_file(pdd->kobj_stats,
+					  &pdd->attr_cu_occupancy);
+		kobject_del(pdd->kobj_stats);
+		kobject_put(pdd->kobj_stats);
+		pdd->kobj_stats = NULL;
+	}
+
+	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
+		pdd = p->pdds[i];
+
+		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
+		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
+		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out);
+		kobject_del(pdd->kobj_counters);
+		kobject_put(pdd->kobj_counters);
+		pdd->kobj_counters = NULL;
 	}
 
+	kobject_del(p->kobj);
+	kobject_put(p->kobj);
+	p->kobj = NULL;
+}
+
+/* No process locking is needed in this function, because the process
+ * is not findable any more. We must assume that no other thread is
+ * using it any more, otherwise we couldn't safely free the process
+ * structure in the end.
+ */
+static void kfd_process_wq_release(struct work_struct *work)
+{
+	struct kfd_process *p = container_of(work, struct kfd_process,
+					     release_work);
+	kfd_process_remove_sysfs(p);
 	kfd_iommu_unbind_process(p);
 
 	kfd_process_free_outstanding_kfd_bos(p);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2021-06-22 13:32 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-22 13:32 [PATCH 1/4] drm/amdkfd: add helper function for kfd sysfs create Philip Yang
2021-06-22 13:32 ` [PATCH 2/4] drm/amdkfd: fix sysfs kobj leak Philip Yang
2021-06-22 13:32 ` Philip Yang [this message]
2021-06-22 13:32 ` [PATCH 4/4] drm/amdkfd: implement counters for vm fault and migration Philip Yang
2021-06-22 23:31   ` Felix Kuehling
2021-06-23 14:54     ` philip yang
2021-06-23 15:02 ` [PATCH v2 " Philip Yang
2021-06-23 18:52   ` Felix Kuehling

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210622133213.21393-3-Philip.Yang@amd.com \
    --to=philip.yang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.