dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [RFC 1/3] proc: Show GPU runtimes
@ 2021-02-04 12:11 Chris Wilson
  2021-02-04 12:11 ` [RFC 2/3] drm/i915: Look up clients by pid Chris Wilson
  2021-02-04 12:11 ` [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs Chris Wilson
  0 siblings, 2 replies; 7+ messages in thread
From: Chris Wilson @ 2021-02-04 12:11 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx, Chris Wilson

Present an interface for system monitors to watch the GPU usage as a
whole and by individual applications. By consolidating the information
into a canonical location, we have a single interface that can track the
utilisation of all GPU devices and sub-devices. This is preferable to
asking the system monitors to walk the sysfs, or other interfaces, of
each device and parse the custom information presented by each driver.

Opens:
- Should we try to name each channel so that it can be shown in UI?

In gnome-system-monitor, we would have a task list:
	Process ... GPU0% GPU1%
and charts that would show the GPU% on/next the CPU overview.

Then we could have a further expansion of a GPU% into per-channel
utilisation. That would be useful to check what is saturating a
particular channel, e.g. find the video decoder bottleneck.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 fs/proc/Makefile         |  1 +
 fs/proc/base.c           |  2 +
 fs/proc/gpu.c            | 83 ++++++++++++++++++++++++++++++++++++++++
 fs/proc/internal.h       |  6 +++
 include/linux/proc_gpu.h | 33 ++++++++++++++++
 5 files changed, 125 insertions(+)
 create mode 100644 fs/proc/gpu.c
 create mode 100644 include/linux/proc_gpu.h

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bd08616ed8ba..bdc42b592e3e 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -16,6 +16,7 @@ proc-y	+= cmdline.o
 proc-y	+= consoles.o
 proc-y	+= cpuinfo.o
 proc-y	+= devices.o
+proc-y	+= gpu.o
 proc-y	+= interrupts.o
 proc-y	+= loadavg.o
 proc-y	+= meminfo.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b3422cda2a91..062298f5f6c8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3266,6 +3266,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
 	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
 #endif
+	ONE("gpu", S_IRUGO, proc_pid_gpu),
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3598,6 +3599,7 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
 	ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
 #endif
+	ONE("gpu", S_IRUGO, proc_pid_gpu),
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/gpu.c b/fs/proc/gpu.c
new file mode 100644
index 000000000000..7264bf1f2f7b
--- /dev/null
+++ b/fs/proc/gpu.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/proc_gpu.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include "internal.h"
+
+static LIST_HEAD(gpu);
+static DEFINE_SPINLOCK(lock);
+
+void proc_gpu_register(struct proc_gpu *pg)
+{
+	spin_lock(&lock);
+	list_add_tail(&pg->link, &gpu);
+	spin_unlock(&lock);
+}
+EXPORT_SYMBOL_GPL(proc_gpu_register);
+
+void proc_gpu_unregister(struct proc_gpu *pg)
+{
+	spin_lock(&lock);
+	list_del(&pg->link);
+	spin_unlock(&lock);
+}
+EXPORT_SYMBOL_GPL(proc_gpu_unregister);
+
+static void print_runtime(struct seq_file *m, const struct proc_gpu_runtime *rt)
+{
+	int i;
+
+	seq_printf(m, "%llu", rt->device);
+
+	for (i = 0; i < rt->nchannel; i++)
+		seq_printf(m, " %llu", rt->channel[i]);
+
+	seq_printf(m, " %s\n", rt->name);
+}
+
+int proc_pid_gpu(struct seq_file *m, struct pid_namespace *ns,
+		 struct pid *pid, struct task_struct *task)
+{
+	struct proc_gpu *p, *pn, mark = {};
+	struct proc_gpu_runtime rt;
+
+	spin_lock(&lock);
+	list_for_each_entry_safe(p, pn, &gpu, link) {
+		if (!p->fn)
+			continue;
+
+		rt.name[0] = '\0';
+		p->fn(p, pid, &rt);
+		if (!rt.name[0])
+			continue;
+
+		list_add(&mark.link, &p->link);
+		spin_unlock(&lock);
+
+		print_runtime(m, &rt);
+
+		spin_lock(&lock);
+		list_safe_reset_next(&mark, pn, link);
+		list_del(&mark.link);
+	}
+	spin_unlock(&lock);
+
+	return 0;
+}
+
+static int proc_gpu_show(struct seq_file *m, void *v)
+{
+	return proc_pid_gpu(m, NULL, NULL, NULL);
+}
+
+static int __init proc_gpu_init(void)
+{
+	proc_create_single("gpu", 0, NULL, proc_gpu_show);
+	return 0;
+}
+fs_initcall(proc_gpu_init);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f60b379dcdc7..08bf45bec975 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -221,6 +221,12 @@ void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 extern void proc_entry_rundown(struct proc_dir_entry *);
 
+/*
+ * proc_gpu.c
+ */
+int proc_pid_gpu(struct seq_file *m, struct pid_namespace *ns,
+		 struct pid *pid, struct task_struct *task);
+
 /*
  * proc_namespaces.c
  */
diff --git a/include/linux/proc_gpu.h b/include/linux/proc_gpu.h
new file mode 100644
index 000000000000..05c1db951c80
--- /dev/null
+++ b/include/linux/proc_gpu.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Interface for showing per-gpu/per-process runtimes in /proc.
+ */
+#ifndef _LINUX_PROC_GPU_H
+#define _LINUX_PROC_GPU_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct pid;
+struct proc_gpu;
+
+struct proc_gpu_runtime {
+	char name[60];
+	int nchannel;
+	u64 device;
+	u64 channel[64];
+};
+
+typedef void (*proc_gpu_fn_t)(struct proc_gpu *arg,
+			      struct pid *pid,
+			      struct proc_gpu_runtime *rt);
+
+struct proc_gpu {
+	struct list_head link;
+	proc_gpu_fn_t fn;
+};
+
+void proc_gpu_register(struct proc_gpu *pg);
+void proc_gpu_unregister(struct proc_gpu *pg);
+
+#endif /* _LINUX_PROC_GPU_H */
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC 2/3] drm/i915: Look up clients by pid
  2021-02-04 12:11 [RFC 1/3] proc: Show GPU runtimes Chris Wilson
@ 2021-02-04 12:11 ` Chris Wilson
  2021-02-04 12:11 ` [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs Chris Wilson
  1 sibling, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2021-02-04 12:11 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx, Chris Wilson

Use the pid to find associated clients, and report their runtime. This
will be used to provide the information via procfs.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drm_client.c | 70 +++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_drm_client.h | 12 +++--
 2 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 1f8b08a413d4..52d9ae97ba25 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -26,6 +26,9 @@ void i915_drm_clients_init(struct i915_drm_clients *clients,
 
 	clients->next_id = 0;
 	xa_init_flags(&clients->xarray, XA_FLAGS_ALLOC);
+
+	hash_init(clients->pids);
+	spin_lock_init(&clients->pid_lock);
 }
 
 static ssize_t
@@ -95,6 +98,50 @@ show_busy(struct device *kdev, struct device_attribute *attr, char *buf)
 	return sysfs_emit(buf, "%llu\n", total);
 }
 
+u64 i915_drm_clients_get_runtime(struct i915_drm_clients *clients,
+				 struct pid *pid,
+				 u64 *rt)
+{
+	struct i915_drm_client_name *name;
+	u64 total = 0;
+	u64 t;
+
+	memset64(rt, 0, MAX_ENGINE_CLASS + 1);
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(clients->pids, name, node, pid_nr(pid)) {
+		struct i915_drm_client *client = name->client;
+		struct list_head *list = &client->ctx_list;
+		struct i915_gem_context *ctx;
+		int i;
+
+		if (name->pid != pid)
+			continue;
+
+		for (i = 0; i < ARRAY_SIZE(client->past_runtime); i++) {
+			t = atomic64_read(&client->past_runtime[i]);
+			rt[i] += t;
+			total += t;
+		}
+
+		list_for_each_entry_rcu(ctx, list, client_link) {
+			struct i915_gem_engines_iter it;
+			struct intel_context *ce;
+
+			for_each_gem_engine(ce,
+					    rcu_dereference(ctx->engines),
+					    it) {
+				t = intel_context_get_total_runtime_ns(ce);
+				rt[ce->engine->class] += t;
+				total += t;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	return total;
+}
+
 static const char * const uabi_class_names[] = {
 	[I915_ENGINE_CLASS_RENDER] = "0",
 	[I915_ENGINE_CLASS_COPY] = "1",
@@ -242,7 +289,10 @@ __i915_drm_client_register(struct i915_drm_client *client,
 	if (!name)
 		return -ENOMEM;
 
+	spin_lock(&clients->pid_lock);
+	hash_add_rcu(clients->pids, &name->node, pid_nr(name->pid));
 	RCU_INIT_POINTER(client->name, name);
+	spin_unlock(&clients->pid_lock);
 
 	if (!clients->root)
 		return 0; /* intel_fbdev_init registers a client before sysfs */
@@ -254,20 +304,25 @@ __i915_drm_client_register(struct i915_drm_client *client,
 	return 0;
 
 err_sysfs:
+	spin_lock(&clients->pid_lock);
 	RCU_INIT_POINTER(client->name, NULL);
+	hash_del_rcu(&name->node);
+	spin_unlock(&clients->pid_lock);
 	call_rcu(&name->rcu, free_name);
 	return ret;
 }
 
 static void __i915_drm_client_unregister(struct i915_drm_client *client)
 {
+	struct i915_drm_clients *clients = client->clients;
 	struct i915_drm_client_name *name;
 
 	__client_unregister_sysfs(client);
 
-	mutex_lock(&client->update_lock);
+	spin_lock(&clients->pid_lock);
 	name = rcu_replace_pointer(client->name, NULL, true);
-	mutex_unlock(&client->update_lock);
+	hash_del_rcu(&name->node);
+	spin_unlock(&clients->pid_lock);
 
 	call_rcu(&name->rcu, free_name);
 }
@@ -294,7 +349,6 @@ i915_drm_client_add(struct i915_drm_clients *clients, struct task_struct *task)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&client->kref);
-	mutex_init(&client->update_lock);
 	spin_lock_init(&client->ctx_lock);
 	INIT_LIST_HEAD(&client->ctx_list);
 
@@ -339,16 +393,20 @@ int
 i915_drm_client_update(struct i915_drm_client *client,
 		       struct task_struct *task)
 {
+	struct i915_drm_clients *clients = client->clients;
 	struct i915_drm_client_name *name;
 
 	name = get_name(client, task);
 	if (!name)
 		return -ENOMEM;
 
-	mutex_lock(&client->update_lock);
-	if (name->pid != rcu_dereference_protected(client->name, true)->pid)
+	spin_lock(&clients->pid_lock);
+	if (name->pid != rcu_dereference_protected(client->name, true)->pid) {
+		hash_add_rcu(clients->pids, &name->node, pid_nr(name->pid));
 		name = rcu_replace_pointer(client->name, name, true);
-	mutex_unlock(&client->update_lock);
+		hash_del_rcu(&name->node);
+	}
+	spin_unlock(&clients->pid_lock);
 
 	call_rcu(&name->rcu, free_name);
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 83660fa9d2d7..080b8506a86e 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -7,10 +7,10 @@
 #define __I915_DRM_CLIENT_H__
 
 #include <linux/device.h>
+#include <linux/hashtable.h>
 #include <linux/kobject.h>
 #include <linux/kref.h>
 #include <linux/list.h>
-#include <linux/mutex.h>
 #include <linux/pid.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
@@ -28,6 +28,9 @@ struct i915_drm_clients {
 	u32 next_id;
 
 	struct kobject *root;
+
+	spinlock_t pid_lock; /* protects the pid lut */
+	DECLARE_HASHTABLE(pids, 6);
 };
 
 struct i915_drm_client;
@@ -40,6 +43,7 @@ struct i915_engine_busy_attribute {
 
 struct i915_drm_client_name {
 	struct rcu_head rcu;
+	struct hlist_node node;
 	struct i915_drm_client *client;
 	struct pid *pid;
 	char name[];
@@ -50,8 +54,6 @@ struct i915_drm_client {
 
 	struct rcu_work rcu;
 
-	struct mutex update_lock; /* Serializes name and pid updates. */
-
 	unsigned int id;
 	struct i915_drm_client_name __rcu *name;
 	bool closed;
@@ -100,6 +102,10 @@ struct i915_drm_client *i915_drm_client_add(struct i915_drm_clients *clients,
 int i915_drm_client_update(struct i915_drm_client *client,
 			   struct task_struct *task);
 
+u64 i915_drm_clients_get_runtime(struct i915_drm_clients *clients,
+				 struct pid *pid,
+				 u64 *rt);
+
 static inline const struct i915_drm_client_name *
 __i915_drm_client_name(const struct i915_drm_client *client)
 {
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs
  2021-02-04 12:11 [RFC 1/3] proc: Show GPU runtimes Chris Wilson
  2021-02-04 12:11 ` [RFC 2/3] drm/i915: Look up clients by pid Chris Wilson
@ 2021-02-04 12:11 ` Chris Wilson
  2021-02-12 14:57   ` Emil Velikov
  1 sibling, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2021-02-04 12:11 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx, Chris Wilson

Register with /proc/gpu to provide the client runtimes for generic
top-like overview, e.g. gnome-system-monitor can use this information to
show the per-process multi-GPU usage.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile            |  1 +
 drivers/gpu/drm/i915/gt/intel_gt.c       |  5 ++
 drivers/gpu/drm/i915/gt/intel_gt_proc.c  | 66 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_proc.h  | 14 +++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 ++
 5 files changed, 89 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_proc.c
 create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_proc.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index ce01634d4ea7..16171f65f5d1 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -104,6 +104,7 @@ gt-y += \
 	gt/intel_gt_irq.o \
 	gt/intel_gt_pm.o \
 	gt/intel_gt_pm_irq.o \
+	gt/intel_gt_proc.o \
 	gt/intel_gt_requests.o \
 	gt/intel_gtt.o \
 	gt/intel_llc.o \
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ca76f93bc03d..72199c13330d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -12,6 +12,7 @@
 #include "intel_gt_buffer_pool.h"
 #include "intel_gt_clock_utils.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_proc.h"
 #include "intel_gt_requests.h"
 #include "intel_mocs.h"
 #include "intel_rc6.h"
@@ -373,6 +374,8 @@ void intel_gt_driver_register(struct intel_gt *gt)
 	intel_rps_driver_register(&gt->rps);
 
 	debugfs_gt_register(gt);
+
+	intel_gt_driver_register__proc(gt);
 }
 
 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
@@ -656,6 +659,8 @@ void intel_gt_driver_unregister(struct intel_gt *gt)
 {
 	intel_wakeref_t wakeref;
 
+	intel_gt_driver_unregister__proc(gt);
+
 	intel_rps_driver_unregister(&gt->rps);
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_proc.c b/drivers/gpu/drm/i915/gt/intel_gt_proc.c
new file mode 100644
index 000000000000..42db22326c7c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_proc.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/proc_gpu.h>
+
+#include "i915_drm_client.h"
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_proc.h"
+
+static void proc_runtime_pid(struct intel_gt *gt,
+			     struct pid *pid,
+			     struct proc_gpu_runtime *rt)
+{
+	struct i915_drm_clients *clients = &gt->i915->clients;
+
+	BUILD_BUG_ON(MAX_ENGINE_CLASS >= ARRAY_SIZE(rt->channel));
+
+	rt->device = i915_drm_clients_get_runtime(clients, pid, rt->channel);
+	rt->nchannel = MAX_ENGINE_CLASS + 1;
+}
+
+static void proc_runtime_device(struct intel_gt *gt,
+				struct pid *pid,
+				struct proc_gpu_runtime *rt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	ktime_t dummy;
+
+	rt->nchannel = 0;
+	for_each_engine(engine, gt, id) {
+		rt->channel[rt->nchannel++] =
+			intel_engine_get_busy_time(engine, &dummy);
+		if (rt->nchannel == ARRAY_SIZE(rt->channel))
+			break;
+	}
+	rt->device = intel_gt_get_awake_time(gt);
+}
+
+static void proc_runtime(struct proc_gpu *pg,
+			 struct pid *pid,
+			 struct proc_gpu_runtime *rt)
+{
+	struct intel_gt *gt = container_of(pg, typeof(*gt), proc);
+
+	strscpy(rt->name, dev_name(gt->i915->drm.dev), sizeof(rt->name));
+	if (pid)
+		proc_runtime_pid(gt, pid, rt);
+	else
+		proc_runtime_device(gt, pid, rt);
+}
+
+void intel_gt_driver_register__proc(struct intel_gt *gt)
+{
+	gt->proc.fn = proc_runtime;
+	proc_gpu_register(&gt->proc);
+}
+
+void intel_gt_driver_unregister__proc(struct intel_gt *gt)
+{
+	proc_gpu_unregister(&gt->proc);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_proc.h b/drivers/gpu/drm/i915/gt/intel_gt_proc.h
new file mode 100644
index 000000000000..7a9bff0fb020
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_proc.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef INTEL_GT_PROC_H
+#define INTEL_GT_PROC_H
+
+struct intel_gt;
+
+void intel_gt_driver_register__proc(struct intel_gt *gt);
+void intel_gt_driver_unregister__proc(struct intel_gt *gt);
+
+#endif /* INTEL_GT_PROC_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 626af37c7790..3fc6d9741764 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
+#include <linux/proc_gpu.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -135,6 +136,8 @@ struct intel_gt {
 
 	struct i915_vma *scratch;
 
+	struct proc_gpu proc;
+
 	struct intel_gt_info {
 		intel_engine_mask_t engine_mask;
 		u8 num_engines;
-- 
2.20.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs
  2021-02-04 12:11 ` [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs Chris Wilson
@ 2021-02-12 14:57   ` Emil Velikov
  2021-02-12 15:16     ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Emil Velikov @ 2021-02-12 14:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, ML dri-devel

Hi Chris,

On Thu, 4 Feb 2021 at 12:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Register with /proc/gpu to provide the client runtimes for generic
> top-like overview, e.g. gnome-system-monitor can use this information to
> show the per-process multi-GPU usage.
>
Exposing this information to userspace sounds great IMHO and like the
proposed "channels" for the device engines.
If it were me, I would have the channel names a) exposed to userspace
and b) be a "fixed set".

Whereby with a "fixed set" I mean, we should have these akin to the
KMS UAPI properties, where we have core helpers exposing prop X/Y and
there should be no driver specific ones.
This would allow for consistent and deterministic userspace handling,
even if some hardware/drivers do not have all engines - say no copy
engine.


> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_proc.c
> @@ -0,0 +1,66 @@
> +// SPDX-License-Identifier: MIT
Thanks for making these available under MIT.

> +/*
> + * Copyright © 2020 Intel Corporation

Might want to make this 2021 in the next revision.

HTH
Emil
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs
  2021-02-12 14:57   ` Emil Velikov
@ 2021-02-12 15:16     ` Chris Wilson
  2021-02-12 15:45       ` Emil Velikov
  0 siblings, 1 reply; 7+ messages in thread
From: Chris Wilson @ 2021-02-12 15:16 UTC (permalink / raw)
  To: Emil Velikov; +Cc: Intel Graphics Development, ML dri-devel

Quoting Emil Velikov (2021-02-12 14:57:56)
> Hi Chris,
> 
> On Thu, 4 Feb 2021 at 12:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Register with /proc/gpu to provide the client runtimes for generic
> > top-like overview, e.g. gnome-system-monitor can use this information to
> > show the per-process multi-GPU usage.
> >
> Exposing this information to userspace sounds great IMHO and like the
> proposed "channels" for the device engines.
> If it were me, I would have the channel names a) exposed to userspace
> and b) be a "fixed set".

- Total
- Graphics
- Compute
- Unified
- Video
- Copy
- Display
- Other

Enough versatility for the foreseeable future?
But plan for extension.

The other aspect then is the capacity of each channel. We can keep it
simple as the union/average (whichever the driver has to hand) runtime in
nanoseconds over all IP blocks within a channel.
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs
  2021-02-12 15:16     ` Chris Wilson
@ 2021-02-12 15:45       ` Emil Velikov
  2021-02-12 16:07         ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Emil Velikov @ 2021-02-12 15:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, ML dri-devel

On Fri, 12 Feb 2021 at 15:16, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Emil Velikov (2021-02-12 14:57:56)
> > Hi Chris,
> >
> > On Thu, 4 Feb 2021 at 12:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >
> > > Register with /proc/gpu to provide the client runtimes for generic
> > > top-like overview, e.g. gnome-system-monitor can use this information to
> > > show the per-process multi-GPU usage.
> > >
> > Exposing this information to userspace sounds great IMHO and like the
> > proposed "channels" for the device engines.
> > If it were me, I would have the channel names a) exposed to userspace
> > and b) be a "fixed set".
>
> - Total
> - Graphics
> - Compute
> - Unified
> - Video
> - Copy
> - Display
> - Other
>
> Enough versatility for the foreseeable future?
> But plan for extension.
>
With a bit of documentation about "unified" (is it a metric also
counted towards any of the rest) it would be perfect.
For future extension one might consider splitting video into
encoder/decoder/post-processing.

> The other aspect then is the capacity of each channel. We can keep it
> simple as the union/average (whichever the driver has to hand) runtime in
> nanoseconds over all IP blocks within a channel.

Not sure what you mean with capacity. Are you referring to having
multiple instances of the same engine (say 3 separate copy engines)?
Personally I'm inclined to keep these separate entries, since some
hardware can have multiple ones.

For example - before the latest changes nouveau had 8 copy engines,
3+3 video 'generic' video (enc,dec)oder engines, amongst others.

Thanks
Emil
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs
  2021-02-12 15:45       ` Emil Velikov
@ 2021-02-12 16:07         ` Chris Wilson
  0 siblings, 0 replies; 7+ messages in thread
From: Chris Wilson @ 2021-02-12 16:07 UTC (permalink / raw)
  To: Emil Velikov; +Cc: Intel Graphics Development, ML dri-devel

Quoting Emil Velikov (2021-02-12 15:45:04)
> On Fri, 12 Feb 2021 at 15:16, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Quoting Emil Velikov (2021-02-12 14:57:56)
> > > Hi Chris,
> > >
> > > On Thu, 4 Feb 2021 at 12:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > >
> > > > Register with /proc/gpu to provide the client runtimes for generic
> > > > top-like overview, e.g. gnome-system-monitor can use this information to
> > > > show the per-process multi-GPU usage.
> > > >
> > > Exposing this information to userspace sounds great IMHO and like the
> > > proposed "channels" for the device engines.
> > > If it were me, I would have the channel names a) exposed to userspace
> > > and b) be a "fixed set".
> >
> > - Total
> > - Graphics
> > - Compute
> > - Unified
> > - Video
> > - Copy
> > - Display
> > - Other
> >
> > Enough versatility for the foreseeable future?
> > But plan for extension.
> >
> With a bit of documentation about "unified" (is it a metric also
> counted towards any of the rest) it would be perfect.

With unified I was trying to find a place to things that are neither
wholly graphics nor compute, as some may prefer not to categorise
themselves as one or the other. Also whether or not some cores are more
compute than others (so should there be an AI/RT/ALU?)

> For future extension one might consider splitting video into
> encoder/decoder/post-processing.

Ok, I wasn't sure how commonly those functions were split on different
HW.

> > The other aspect then is the capacity of each channel. We can keep it
> > simple as the union/average (whichever the driver has to hand) runtime in
> > nanoseconds over all IP blocks within a channel.
> 
> Not sure what you mean with capacity. Are you referring to having
> multiple instances of the same engine (say 3 separate copy engines)?
> Personally I'm inclined to keep these separate entries, since some
> hardware can have multiple ones.
> 
> For example - before the latest changes nouveau had 8 copy engines,
> 3+3 video 'generic' video (enc,dec)oder engines, amongst others.

Yes, most HW have multiple engines within a family. Trying to keep it
simple, I thought presenting just one runtime metric for the whole
channel. Especially for the single-line per device format I had picked :)

If we switch to a more extensible format,

	-'$device0' : 
		-$channel0 : {
			Total : $total # avg/union over all engines
			Engines : [ $0, $1, ... ]
		}
		...

	-'$device1' : 
		...

Using the same fixed channel names, and dev_name(), pesky concerns such
as keeping it as a simple scanf can be forgotten.
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-02-12 16:07 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-04 12:11 [RFC 1/3] proc: Show GPU runtimes Chris Wilson
2021-02-04 12:11 ` [RFC 2/3] drm/i915: Look up clients by pid Chris Wilson
2021-02-04 12:11 ` [RFC 3/3] drm/i915/gt: Export device and per-process runtimes via procfs Chris Wilson
2021-02-12 14:57   ` Emil Velikov
2021-02-12 15:16     ` Chris Wilson
2021-02-12 15:45       ` Emil Velikov
2021-02-12 16:07         ` Chris Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).