All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
@ 2023-02-10 14:34 Jose Maria Casanova Crespo
  2023-02-10 16:40   ` kernel test robot
  2023-02-11  7:47   ` kernel test robot
  0 siblings, 2 replies; 7+ messages in thread
From: Jose Maria Casanova Crespo @ 2023-02-10 14:34 UTC (permalink / raw)
  To: Emma Anholt, Melissa Wen, David Airlie, Daniel Vetter
  Cc: Jose Maria Casanova Crespo, dri-devel

Two new debugfs interfaces are implemented to expose
the usage stats of the GPU scheduling queues.

- gpu_usage: exposes the total runtime since boot of each
of the 5 scheduling queues available at V3D (BIN, RENDER,
CSD, TFU, CACHE_CLEAN). So if the interface is queried at
two different points of time the usage percentage of each
of the queues can be calculated.

- gpu_pid_usage: exposes the same information but at the
level of detail of each process using the V3D driver. The
runtime for each process using the driver is stored, so the
percentages of usage by PID can be calculated with
measurements at different timestamps.

The storage of gpu_pid_usage stats is only done if
the debugfs interface has been polled during the last 70
seconds. If a process does not submit a GPU job during the
last 70 seconds, its stats will also be purged.

Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_debugfs.c |  91 +++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_drv.h     |  60 ++++++++++++
 drivers/gpu/drm/v3d/v3d_gem.c     |   1 +
 drivers/gpu/drm/v3d/v3d_irq.c     |   5 +
 drivers/gpu/drm/v3d/v3d_sched.c   | 146 +++++++++++++++++++++++++++++-
 5 files changed, 302 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa7..42d36e3f6fa8 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -5,6 +5,7 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/sched/clock.h>
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
@@ -202,6 +203,94 @@ static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	enum v3d_queue queue;
+	u64 timestamp = local_clock();
+	u64 active_runtime;
+	char active;
+
+	seq_printf(m, "timestamp;%llu;\n", timestamp);
+	seq_puts(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		if (queue_stats->last_pid) {
+			active_runtime = timestamp -
+				queue_stats->last_exec_start;
+			active = '1';
+		} else {
+			active_runtime = 0;
+			active = '0';
+		}
+
+		seq_printf(m, "%s;%d;%llu;%c;\n",
+			   v3d_queue_to_string(queue),
+			   queue_stats->jobs_sent,
+			   queue_stats->runtime + active_runtime,
+			   active);
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	struct v3d_queue_pid_stats *cur;
+	enum v3d_queue queue;
+	u64 active_runtime;
+	u64 timestamp = local_clock();
+	char active;
+
+	seq_printf(m, "timestamp;%llu;\n", timestamp);
+	seq_puts(m, "\"QUEUE\";\"PID\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		queue_stats->collect_pid_stats = 1;
+		queue_stats->pid_stats_timeout =
+			jiffies + V3D_QUEUE_STATS_TIMEOUT;
+		list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
+
+			if (cur->pid == queue_stats->last_pid) {
+				active_runtime = timestamp -
+						 queue_stats->last_exec_start;
+				active = '1';
+			} else {
+				active_runtime = 0;
+				active = '0';
+			}
+
+			seq_printf(m, "%s;%d;%d;%llu;%c;\n",
+				   v3d_queue_to_string(queue),
+				   cur->pid, cur->jobs_sent,
+				   cur->runtime + active_runtime,
+				   active);
+		}
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
 static int v3d_measure_clock(struct seq_file *m, void *unused)
 {
 	struct drm_debugfs_entry *entry = m->private;
@@ -241,6 +330,8 @@ static const struct drm_debugfs_info v3d_debugfs_list[] = {
 	{"v3d_regs", v3d_v3d_debugfs_regs, 0},
 	{"measure_clock", v3d_measure_clock, 0},
 	{"bo_stats", v3d_debugfs_bo_stats, 0},
+	{"gpu_usage", v3d_debugfs_gpu_usage, 0},
+	{"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
 };
 
 void
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b74b1351bfc8..5c1f3177fc86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -21,6 +21,19 @@ struct reset_control;
 
 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
+static inline char *
+v3d_queue_to_string(enum v3d_queue queue)
+{
+	switch (queue) {
+	case V3D_BIN: return "v3d_bin";
+	case V3D_RENDER: return "v3d_render";
+	case V3D_TFU: return "v3d_tfu";
+	case V3D_CSD: return "v3d_csd";
+	case V3D_CACHE_CLEAN: return "v3d_cache_clean";
+	}
+	return "UNKNOWN";
+}
+
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
 
@@ -28,6 +41,45 @@ struct v3d_queue_state {
 	u64 emit_seqno;
 };
 
+struct v3d_queue_pid_stats {
+	struct	list_head list;
+	u64	runtime;
+	/* Time in jiffies to purge the stats of this process. Every time a
+	 * process sends a new job to the queue, this timeout is delayed by
+	 * V3D_QUEUE_STATS_TIMEOUT while the pid_stats_timeout of the queue
+	 * is not reached.
+	 */
+	unsigned long timeout_purge;
+	u32	jobs_sent;
+	pid_t	pid;
+};
+
+struct v3d_queue_stats {
+	struct mutex	 lock;
+	u64		 last_exec_start;
+	u64		 last_exec_end;
+	u64		 runtime;
+	u32		 jobs_sent;
+	pid_t		 last_pid;
+	bool		 collect_pid_stats;
+	/* Time in jiffies to stop collecting gpu stats by process. This is
+	 * increased by every access to the debugfs interface gpu_pid_usage.
+	 * If the debugfs is not used stats are not collected.
+	 */
+	unsigned long	 pid_stats_timeout;
+	struct list_head pid_stats_list;
+};
+
+/* pid_stats by process (v3d_queue_pid_stats) are recorded if there has
+ * been an access to the gpu_pid_usage debugfs interface during the last
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
+ *
+ * The same timeout is used to purge the stats by process for those
+ * processes that have not sent jobs during this period.
+ */
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
+
+
 /* Performance monitor object. The perform lifetime is controlled by userspace
  * using perfmon related ioctls. A perfmon can be attached to a submit_cl
  * request, and when this is the case, HW perf counters will be activated just
@@ -141,6 +193,8 @@ struct v3d_dev {
 		u32 num_allocated;
 		u32 pages_allocated;
 	} bo_stats;
+
+	struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
 };
 
 static inline struct v3d_dev *
@@ -238,6 +292,11 @@ struct v3d_job {
 	 */
 	struct v3d_perfmon *perfmon;
 
+	/* PID of the process that submitted the job, which can be used
+	 * for collecting stats by process of gpu usage.
+	 */
+	pid_t client_pid;
+
 	/* Callback for the freeing of the job on refcount going to 0. */
 	void (*free)(struct kref *ref);
 };
@@ -402,6 +461,7 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo);
 /* v3d_sched.c */
 int v3d_sched_init(struct v3d_dev *v3d);
 void v3d_sched_fini(struct v3d_dev *v3d);
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
 
 /* v3d_perfmon.c */
 void v3d_perfmon_get(struct v3d_perfmon *perfmon);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5da1806f3969..8ec56470a403 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -429,6 +429,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	job = *container;
 	job->v3d = v3d;
 	job->free = free;
+	job->client_pid = current->pid;
 
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index e714d5318f30..9b8e8a3229cb 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/sched/clock.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -100,6 +101,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->bin_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -109,6 +111,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->render_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -118,6 +121,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_CSDDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
 
 		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -154,6 +158,7 @@ v3d_hub_irq(int irq, void *arg)
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->tfu_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 06238e6d7f5c..94dc2df2110f 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -18,6 +18,7 @@
  * semaphores to interlock between them.
  */
 
+#include <linux/sched/clock.h>
 #include <linux/kthread.h>
 
 #include "v3d_drv.h"
@@ -72,6 +73,120 @@ v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
 		v3d_perfmon_start(v3d, job->perfmon);
 }
 
+/*
+ * Updates the scheduling stats of the gpu queues runtime for completed jobs.
+ *
+ * It should be called before any new job submission to the queue or before
+ * accessing the stats from the debugfs interface.
+ *
+ * It is expected that calls to this function are done with queue_stats->lock
+ * locked.
+ */
+void
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
+{
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	struct v3d_queue_pid_stats *cur, *tmp;
+	u64 runtime = 0;
+	bool purge_all_pid_stats = 0;
+
+	/* If debugfs stats gpu_pid_usage has not been polled for a period,
+	 * the pid stats collection is stopped and we purge any existing
+	 * pid_stats.
+	 *
+	 * pid_stats are also purged for clients that have reached the
+	 * timeout_purge because the process probably does not exist anymore.
+	 */
+	if (queue_stats->collect_pid_stats) {
+		if (time_is_before_jiffies(queue_stats->pid_stats_timeout)) {
+			purge_all_pid_stats = 1;
+			queue_stats->collect_pid_stats = 0;
+		}
+		list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
+			if (time_is_before_jiffies(cur->timeout_purge) ||
+			    purge_all_pid_stats) {
+				list_del(&cur->list);
+				kfree(cur);
+			} else {
+				break;
+			}
+		}
+	}
+	/* If a job has finished its stats are updated. */
+	if (queue_stats->last_pid && queue_stats->last_exec_end) {
+		runtime = queue_stats->last_exec_end -
+			  queue_stats->last_exec_start;
+		queue_stats->runtime += runtime;
+
+		if (queue_stats->collect_pid_stats) {
+			struct v3d_queue_pid_stats *pid_stats;
+			/* Last job info is always at the head of the list */
+			pid_stats = list_first_entry_or_null(pid_stats_list,
+				struct v3d_queue_pid_stats, list);
+			if (pid_stats &&
+			    pid_stats->pid == queue_stats->last_pid) {
+				pid_stats->runtime += runtime;
+			}
+		}
+		queue_stats->last_pid = 0;
+	}
+}
+
+/*
+ * Updates the queue usage adding the information of a new job that is
+ * about to be sent to the GPU to be executed.
+ */
+int
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
+			struct drm_sched_job *sched_job)
+{
+
+	struct v3d_queue_pid_stats *pid_stats = NULL;
+	struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
+	struct v3d_queue_pid_stats *cur;
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	int ret = 0;
+
+	mutex_lock(&queue_stats->lock);
+
+	/* Completion of previous job requires an update of its runtime */
+	v3d_sched_stats_update(queue_stats);
+
+	queue_stats->last_exec_start = local_clock();
+	queue_stats->last_exec_end = 0;
+	queue_stats->jobs_sent++;
+	queue_stats->last_pid = job->client_pid;
+
+	/* gpu usage stats by process are being collected */
+	if (queue_stats->collect_pid_stats) {
+		list_for_each_entry(cur, pid_stats_list, list) {
+			if (cur->pid == job->client_pid) {
+				pid_stats = cur;
+				break;
+			}
+		}
+		/* client pid_stats is moved to the head of the list. */
+		if (pid_stats) {
+			list_move(&pid_stats->list, pid_stats_list);
+		} else {
+			pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
+					    GFP_KERNEL);
+			if (!pid_stats) {
+				ret = -ENOMEM;
+				goto err_mem;
+			}
+			pid_stats->pid = job->client_pid;
+			list_add(&pid_stats->list, pid_stats_list);
+		}
+		pid_stats->jobs_sent++;
+		pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+	}
+
+err_mem:
+	mutex_unlock(&queue_stats->lock);
+	return ret;
+}
+
 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_bin_job *job = to_bin_job(sched_job);
@@ -107,6 +222,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* Set the current and end address of the control list.
@@ -158,6 +274,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* XXX: Set the QCFG */
@@ -190,6 +307,8 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
+
 	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
 	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
 	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
@@ -231,6 +350,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	for (i = 1; i <= 6; i++)
@@ -247,7 +367,10 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_dev *v3d = job->v3d;
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
+				sched_job);
 	v3d_clean_caches(v3d);
+	v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
 
 	return NULL;
 }
@@ -385,8 +508,18 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int hw_jobs_limit = 1;
 	int job_hang_limit = 0;
 	int hang_limit_ms = 500;
+	enum v3d_queue q;
 	int ret;
 
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
+		/* Setting timeout before current jiffies disables collecting
+		 * pid_stats on scheduling init.
+		 */
+		v3d->gpu_queue_stats[q].pid_stats_timeout = jiffies - 1;
+		mutex_init(&v3d->gpu_queue_stats[q].lock);
+	}
+
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
 			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
@@ -440,9 +573,20 @@ void
 v3d_sched_fini(struct v3d_dev *v3d)
 {
 	enum v3d_queue q;
+	struct v3d_queue_stats *queue_stats;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		if (v3d->queue[q].sched.ready)
+		if (v3d->queue[q].sched.ready) {
+			queue_stats = &v3d->gpu_queue_stats[q];
+			mutex_lock(&queue_stats->lock);
+			/* Setting pid_stats_timeout to jiffies-1 will make
+			 * v3d_sched_stats_update to purge all allocated
+			 * pid_stats.
+			 */
+			queue_stats->pid_stats_timeout = jiffies - 1;
+			v3d_sched_stats_update(queue_stats);
+			mutex_unlock(&queue_stats->lock);
 			drm_sched_fini(&v3d->queue[q].sched);
+		}
 	}
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
  2023-02-10 14:34 [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats Jose Maria Casanova Crespo
@ 2023-02-10 16:40   ` kernel test robot
  2023-02-11  7:47   ` kernel test robot
  1 sibling, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-02-10 16:40 UTC (permalink / raw)
  To: Jose Maria Casanova Crespo, Emma Anholt, Melissa Wen,
	David Airlie, Daniel Vetter
  Cc: oe-kbuild-all, Jose Maria Casanova Crespo, dri-devel

Hi Jose,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on linus/master v6.2-rc7 next-20230210]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20230210143421.1879915-1-jmcasanova%40igalia.com
patch subject: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230211/202302110040.uoXcpWUN-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/890feddd5e794bae0b486a18c0b905c78309d445
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
        git checkout 890feddd5e794bae0b486a18c0b905c78309d445
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash drivers/gpu/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302110040.uoXcpWUN-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/v3d/v3d_sched.c:140:1: warning: no previous prototype for 'v3d_sched_stats_add_job' [-Wmissing-prototypes]
     140 | v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
         | ^~~~~~~~~~~~~~~~~~~~~~~


vim +/v3d_sched_stats_add_job +140 drivers/gpu/drm/v3d/v3d_sched.c

   134	
   135	/*
   136	 * Updates the queue usage adding the information of a new job that is
   137	 * about to be sent to the GPU to be executed.
   138	 */
   139	int
 > 140	v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
   141				struct drm_sched_job *sched_job)
   142	{
   143	
   144		struct v3d_queue_pid_stats *pid_stats = NULL;
   145		struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
   146		struct v3d_queue_pid_stats *cur;
   147		struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
   148		int ret = 0;
   149	
   150		mutex_lock(&queue_stats->lock);
   151	
   152		/* Completion of previous job requires an update of its runtime */
   153		v3d_sched_stats_update(queue_stats);
   154	
   155		queue_stats->last_exec_start = local_clock();
   156		queue_stats->last_exec_end = 0;
   157		queue_stats->jobs_sent++;
   158		queue_stats->last_pid = job->client_pid;
   159	
   160		/* gpu usage stats by process are being collected */
   161		if (queue_stats->collect_pid_stats) {
   162			list_for_each_entry(cur, pid_stats_list, list) {
   163				if (cur->pid == job->client_pid) {
   164					pid_stats = cur;
   165					break;
   166				}
   167			}
   168			/* client pid_stats is moved to the head of the list. */
   169			if (pid_stats) {
   170				list_move(&pid_stats->list, pid_stats_list);
   171			} else {
   172				pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
   173						    GFP_KERNEL);
   174				if (!pid_stats) {
   175					ret = -ENOMEM;
   176					goto err_mem;
   177				}
   178				pid_stats->pid = job->client_pid;
   179				list_add(&pid_stats->list, pid_stats_list);
   180			}
   181			pid_stats->jobs_sent++;
   182			pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
   183		}
   184	
   185	err_mem:
   186		mutex_unlock(&queue_stats->lock);
   187		return ret;
   188	}
   189	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
@ 2023-02-10 16:40   ` kernel test robot
  0 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-02-10 16:40 UTC (permalink / raw)
  To: Jose Maria Casanova Crespo, Emma Anholt, Melissa Wen,
	David Airlie, Daniel Vetter
  Cc: Jose Maria Casanova Crespo, dri-devel, oe-kbuild-all

Hi Jose,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on linus/master v6.2-rc7 next-20230210]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20230210143421.1879915-1-jmcasanova%40igalia.com
patch subject: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230211/202302110040.uoXcpWUN-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/890feddd5e794bae0b486a18c0b905c78309d445
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
        git checkout 890feddd5e794bae0b486a18c0b905c78309d445
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash drivers/gpu/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302110040.uoXcpWUN-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/v3d/v3d_sched.c:140:1: warning: no previous prototype for 'v3d_sched_stats_add_job' [-Wmissing-prototypes]
     140 | v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
         | ^~~~~~~~~~~~~~~~~~~~~~~


vim +/v3d_sched_stats_add_job +140 drivers/gpu/drm/v3d/v3d_sched.c

   134	
   135	/*
   136	 * Updates the queue usage adding the information of a new job that is
   137	 * about to be sent to the GPU to be executed.
   138	 */
   139	int
 > 140	v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
   141				struct drm_sched_job *sched_job)
   142	{
   143	
   144		struct v3d_queue_pid_stats *pid_stats = NULL;
   145		struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
   146		struct v3d_queue_pid_stats *cur;
   147		struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
   148		int ret = 0;
   149	
   150		mutex_lock(&queue_stats->lock);
   151	
   152		/* Completion of previous job requires an update of its runtime */
   153		v3d_sched_stats_update(queue_stats);
   154	
   155		queue_stats->last_exec_start = local_clock();
   156		queue_stats->last_exec_end = 0;
   157		queue_stats->jobs_sent++;
   158		queue_stats->last_pid = job->client_pid;
   159	
   160		/* gpu usage stats by process are being collected */
   161		if (queue_stats->collect_pid_stats) {
   162			list_for_each_entry(cur, pid_stats_list, list) {
   163				if (cur->pid == job->client_pid) {
   164					pid_stats = cur;
   165					break;
   166				}
   167			}
   168			/* client pid_stats is moved to the head of the list. */
   169			if (pid_stats) {
   170				list_move(&pid_stats->list, pid_stats_list);
   171			} else {
   172				pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
   173						    GFP_KERNEL);
   174				if (!pid_stats) {
   175					ret = -ENOMEM;
   176					goto err_mem;
   177				}
   178				pid_stats->pid = job->client_pid;
   179				list_add(&pid_stats->list, pid_stats_list);
   180			}
   181			pid_stats->jobs_sent++;
   182			pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
   183		}
   184	
   185	err_mem:
   186		mutex_unlock(&queue_stats->lock);
   187		return ret;
   188	}
   189	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
  2023-02-10 14:34 [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats Jose Maria Casanova Crespo
@ 2023-02-11  7:47   ` kernel test robot
  2023-02-11  7:47   ` kernel test robot
  1 sibling, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-02-11  7:47 UTC (permalink / raw)
  To: Jose Maria Casanova Crespo, Emma Anholt, Melissa Wen,
	David Airlie, Daniel Vetter
  Cc: Jose Maria Casanova Crespo, llvm, dri-devel, oe-kbuild-all

Hi Jose,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on linus/master v6.2-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20230210143421.1879915-1-jmcasanova%40igalia.com
patch subject: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
config: hexagon-randconfig-r025-20230210 (https://download.01.org/0day-ci/archive/20230211/202302111549.NR71LTx9-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project db0e6591612b53910a1b366863348bdb9d7d2fb1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/890feddd5e794bae0b486a18c0b905c78309d445
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
        git checkout 890feddd5e794bae0b486a18c0b905c78309d445
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash drivers/gpu/drm/v3d/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302111549.NR71LTx9-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/v3d/v3d_sched.c:24:
   In file included from drivers/gpu/drm/v3d/v3d_drv.h:9:
   In file included from include/drm/drm_encoder.h:28:
   In file included from include/drm/drm_crtc.h:32:
   In file included from include/drm/drm_modes.h:33:
   In file included from include/drm/drm_connector.h:32:
   In file included from include/drm/drm_util.h:35:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
                                                     ^
   In file included from drivers/gpu/drm/v3d/v3d_sched.c:24:
   In file included from drivers/gpu/drm/v3d/v3d_drv.h:9:
   In file included from include/drm/drm_encoder.h:28:
   In file included from include/drm/drm_crtc.h:32:
   In file included from include/drm/drm_modes.h:33:
   In file included from include/drm/drm_connector.h:32:
   In file included from include/drm/drm_util.h:35:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
                                                     ^
   In file included from drivers/gpu/drm/v3d/v3d_sched.c:24:
   In file included from drivers/gpu/drm/v3d/v3d_drv.h:9:
   In file included from include/drm/drm_encoder.h:28:
   In file included from include/drm/drm_crtc.h:32:
   In file included from include/drm/drm_modes.h:33:
   In file included from include/drm/drm_connector.h:32:
   In file included from include/drm/drm_util.h:35:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
>> drivers/gpu/drm/v3d/v3d_sched.c:140:1: warning: no previous prototype for function 'v3d_sched_stats_add_job' [-Wmissing-prototypes]
   v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
   ^
   drivers/gpu/drm/v3d/v3d_sched.c:139:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int
   ^
   static 
   7 warnings generated.


vim +/v3d_sched_stats_add_job +140 drivers/gpu/drm/v3d/v3d_sched.c

   134	
   135	/*
   136	 * Updates the queue usage adding the information of a new job that is
   137	 * about to be sent to the GPU to be executed.
   138	 */
   139	int
 > 140	v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
   141				struct drm_sched_job *sched_job)
   142	{
   143	
   144		struct v3d_queue_pid_stats *pid_stats = NULL;
   145		struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
   146		struct v3d_queue_pid_stats *cur;
   147		struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
   148		int ret = 0;
   149	
   150		mutex_lock(&queue_stats->lock);
   151	
   152		/* Completion of previous job requires an update of its runtime */
   153		v3d_sched_stats_update(queue_stats);
   154	
   155		queue_stats->last_exec_start = local_clock();
   156		queue_stats->last_exec_end = 0;
   157		queue_stats->jobs_sent++;
   158		queue_stats->last_pid = job->client_pid;
   159	
   160		/* gpu usage stats by process are being collected */
   161		if (queue_stats->collect_pid_stats) {
   162			list_for_each_entry(cur, pid_stats_list, list) {
   163				if (cur->pid == job->client_pid) {
   164					pid_stats = cur;
   165					break;
   166				}
   167			}
   168			/* client pid_stats is moved to the head of the list. */
   169			if (pid_stats) {
   170				list_move(&pid_stats->list, pid_stats_list);
   171			} else {
   172				pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
   173						    GFP_KERNEL);
   174				if (!pid_stats) {
   175					ret = -ENOMEM;
   176					goto err_mem;
   177				}
   178				pid_stats->pid = job->client_pid;
   179				list_add(&pid_stats->list, pid_stats_list);
   180			}
   181			pid_stats->jobs_sent++;
   182			pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
   183		}
   184	
   185	err_mem:
   186		mutex_unlock(&queue_stats->lock);
   187		return ret;
   188	}
   189	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
@ 2023-02-11  7:47   ` kernel test robot
  0 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2023-02-11  7:47 UTC (permalink / raw)
  To: Jose Maria Casanova Crespo, Emma Anholt, Melissa Wen,
	David Airlie, Daniel Vetter
  Cc: llvm, oe-kbuild-all, Jose Maria Casanova Crespo, dri-devel

Hi Jose,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-misc/drm-misc-next]
[also build test WARNING on linus/master v6.2-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
base:   git://anongit.freedesktop.org/drm/drm-misc drm-misc-next
patch link:    https://lore.kernel.org/r/20230210143421.1879915-1-jmcasanova%40igalia.com
patch subject: [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats.
config: hexagon-randconfig-r025-20230210 (https://download.01.org/0day-ci/archive/20230211/202302111549.NR71LTx9-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project db0e6591612b53910a1b366863348bdb9d7d2fb1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/890feddd5e794bae0b486a18c0b905c78309d445
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jose-Maria-Casanova-Crespo/drm-v3d-New-debugfs-end-points-to-query-GPU-usage-stats/20230210-223726
        git checkout 890feddd5e794bae0b486a18c0b905c78309d445
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash drivers/gpu/drm/v3d/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302111549.NR71LTx9-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/v3d/v3d_sched.c:24:
   In file included from drivers/gpu/drm/v3d/v3d_drv.h:9:
   In file included from include/drm/drm_encoder.h:28:
   In file included from include/drm/drm_crtc.h:32:
   In file included from include/drm/drm_modes.h:33:
   In file included from include/drm/drm_connector.h:32:
   In file included from include/drm/drm_util.h:35:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
                                                     ^
   In file included from drivers/gpu/drm/v3d/v3d_sched.c:24:
   In file included from drivers/gpu/drm/v3d/v3d_drv.h:9:
   In file included from include/drm/drm_encoder.h:28:
   In file included from include/drm/drm_crtc.h:32:
   In file included from include/drm/drm_modes.h:33:
   In file included from include/drm/drm_connector.h:32:
   In file included from include/drm/drm_util.h:35:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
                                                     ^
   In file included from drivers/gpu/drm/v3d/v3d_sched.c:24:
   In file included from drivers/gpu/drm/v3d/v3d_drv.h:9:
   In file included from include/drm/drm_encoder.h:28:
   In file included from include/drm/drm_crtc.h:32:
   In file included from include/drm/drm_modes.h:33:
   In file included from include/drm/drm_connector.h:32:
   In file included from include/drm/drm_util.h:35:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
>> drivers/gpu/drm/v3d/v3d_sched.c:140:1: warning: no previous prototype for function 'v3d_sched_stats_add_job' [-Wmissing-prototypes]
   v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
   ^
   drivers/gpu/drm/v3d/v3d_sched.c:139:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int
   ^
   static 
   7 warnings generated.


vim +/v3d_sched_stats_add_job +140 drivers/gpu/drm/v3d/v3d_sched.c

   134	
   135	/*
   136	 * Updates the queue usage adding the information of a new job that is
   137	 * about to be sent to the GPU to be executed.
   138	 */
   139	int
 > 140	v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
   141				struct drm_sched_job *sched_job)
   142	{
   143	
   144		struct v3d_queue_pid_stats *pid_stats = NULL;
   145		struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
   146		struct v3d_queue_pid_stats *cur;
   147		struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
   148		int ret = 0;
   149	
   150		mutex_lock(&queue_stats->lock);
   151	
   152		/* Completion of previous job requires an update of its runtime */
   153		v3d_sched_stats_update(queue_stats);
   154	
   155		queue_stats->last_exec_start = local_clock();
   156		queue_stats->last_exec_end = 0;
   157		queue_stats->jobs_sent++;
   158		queue_stats->last_pid = job->client_pid;
   159	
   160		/* gpu usage stats by process are being collected */
   161		if (queue_stats->collect_pid_stats) {
   162			list_for_each_entry(cur, pid_stats_list, list) {
   163				if (cur->pid == job->client_pid) {
   164					pid_stats = cur;
   165					break;
   166				}
   167			}
   168			/* client pid_stats is moved to the head of the list. */
   169			if (pid_stats) {
   170				list_move(&pid_stats->list, pid_stats_list);
   171			} else {
   172				pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
   173						    GFP_KERNEL);
   174				if (!pid_stats) {
   175					ret = -ENOMEM;
   176					goto err_mem;
   177				}
   178				pid_stats->pid = job->client_pid;
   179				list_add(&pid_stats->list, pid_stats_list);
   180			}
   181			pid_stats->jobs_sent++;
   182			pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
   183		}
   184	
   185	err_mem:
   186		mutex_unlock(&queue_stats->lock);
   187		return ret;
   188	}
   189	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2] drm/v3d: New debugfs end-points to query GPU usage stats.
  2023-02-10 16:40   ` kernel test robot
@ 2023-02-28 11:44     ` Jose Maria Casanova Crespo
  -1 siblings, 0 replies; 7+ messages in thread
From: Jose Maria Casanova Crespo @ 2023-02-28 11:44 UTC (permalink / raw)
  To: lkp; +Cc: emma, dri-devel, mwen, oe-kbuild-all, jmcasanova

Two new debugfs interfaces are implemented to expose
the usage stats of the GPU scheduling queues.

- gpu_usage: exposes the total runtime since boot of each
of the 5 scheduling queues available at V3D (BIN, RENDER,
CSD, TFU, CACHE_CLEAN). So if the interface is queried at
two different points of time the usage percentage of each
of the queues can be calculated.

- gpu_pid_usage: exposes the same information but to the
level of detail of each process using the V3D driver. The
runtime for each process using the driver is stored. So the
percentages of usage by PID can be calculated with
measures at different timestamps.

The storage of gpu_pid_usage stats is only done if
the debugfs interface is polled during the last 70 seconds.
If a process does not submit a GPU job during last 70
seconds its stats will also be purged.

v2: Declare function as static (kernel test robot <lkp@intel.com>)

Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_debugfs.c |  91 +++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_drv.h     |  60 ++++++++++++
 drivers/gpu/drm/v3d/v3d_gem.c     |   1 +
 drivers/gpu/drm/v3d/v3d_irq.c     |   5 +
 drivers/gpu/drm/v3d/v3d_sched.c   | 146 +++++++++++++++++++++++++++++-
 5 files changed, 302 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa7..42d36e3f6fa8 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -5,6 +5,7 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/sched/clock.h>
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
@@ -202,6 +203,94 @@ static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	enum v3d_queue queue;
+	u64 timestamp = local_clock();
+	u64 active_runtime;
+	char active;
+
+	seq_printf(m, "timestamp;%llu;\n", local_clock());
+	seq_puts(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		if (queue_stats->last_pid) {
+			active_runtime = timestamp -
+				queue_stats->last_exec_start;
+			active = '1';
+		} else {
+			active_runtime = 0;
+			active = '0';
+		}
+
+		seq_printf(m, "%s;%d;%llu;%c;\n",
+			   v3d_queue_to_string(queue),
+			   queue_stats->jobs_sent,
+			   queue_stats->runtime + active_runtime,
+			   active);
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	struct v3d_queue_pid_stats *cur;
+	enum v3d_queue queue;
+	u64 active_runtime;
+	u64 timestamp = local_clock();
+	char active;
+
+	seq_printf(m, "timestamp;%llu;\n", timestamp);
+	seq_puts(m, "\"QUEUE\";\"PID\",\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		queue_stats->collect_pid_stats = 1;
+		queue_stats->pid_stats_timeout =
+			jiffies + V3D_QUEUE_STATS_TIMEOUT;
+		list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
+
+			if (cur->pid == queue_stats->last_pid) {
+				active_runtime = timestamp -
+						 queue_stats->last_exec_start;
+				active = '1';
+			} else {
+				active_runtime = 0;
+				active = '0';
+			}
+
+			seq_printf(m, "%s;%d;%d;%llu;%c;\n",
+				   v3d_queue_to_string(queue),
+				   cur->pid, cur->jobs_sent,
+				   cur->runtime + active_runtime,
+				   active);
+		}
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
 static int v3d_measure_clock(struct seq_file *m, void *unused)
 {
 	struct drm_debugfs_entry *entry = m->private;
@@ -241,6 +330,8 @@ static const struct drm_debugfs_info v3d_debugfs_list[] = {
 	{"v3d_regs", v3d_v3d_debugfs_regs, 0},
 	{"measure_clock", v3d_measure_clock, 0},
 	{"bo_stats", v3d_debugfs_bo_stats, 0},
+	{"gpu_usage", v3d_debugfs_gpu_usage, 0},
+	{"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
 };
 
 void
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b74b1351bfc8..5c1f3177fc86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -21,6 +21,19 @@ struct reset_control;
 
 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
+static inline char *
+v3d_queue_to_string(enum v3d_queue queue)
+{
+	switch (queue) {
+	case V3D_BIN: return "v3d_bin";
+	case V3D_RENDER: return "v3d_render";
+	case V3D_TFU: return "v3d_tfu";
+	case V3D_CSD: return "v3d_csd";
+	case V3D_CACHE_CLEAN: return "v3d_cache_clean";
+	}
+	return "UNKNOWN";
+}
+
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
 
@@ -28,6 +41,45 @@ struct v3d_queue_state {
 	u64 emit_seqno;
 };
 
+struct v3d_queue_pid_stats {
+	struct	list_head list;
+	u64	runtime;
+	/* Time in jiffies to purge the stats of this process. Every time a
+	 * process sends a new job to the queue, this timeout is delayed by
+	 * V3D_QUEUE_STATS_TIMEOUT while the pid_stats_timeout of the queue
+	 * is not reached.
+	 */
+	unsigned long timeout_purge;
+	u32	jobs_sent;
+	pid_t	pid;
+};
+
+struct v3d_queue_stats {
+	struct mutex	 lock;
+	u64		 last_exec_start;
+	u64		 last_exec_end;
+	u64		 runtime;
+	u32		 jobs_sent;
+	pid_t		 last_pid;
+	bool		 collect_pid_stats;
+	/* Time in jiffies to stop collecting gpu stats by process. This is
+	 * increased by every access to the debugfs interface gpu_pid_usage.
+	 * If the debugfs is not used stats are not collected.
+	 */
+	unsigned long	 pid_stats_timeout;
+	struct list_head pid_stats_list;
+};
+
+/* pid_stats by process (v3d_queue_pid_stats) are recorded if there is an
+ * access to the gpu_pid_usage debugfs interface for the last
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
+ *
+ * The same timeout is used to purge the stats by process for those process
+ * that have not sent jobs this period.
+ */
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
+
+
 /* Performance monitor object. The perform lifetime is controlled by userspace
  * using perfmon related ioctls. A perfmon can be attached to a submit_cl
  * request, and when this is the case, HW perf counters will be activated just
@@ -141,6 +193,8 @@ struct v3d_dev {
 		u32 num_allocated;
 		u32 pages_allocated;
 	} bo_stats;
+
+	struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
 };
 
 static inline struct v3d_dev *
@@ -238,6 +292,11 @@ struct v3d_job {
 	 */
 	struct v3d_perfmon *perfmon;
 
+	/* PID of the process that submitted the job, which can be used
+	 * for collecting stats by process of gpu usage.
+	 */
+	pid_t client_pid;
+
 	/* Callback for the freeing of the job on refcount going to 0. */
 	void (*free)(struct kref *ref);
 };
@@ -402,6 +461,7 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo);
 /* v3d_sched.c */
 int v3d_sched_init(struct v3d_dev *v3d);
 void v3d_sched_fini(struct v3d_dev *v3d);
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
 
 /* v3d_perfmon.c */
 void v3d_perfmon_get(struct v3d_perfmon *perfmon);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 2e94ce788c71..2a4034d6cd41 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	job = *container;
 	job->v3d = v3d;
 	job->free = free;
+	job->client_pid = current->pid;
 
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index e714d5318f30..9b8e8a3229cb 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/sched/clock.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -100,6 +101,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->bin_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -109,6 +111,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->render_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -118,6 +121,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_CSDDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
 
 		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -154,6 +158,7 @@ v3d_hub_irq(int irq, void *arg)
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->tfu_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 06238e6d7f5c..f6ad63539e69 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -18,6 +18,7 @@
  * semaphores to interlock between them.
  */
 
+#include <linux/sched/clock.h>
 #include <linux/kthread.h>
 
 #include "v3d_drv.h"
@@ -72,6 +73,120 @@ v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
 		v3d_perfmon_start(v3d, job->perfmon);
 }
 
+/*
+ * Updates the scheduling stats of the gpu queues runtime for completed jobs.
+ *
+ * It should be called before any new job submission to the queue or before
+ * accessing the stats from the debugfs interface.
+ *
+ * It is expected that calls to this function are done with queue_stats->lock
+ * locked.
+ */
+void
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
+{
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	struct v3d_queue_pid_stats *cur, *tmp;
+	u64 runtime = 0;
+	bool purge_all_pid_stats = 0;
+
+	/* If debugfs stats gpu_pid_usage has not been polled for a period,
+	 * the pid stats collection is stopped and we purge any existing
+	 * pid_stats.
+	 *
+	 * pid_stats are also purged for clients that have reached the
+	 * timeout_purge because the process probably does not exist anymore.
+	 */
+	if (queue_stats->collect_pid_stats) {
+		if (time_is_before_jiffies(queue_stats->pid_stats_timeout)) {
+			purge_all_pid_stats = 1;
+			queue_stats->collect_pid_stats = 0;
+		}
+		list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
+			if (time_is_before_jiffies(cur->timeout_purge) ||
+			    purge_all_pid_stats) {
+				list_del(&cur->list);
+				kfree(cur);
+			} else {
+				break;
+			}
+		}
+	}
+	/* If a job has finished its stats are updated. */
+	if (queue_stats->last_pid && queue_stats->last_exec_end) {
+		runtime = queue_stats->last_exec_end -
+			  queue_stats->last_exec_start;
+		queue_stats->runtime += runtime;
+
+		if (queue_stats->collect_pid_stats) {
+			struct v3d_queue_pid_stats *pid_stats;
+			/* Last job info is always at the head of the list */
+			pid_stats = list_first_entry_or_null(pid_stats_list,
+				struct v3d_queue_pid_stats, list);
+			if (pid_stats &&
+			    pid_stats->pid == queue_stats->last_pid) {
+				pid_stats->runtime += runtime;
+			}
+		}
+		queue_stats->last_pid = 0;
+	}
+}
+
+/*
+ * Updates the queue usage adding the information of a new job that is
+ * about to be sent to the GPU to be executed.
+ */
+static int
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
+			struct drm_sched_job *sched_job)
+{
+
+	struct v3d_queue_pid_stats *pid_stats = NULL;
+	struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
+	struct v3d_queue_pid_stats *cur;
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	int ret = 0;
+
+	mutex_lock(&queue_stats->lock);
+
+	/* Completion of previous job requires an update of its runtime */
+	v3d_sched_stats_update(queue_stats);
+
+	queue_stats->last_exec_start = local_clock();
+	queue_stats->last_exec_end = 0;
+	queue_stats->jobs_sent++;
+	queue_stats->last_pid = job->client_pid;
+
+	/* gpu usage stats by process are being collected */
+	if (queue_stats->collect_pid_stats) {
+		list_for_each_entry(cur, pid_stats_list, list) {
+			if (cur->pid == job->client_pid) {
+				pid_stats = cur;
+				break;
+			}
+		}
+		/* client pid_stats is moved to the head of the list. */
+		if (pid_stats) {
+			list_move(&pid_stats->list, pid_stats_list);
+		} else {
+			pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
+					    GFP_KERNEL);
+			if (!pid_stats) {
+				ret = -ENOMEM;
+				goto err_mem;
+			}
+			pid_stats->pid = job->client_pid;
+			list_add(&pid_stats->list, pid_stats_list);
+		}
+		pid_stats->jobs_sent++;
+		pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+	}
+
+err_mem:
+	mutex_unlock(&queue_stats->lock);
+	return ret;
+}
+
 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_bin_job *job = to_bin_job(sched_job);
@@ -107,6 +222,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* Set the current and end address of the control list.
@@ -158,6 +274,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* XXX: Set the QCFG */
@@ -190,6 +307,8 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
+
 	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
 	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
 	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
@@ -231,6 +350,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	for (i = 1; i <= 6; i++)
@@ -247,7 +367,10 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_dev *v3d = job->v3d;
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
+				sched_job);
 	v3d_clean_caches(v3d);
+	v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
 
 	return NULL;
 }
@@ -385,8 +508,18 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int hw_jobs_limit = 1;
 	int job_hang_limit = 0;
 	int hang_limit_ms = 500;
+	enum v3d_queue q;
 	int ret;
 
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
+		/* Setting timeout before current jiffies disables collecting
+		 * pid_stats on scheduling init.
+		 */
+		v3d->gpu_queue_stats[q].pid_stats_timeout = jiffies - 1;
+		mutex_init(&v3d->gpu_queue_stats[q].lock);
+	}
+
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
 			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
@@ -440,9 +573,20 @@ void
 v3d_sched_fini(struct v3d_dev *v3d)
 {
 	enum v3d_queue q;
+	struct v3d_queue_stats *queue_stats;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		if (v3d->queue[q].sched.ready)
+		if (v3d->queue[q].sched.ready) {
+			queue_stats = &v3d->gpu_queue_stats[q];
+			mutex_lock(&queue_stats->lock);
+			/* Setting pid_stats_timeout to jiffies-1 will make
+			 * v3d_sched_stats_update purge all allocated
+			 * pid_stats.
+			 */
+			queue_stats->pid_stats_timeout = jiffies - 1;
+			v3d_sched_stats_update(queue_stats);
+			mutex_unlock(&queue_stats->lock);
 			drm_sched_fini(&v3d->queue[q].sched);
+		}
 	}
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2] drm/v3d: New debugfs end-points to query GPU usage stats.
@ 2023-02-28 11:44     ` Jose Maria Casanova Crespo
  0 siblings, 0 replies; 7+ messages in thread
From: Jose Maria Casanova Crespo @ 2023-02-28 11:44 UTC (permalink / raw)
  To: lkp; +Cc: airlied, daniel, dri-devel, emma, jmcasanova, mwen, oe-kbuild-all

Two new debugfs interfaces are implemented to expose
the usage stats of the GPU scheduling queues.

- gpu_usage: exposes the total runtime since boot of each
of the 5 scheduling queues available at V3D (BIN, RENDER,
CSD, TFU, CACHE_CLEAN). So if the interface is queried at
two different points of time the usage percentage of each
of the queues can be calculated.

- gpu_pid_usage: exposes the same information but to the
level of detail of each process using the V3D driver. The
runtime for each process using the driver is stored. So the
percentages of usage by PID can be calculated with
measures at different timestamps.

The storage of gpu_pid_usage stats is only done if
the debugfs interface has been polled during the last
70 seconds. If a process does not submit a GPU job
during the last 70 seconds its stats will also be purged.

v2: Declare function as static (kernel test robot <lkp@intel.com>)

Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_debugfs.c |  91 +++++++++++++++++++
 drivers/gpu/drm/v3d/v3d_drv.h     |  60 ++++++++++++
 drivers/gpu/drm/v3d/v3d_gem.c     |   1 +
 drivers/gpu/drm/v3d/v3d_irq.c     |   5 +
 drivers/gpu/drm/v3d/v3d_sched.c   | 146 +++++++++++++++++++++++++++++-
 5 files changed, 302 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa7..42d36e3f6fa8 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -5,6 +5,7 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/sched/clock.h>
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
@@ -202,6 +203,94 @@ static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	enum v3d_queue queue;
+	u64 timestamp = local_clock();
+	u64 active_runtime;
+	char active;
+
+	seq_printf(m, "timestamp;%llu;\n", local_clock());
+	seq_puts(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		if (queue_stats->last_pid) {
+			active_runtime = timestamp -
+				queue_stats->last_exec_start;
+			active = '1';
+		} else {
+			active_runtime = 0;
+			active = '0';
+		}
+
+		seq_printf(m, "%s;%d;%llu;%c;\n",
+			   v3d_queue_to_string(queue),
+			   queue_stats->jobs_sent,
+			   queue_stats->runtime + active_runtime,
+			   active);
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	struct v3d_queue_pid_stats *cur;
+	enum v3d_queue queue;
+	u64 active_runtime;
+	u64 timestamp = local_clock();
+	char active;
+
+	seq_printf(m, "timestamp;%llu;\n", timestamp);
+	seq_puts(m, "\"QUEUE\";\"PID\",\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		queue_stats->collect_pid_stats = 1;
+		queue_stats->pid_stats_timeout =
+			jiffies + V3D_QUEUE_STATS_TIMEOUT;
+		list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
+
+			if (cur->pid == queue_stats->last_pid) {
+				active_runtime = timestamp -
+						 queue_stats->last_exec_start;
+				active = '1';
+			} else {
+				active_runtime = 0;
+				active = '0';
+			}
+
+			seq_printf(m, "%s;%d;%d;%llu;%c;\n",
+				   v3d_queue_to_string(queue),
+				   cur->pid, cur->jobs_sent,
+				   cur->runtime + active_runtime,
+				   active);
+		}
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
 static int v3d_measure_clock(struct seq_file *m, void *unused)
 {
 	struct drm_debugfs_entry *entry = m->private;
@@ -241,6 +330,8 @@ static const struct drm_debugfs_info v3d_debugfs_list[] = {
 	{"v3d_regs", v3d_v3d_debugfs_regs, 0},
 	{"measure_clock", v3d_measure_clock, 0},
 	{"bo_stats", v3d_debugfs_bo_stats, 0},
+	{"gpu_usage", v3d_debugfs_gpu_usage, 0},
+	{"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
 };
 
 void
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index b74b1351bfc8..5c1f3177fc86 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -21,6 +21,19 @@ struct reset_control;
 
 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
+static inline char *
+v3d_queue_to_string(enum v3d_queue queue)
+{
+	switch (queue) {
+	case V3D_BIN: return "v3d_bin";
+	case V3D_RENDER: return "v3d_render";
+	case V3D_TFU: return "v3d_tfu";
+	case V3D_CSD: return "v3d_csd";
+	case V3D_CACHE_CLEAN: return "v3d_cache_clean";
+	}
+	return "UNKNOWN";
+}
+
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
 
@@ -28,6 +41,45 @@ struct v3d_queue_state {
 	u64 emit_seqno;
 };
 
+struct v3d_queue_pid_stats {
+	struct	list_head list;
+	u64	runtime;
+	/* Time in jiffies to purge the stats of this process. Every time a
+	 * process sends a new job to the queue, this timeout is delayed by
+	 * V3D_QUEUE_STATS_TIMEOUT while the pid_stats_timeout of the queue
+	 * is not reached.
+	 */
+	unsigned long timeout_purge;
+	u32	jobs_sent;
+	pid_t	pid;
+};
+
+struct v3d_queue_stats {
+	struct mutex	 lock;
+	u64		 last_exec_start;
+	u64		 last_exec_end;
+	u64		 runtime;
+	u32		 jobs_sent;
+	pid_t		 last_pid;
+	bool		 collect_pid_stats;
+	/* Time in jiffies to stop collecting gpu stats by process. This is
+	 * increased by every access to the debugfs interface gpu_pid_usage.
+	 * If the debugfs is not used stats are not collected.
+	 */
+	unsigned long	 pid_stats_timeout;
+	struct list_head pid_stats_list;
+};
+
+/* pid_stats by process (v3d_queue_pid_stats) are recorded if there is an
+ * access to the gpu_pid_usage debugfs interface during the last
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
+ *
+ * The same timeout is used to purge the stats by process for those process
+ * that have not sent jobs this period.
+ */
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
+
+
 /* Performance monitor object. The perform lifetime is controlled by userspace
  * using perfmon related ioctls. A perfmon can be attached to a submit_cl
  * request, and when this is the case, HW perf counters will be activated just
@@ -141,6 +193,8 @@ struct v3d_dev {
 		u32 num_allocated;
 		u32 pages_allocated;
 	} bo_stats;
+
+	struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
 };
 
 static inline struct v3d_dev *
@@ -238,6 +292,11 @@ struct v3d_job {
 	 */
 	struct v3d_perfmon *perfmon;
 
+	/* PID of the process that submitted the job, which can be used
+	 * for collecting stats of gpu usage by process.
+	 */
+	pid_t client_pid;
+
 	/* Callback for the freeing of the job on refcount going to 0. */
 	void (*free)(struct kref *ref);
 };
@@ -402,6 +461,7 @@ void v3d_mmu_remove_ptes(struct v3d_bo *bo);
 /* v3d_sched.c */
 int v3d_sched_init(struct v3d_dev *v3d);
 void v3d_sched_fini(struct v3d_dev *v3d);
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
 
 /* v3d_perfmon.c */
 void v3d_perfmon_get(struct v3d_perfmon *perfmon);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 2e94ce788c71..2a4034d6cd41 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -415,6 +415,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
 	job = *container;
 	job->v3d = v3d;
 	job->free = free;
+	job->client_pid = current->pid;
 
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index e714d5318f30..9b8e8a3229cb 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/sched/clock.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -100,6 +101,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->bin_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -109,6 +111,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->render_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -118,6 +121,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_CSDDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
 
 		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -154,6 +158,7 @@ v3d_hub_irq(int irq, void *arg)
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->tfu_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 06238e6d7f5c..f6ad63539e69 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -18,6 +18,7 @@
  * semaphores to interlock between them.
  */
 
+#include <linux/sched/clock.h>
 #include <linux/kthread.h>
 
 #include "v3d_drv.h"
@@ -72,6 +73,120 @@ v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
 		v3d_perfmon_start(v3d, job->perfmon);
 }
 
+/*
+ * Updates the scheduling stats of the gpu queues runtime for completed jobs.
+ *
+ * It should be called before any new job submission to the queue or before
+ * accessing the stats from the debugfs interface.
+ *
+ * It is expected that calls to this function are done with queue_stats->lock
+ * locked.
+ */
+void
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
+{
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	struct v3d_queue_pid_stats *cur, *tmp;
+	u64 runtime = 0;
+	bool purge_all_pid_stats = 0;
+
+	/* If debugfs stats gpu_pid_usage has not been polled for a period,
+	 * the pid stats collection is stopped and we purge any existing
+	 * pid_stats.
+	 *
+	 * pid_stats are also purged for clients that have reached the
+	 * timeout_purge because the process probably does not exist anymore.
+	 */
+	if (queue_stats->collect_pid_stats) {
+		if (time_is_before_jiffies(queue_stats->pid_stats_timeout)) {
+			purge_all_pid_stats = 1;
+			queue_stats->collect_pid_stats = 0;
+		}
+		list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
+			if (time_is_before_jiffies(cur->timeout_purge) ||
+			    purge_all_pid_stats) {
+				list_del(&cur->list);
+				kfree(cur);
+			} else {
+				break;
+			}
+		}
+	}
+	/* If a job has finished its stats are updated. */
+	if (queue_stats->last_pid && queue_stats->last_exec_end) {
+		runtime = queue_stats->last_exec_end -
+			  queue_stats->last_exec_start;
+		queue_stats->runtime += runtime;
+
+		if (queue_stats->collect_pid_stats) {
+			struct v3d_queue_pid_stats *pid_stats;
+			/* Last job info is always at the head of the list */
+			pid_stats = list_first_entry_or_null(pid_stats_list,
+				struct v3d_queue_pid_stats, list);
+			if (pid_stats &&
+			    pid_stats->pid == queue_stats->last_pid) {
+				pid_stats->runtime += runtime;
+			}
+		}
+		queue_stats->last_pid = 0;
+	}
+}
+
+/*
+ * Updates the queue usage adding the information of a new job that is
+ * about to be sent to the GPU to be executed.
+ */
+static int
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
+			struct drm_sched_job *sched_job)
+{
+
+	struct v3d_queue_pid_stats *pid_stats = NULL;
+	struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
+	struct v3d_queue_pid_stats *cur;
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	int ret = 0;
+
+	mutex_lock(&queue_stats->lock);
+
+	/* Completion of previous job requires an update of its runtime */
+	v3d_sched_stats_update(queue_stats);
+
+	queue_stats->last_exec_start = local_clock();
+	queue_stats->last_exec_end = 0;
+	queue_stats->jobs_sent++;
+	queue_stats->last_pid = job->client_pid;
+
+	/* gpu usage stats by process are being collected */
+	if (queue_stats->collect_pid_stats) {
+		list_for_each_entry(cur, pid_stats_list, list) {
+			if (cur->pid == job->client_pid) {
+				pid_stats = cur;
+				break;
+			}
+		}
+		/* client pid_stats is moved to the head of the list. */
+		if (pid_stats) {
+			list_move(&pid_stats->list, pid_stats_list);
+		} else {
+			pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
+					    GFP_KERNEL);
+			if (!pid_stats) {
+				ret = -ENOMEM;
+				goto err_mem;
+			}
+			pid_stats->pid = job->client_pid;
+			list_add(&pid_stats->list, pid_stats_list);
+		}
+		pid_stats->jobs_sent++;
+		pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+	}
+
+err_mem:
+	mutex_unlock(&queue_stats->lock);
+	return ret;
+}
+
 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_bin_job *job = to_bin_job(sched_job);
@@ -107,6 +222,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* Set the current and end address of the control list.
@@ -158,6 +274,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
 	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* XXX: Set the QCFG */
@@ -190,6 +307,8 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
+
 	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
 	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
 	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
@@ -231,6 +350,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job)
 
 	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	for (i = 1; i <= 6; i++)
@@ -247,7 +367,10 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_dev *v3d = job->v3d;
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
+				sched_job);
 	v3d_clean_caches(v3d);
+	v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
 
 	return NULL;
 }
@@ -385,8 +508,18 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int hw_jobs_limit = 1;
 	int job_hang_limit = 0;
 	int hang_limit_ms = 500;
+	enum v3d_queue q;
 	int ret;
 
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
+		/* Setting timeout before current jiffies disables collecting
+		 * pid_stats on scheduling init.
+		 */
+		v3d->gpu_queue_stats[q].pid_stats_timeout = jiffies - 1;
+		mutex_init(&v3d->gpu_queue_stats[q].lock);
+	}
+
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
 			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
@@ -440,9 +573,20 @@ void
 v3d_sched_fini(struct v3d_dev *v3d)
 {
 	enum v3d_queue q;
+	struct v3d_queue_stats *queue_stats;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		if (v3d->queue[q].sched.ready)
+		if (v3d->queue[q].sched.ready) {
+			queue_stats = &v3d->gpu_queue_stats[q];
+			mutex_lock(&queue_stats->lock);
+			/* Setting pid_stats_timeout to jiffies-1 will make
+			 * v3d_sched_stats_update purge all allocated
+			 * pid_stats.
+			 */
+			queue_stats->pid_stats_timeout = jiffies - 1;
+			v3d_sched_stats_update(queue_stats);
+			mutex_unlock(&queue_stats->lock);
 			drm_sched_fini(&v3d->queue[q].sched);
+		}
 	}
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-02-28 12:00 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-10 14:34 [PATCH] drm/v3d: New debugfs end-points to query GPU usage stats Jose Maria Casanova Crespo
2023-02-10 16:40 ` kernel test robot
2023-02-10 16:40   ` kernel test robot
2023-02-28 11:44   ` [PATCH v2] " Jose Maria Casanova Crespo
2023-02-28 11:44     ` Jose Maria Casanova Crespo
2023-02-11  7:47 ` [PATCH] " kernel test robot
2023-02-11  7:47   ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.