All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Support fdinfo runtime and memory stats on Panthor
@ 2024-03-05 21:05 Adrián Larumbe
  2024-03-05 21:05 ` [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements Adrián Larumbe
  2024-03-05 21:05 ` [PATCH 2/2] drm/panthor: Enable fdinfo for memory stats Adrián Larumbe
  0 siblings, 2 replies; 6+ messages in thread
From: Adrián Larumbe @ 2024-03-05 21:05 UTC (permalink / raw)
  To: boris.brezillon, steven.price, liviu.dudau, maarten.lankhorst,
	mripard, tzimmermann, airlied, daniel
  Cc: adrian.larumbe, dri-devel, linux-kernel, kernel

This patch series enables userspace utilities like gputop and nvtop to
query a render context's fdinfo file and figure out rates of engine
and memory utilisation.

Adrián Larumbe (2):
  drm/panthor: Enable fdinfo for cycle and time measurements
  drm/panthor: Enable fdinfo for memory stats

 drivers/gpu/drm/panthor/panthor_devfreq.c |  10 +
 drivers/gpu/drm/panthor/panthor_device.h  |  11 ++
 drivers/gpu/drm/panthor/panthor_drv.c     |  32 ++++
 drivers/gpu/drm/panthor/panthor_gem.c     |  12 ++
 drivers/gpu/drm/panthor/panthor_sched.c   | 217 +++++++++++++++++++---
 5 files changed, 254 insertions(+), 28 deletions(-)


base-commit: e635b7eb7062b464bbd9795308b1a80eac0b01f5
-- 
2.43.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements
  2024-03-05 21:05 [PATCH 0/2] Support fdinfo runtime and memory stats on Panthor Adrián Larumbe
@ 2024-03-05 21:05 ` Adrián Larumbe
  2024-03-08  8:47   ` kernel test robot
  2024-03-28 15:49   ` Liviu Dudau
  2024-03-05 21:05 ` [PATCH 2/2] drm/panthor: Enable fdinfo for memory stats Adrián Larumbe
  1 sibling, 2 replies; 6+ messages in thread
From: Adrián Larumbe @ 2024-03-05 21:05 UTC (permalink / raw)
  To: boris.brezillon, steven.price, liviu.dudau, maarten.lankhorst,
	mripard, tzimmermann, airlied, daniel
  Cc: adrian.larumbe, dri-devel, linux-kernel, kernel

These values are sampled by the firmware right before jumping into the UM
command stream and immediately after returning from it, and then kept inside a
per-job accounting structure. That structure is held inside the group's syncobjs
buffer object, at an offset that depends on the job's queue slot number and the
queue's index within the group.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_devfreq.c |  10 +
 drivers/gpu/drm/panthor/panthor_device.h  |  11 ++
 drivers/gpu/drm/panthor/panthor_drv.c     |  31 ++++
 drivers/gpu/drm/panthor/panthor_sched.c   | 217 +++++++++++++++++++---
 4 files changed, 241 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
index 7ac4fa290f27..51a7b734edcd 100644
--- a/drivers/gpu/drm/panthor/panthor_devfreq.c
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
@@ -91,6 +91,7 @@ static int panthor_devfreq_get_dev_status(struct device *dev,
 	spin_lock_irqsave(&pdevfreq->lock, irqflags);
 
 	panthor_devfreq_update_utilization(pdevfreq);
+	ptdev->current_frequency = status->current_frequency;
 
 	status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time,
 						   pdevfreq->idle_time));
@@ -130,6 +131,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
 	struct panthor_devfreq *pdevfreq;
 	struct dev_pm_opp *opp;
 	unsigned long cur_freq;
+	unsigned long freq = ULONG_MAX;
 	int ret;
 
 	pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
@@ -204,6 +206,14 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
 
 	dev_pm_opp_put(opp);
 
+	/* Find the fastest defined rate  */
+	opp = dev_pm_opp_find_freq_floor(dev, &freq);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+	ptdev->fast_rate = freq;
+
+	dev_pm_opp_put(opp);
+
 	/*
 	 * Setup default thresholds for the simple_ondemand governor.
 	 * The values are chosen based on experiments.
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 51c9d61b6796..10e970921ca3 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -162,6 +162,14 @@ struct panthor_device {
 		 */
 		u32 *dummy_latest_flush;
 	} pm;
+
+	unsigned long current_frequency;
+	unsigned long fast_rate;
+};
+
+struct panthor_gpu_usage {
+	u64 time;
+	u64 cycles;
 };
 
 /**
@@ -176,6 +184,9 @@ struct panthor_file {
 
 	/** @groups: Scheduling group pool attached to this file. */
 	struct panthor_group_pool *groups;
+
+	/** @stats: cycle and timestamp measures for job execution. */
+	struct panthor_gpu_usage stats;
 };
 
 int panthor_device_init(struct panthor_device *ptdev);
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index ff484506229f..fa06b9e2c6cd 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -3,6 +3,10 @@
 /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
 /* Copyright 2019 Collabora ltd. */
 
+#ifdef CONFIG_HAVE_ARM_ARCH_TIMER
+#include <asm/arch_timer.h>
+#endif
+
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
@@ -28,6 +32,8 @@
 #include "panthor_regs.h"
 #include "panthor_sched.h"
 
+#define NS_PER_SEC      1000000000ULL
+
 /**
  * DOC: user <-> kernel object copy helpers.
  */
@@ -1336,6 +1342,29 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
 	return ret;
 }
 
+static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev,
+				    struct panthor_file *pfile,
+				    struct drm_printer *p)
+{
+#ifdef CONFIG_HAVE_ARM_ARCH_TIMER
+	drm_printf(p, "drm-engine-panthor:\t%llu ns\n",
+		   DIV_ROUND_UP_ULL((pfile->stats.time * NS_PER_SEC),
+				    arch_timer_get_cntfrq()));
+#endif
+	drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles);
+	drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate);
+	drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency);
+}
+
+static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct drm_device *dev = file->minor->dev;
+	struct panthor_device *ptdev = container_of(dev, struct panthor_device, base);
+
+	panthor_gpu_show_fdinfo(ptdev, file->driver_priv, p);
+
+}
+
 static const struct file_operations panthor_drm_driver_fops = {
 	.open = drm_open,
 	.release = drm_release,
@@ -1345,6 +1374,7 @@ static const struct file_operations panthor_drm_driver_fops = {
 	.read = drm_read,
 	.llseek = noop_llseek,
 	.mmap = panthor_mmap,
+	.show_fdinfo = drm_show_fdinfo,
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -1363,6 +1393,7 @@ static const struct drm_driver panthor_drm_driver = {
 			   DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
 	.open = panthor_open,
 	.postclose = panthor_postclose,
+	.show_fdinfo = panthor_show_fdinfo,
 	.ioctls = panthor_drm_driver_ioctls,
 	.num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls),
 	.fops = &panthor_drm_driver_fops,
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 5f7803b6fc48..751d1453e7a1 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -93,6 +93,8 @@
 #define MIN_CSGS				3
 #define MAX_CSG_PRIO				0xf
 
+#define SLOTSIZE				(NUM_INSTRS_PER_SLOT * sizeof(u64))
+
 struct panthor_group;
 
 /**
@@ -393,7 +395,13 @@ struct panthor_queue {
 #define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
 
 	/** @ringbuf: Command stream ring-buffer. */
-	struct panthor_kernel_bo *ringbuf;
+	struct {
+		/** @ringbuf: Kernel BO that holds ring buffer. */
+		struct panthor_kernel_bo *bo;
+
+		/** @nelem: Number of slots in the ring buffer. */
+		unsigned int nelem;
+	} ringbuf;
 
 	/** @iface: Firmware interface. */
 	struct {
@@ -466,6 +474,9 @@ struct panthor_queue {
 		 */
 		struct list_head in_flight_jobs;
 	} fence_ctx;
+
+	/** @time_offset: Offset of fdinfo stats structs in queue's syncobj. */
+	unsigned long time_offset;
 };
 
 /**
@@ -580,7 +591,26 @@ struct panthor_group {
 	 * One sync object per queue. The position of the sync object is
 	 * determined by the queue index.
 	 */
-	struct panthor_kernel_bo *syncobjs;
+
+	struct {
+		/** @bo: Kernel BO holding the sync objects. */
+		struct panthor_kernel_bo *bo;
+
+		/** @times_offset: Beginning of time stats after objects of sync pool. */
+		size_t times_offset;
+	} syncobjs;
+
+	/** @fdinfo: Per-file total cycle and timestamp values reference. */
+	struct {
+		/** @data: Pointer to actual per-file sample data. */
+		struct panthor_gpu_usage *data;
+
+		/**
+		 * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
+		 * and job post-completion processing function
+		 */
+		struct mutex lock;
+	} fdinfo;
 
 	/** @state: Group state. */
 	enum panthor_group_state state;
@@ -639,6 +669,18 @@ struct panthor_group {
 	struct list_head wait_node;
 };
 
+struct panthor_job_times {
+	struct {
+		u64 before;
+		u64 after;
+	} cycles;
+
+	struct {
+		u64 before;
+		u64 after;
+	} time;
+};
+
 /**
  * group_queue_work() - Queue a group work
  * @group: Group to queue the work for.
@@ -718,6 +760,9 @@ struct panthor_job {
 	/** @queue_idx: Index of the queue inside @group. */
 	u32 queue_idx;
 
+	/** @ringbuf_idx: Index of the queue inside @queue. */
+	u32 ringbuf_idx;
+
 	/** @call_info: Information about the userspace command stream call. */
 	struct {
 		/** @start: GPU address of the userspace command stream. */
@@ -814,7 +859,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
 
 	panthor_queue_put_syncwait_obj(queue);
 
-	panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
+	panthor_kernel_bo_destroy(group->vm, queue->ringbuf.bo);
 	panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
 
 	kfree(queue);
@@ -828,12 +873,14 @@ static void group_release_work(struct work_struct *work)
 	struct panthor_device *ptdev = group->ptdev;
 	u32 i;
 
+	mutex_destroy(&group->fdinfo.lock);
+
 	for (i = 0; i < group->queue_count; i++)
 		group_free_queue(group, group->queues[i]);
 
 	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
 	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
-	panthor_kernel_bo_destroy(group->vm, group->syncobjs);
+	panthor_kernel_bo_destroy(group->vm, group->syncobjs.bo);
 
 	panthor_vm_put(group->vm);
 	kfree(group);
@@ -970,8 +1017,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
 	queue->iface.input->extract = queue->iface.output->extract;
 	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
 
-	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
-	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
+	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf.bo);
+	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf.bo);
 	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
 	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
 	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
@@ -1926,7 +1973,7 @@ tick_ctx_init(struct panthor_scheduler *sched,
 	}
 }
 
-#define NUM_INSTRS_PER_SLOT		16
+#define NUM_INSTRS_PER_SLOT		32
 
 static void
 group_term_post_processing(struct panthor_group *group)
@@ -1964,7 +2011,7 @@ group_term_post_processing(struct panthor_group *group)
 		spin_unlock(&queue->fence_ctx.lock);
 
 		/* Manually update the syncobj seqno to unblock waiters. */
-		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
+		syncobj = group->syncobjs.bo->kmap + (i * sizeof(*syncobj));
 		syncobj->status = ~0;
 		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
 		sched_queue_work(group->ptdev->scheduler, sync_upd);
@@ -2715,6 +2762,30 @@ void panthor_sched_post_reset(struct panthor_device *ptdev)
 	sched_queue_work(sched, sync_upd);
 }
 
+static void update_fdinfo_stats(struct panthor_job *job)
+{
+	struct panthor_group *group = job->group;
+	struct panthor_queue *queue = group->queues[job->queue_idx];
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_gpu_usage *fdinfo;
+	struct panthor_job_times *times;
+
+	if (drm_WARN_ON(&ptdev->base, job->ringbuf_idx >= queue->ringbuf.nelem))
+		return;
+
+	times = (struct panthor_job_times *)
+		((unsigned long)group->syncobjs.bo->kmap + queue->time_offset +
+		 (job->ringbuf_idx * sizeof(struct panthor_job_times)));
+
+	mutex_lock(&group->fdinfo.lock);
+	if ((group->fdinfo.data)) {
+		fdinfo = group->fdinfo.data;
+		fdinfo->cycles += times->cycles.after - times->cycles.before;
+		fdinfo->time += times->time.after - times->time.before;
+	}
+	mutex_unlock(&group->fdinfo.lock);
+}
+
 static void group_sync_upd_work(struct work_struct *work)
 {
 	struct panthor_group *group =
@@ -2732,7 +2803,7 @@ static void group_sync_upd_work(struct work_struct *work)
 		if (!queue)
 			continue;
 
-		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
+		syncobj = group->syncobjs.bo->kmap + (queue_idx * sizeof(*syncobj));
 
 		spin_lock(&queue->fence_ctx.lock);
 		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
@@ -2750,6 +2821,7 @@ static void group_sync_upd_work(struct work_struct *work)
 	dma_fence_end_signalling(cookie);
 
 	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
+		update_fdinfo_stats(job);
 		list_del_init(&job->node);
 		panthor_job_put(&job->base);
 	}
@@ -2765,13 +2837,19 @@ queue_run_job(struct drm_sched_job *sched_job)
 	struct panthor_queue *queue = group->queues[job->queue_idx];
 	struct panthor_device *ptdev = group->ptdev;
 	struct panthor_scheduler *sched = ptdev->scheduler;
-	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
+	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf.bo);
 	u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
+	u32 ringbuf_index = ringbuf_insert / (SLOTSIZE);
 	u64 addr_reg = ptdev->csif_info.cs_reg_count -
 		       ptdev->csif_info.unpreserved_cs_reg_count;
 	u64 val_reg = addr_reg + 2;
-	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
-			job->queue_idx * sizeof(struct panthor_syncobj_64b);
+	u64 cycle_reg = addr_reg;
+	u64 time_reg = val_reg;
+	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs.bo) +
+		job->queue_idx * sizeof(struct panthor_syncobj_64b);
+	u64 times_addr = panthor_kernel_bo_gpuva(group->syncobjs.bo) + queue->time_offset +
+		(ringbuf_index * sizeof(struct panthor_job_times));
+
 	u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
 	struct dma_fence *done_fence;
 	int ret;
@@ -2783,6 +2861,18 @@ queue_run_job(struct drm_sched_job *sched_job)
 		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
 		(36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
 
+		/* MOV48 rX:rX+1, cycles_offset */
+		(1ull << 56) | (cycle_reg << 48) | (times_addr + offsetof(struct panthor_job_times, cycles.before)),
+
+		/* MOV48 rX:rX+1, time_offset */
+		(1ull << 56) | (time_reg << 48) | (times_addr + offsetof(struct panthor_job_times, time.before)),
+
+		/* STORE_STATE cycles */
+		(40ull << 56) |  (cycle_reg << 40) | (1ll << 32),
+
+		/* STORE_STATE timer */
+		(40ull << 56) |  (time_reg << 40) | (0ll << 32),
+
 		/* MOV48 rX:rX+1, cs.start */
 		(1ull << 56) | (addr_reg << 48) | job->call_info.start,
 
@@ -2795,6 +2885,18 @@ queue_run_job(struct drm_sched_job *sched_job)
 		/* CALL rX:rX+1, rX+2 */
 		(32ull << 56) | (addr_reg << 40) | (val_reg << 32),
 
+		/* MOV48 rX:rX+1, cycles_offset */
+		(1ull << 56) | (cycle_reg << 48) | (times_addr + offsetof(struct panthor_job_times, cycles.after)),
+
+		/* MOV48 rX:rX+1, time_offset */
+		(1ull << 56) | (time_reg << 48) | (times_addr + offsetof(struct panthor_job_times, time.after)),
+
+		/* STORE_STATE cycles */
+		(40ull << 56) |  (cycle_reg << 40) | (1ll << 32),
+
+		/* STORE_STATE timer */
+		(40ull << 56) |  (time_reg << 40) | (0ll << 32),
+
 		/* MOV48 rX:rX+1, sync_addr */
 		(1ull << 56) | (addr_reg << 48) | sync_addr,
 
@@ -2839,7 +2941,7 @@ queue_run_job(struct drm_sched_job *sched_job)
 		       queue->fence_ctx.id,
 		       atomic64_inc_return(&queue->fence_ctx.seqno));
 
-	memcpy(queue->ringbuf->kmap + ringbuf_insert,
+	memcpy(queue->ringbuf.bo->kmap + ringbuf_insert,
 	       call_instrs, sizeof(call_instrs));
 
 	panthor_job_get(&job->base);
@@ -2849,6 +2951,7 @@ queue_run_job(struct drm_sched_job *sched_job)
 
 	job->ringbuf.start = queue->iface.input->insert;
 	job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
+	job->ringbuf_idx = ringbuf_index;
 
 	/* Make sure the ring buffer is updated before the INSERT
 	 * register.
@@ -2939,7 +3042,8 @@ static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
 
 static struct panthor_queue *
 group_create_queue(struct panthor_group *group,
-		   const struct drm_panthor_queue_create *args)
+		   const struct drm_panthor_queue_create *args,
+		   unsigned int slots_so_far)
 {
 	struct drm_gpu_scheduler *drm_sched;
 	struct panthor_queue *queue;
@@ -2965,21 +3069,23 @@ group_create_queue(struct panthor_group *group,
 
 	queue->priority = args->priority;
 
-	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
+	queue->ringbuf.bo = panthor_kernel_bo_create(group->ptdev, group->vm,
 						  args->ringbuf_size,
 						  DRM_PANTHOR_BO_NO_MMAP,
 						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
 						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
 						  PANTHOR_VM_KERNEL_AUTO_VA);
-	if (IS_ERR(queue->ringbuf)) {
-		ret = PTR_ERR(queue->ringbuf);
+	if (IS_ERR(queue->ringbuf.bo)) {
+		ret = PTR_ERR(queue->ringbuf.bo);
 		goto err_free_queue;
 	}
 
-	ret = panthor_kernel_bo_vmap(queue->ringbuf);
+	ret = panthor_kernel_bo_vmap(queue->ringbuf.bo);
 	if (ret)
 		goto err_free_queue;
 
+	queue->ringbuf.nelem = (args->ringbuf_size / (SLOTSIZE));
+
 	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
 							    &queue->iface.input,
 							    &queue->iface.output,
@@ -2990,6 +3096,9 @@ group_create_queue(struct panthor_group *group,
 		goto err_free_queue;
 	}
 
+	queue->time_offset = group->syncobjs.times_offset +
+		(slots_so_far * sizeof(struct panthor_job_times));
+
 	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
 			     group->ptdev->scheduler->wq, 1,
 			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
@@ -3020,6 +3129,7 @@ int panthor_group_create(struct panthor_file *pfile,
 	struct panthor_scheduler *sched = ptdev->scheduler;
 	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
 	struct panthor_group *group = NULL;
+	unsigned int total_slots;
 	u32 gid, i, suspend_size;
 	int ret;
 
@@ -3086,33 +3196,77 @@ int panthor_group_create(struct panthor_file *pfile,
 		goto err_put_group;
 	}
 
-	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
-						   group_args->queues.count *
-						   sizeof(struct panthor_syncobj_64b),
+	/*
+	 * Need to add size for the fdinfo sample structs, as many as the sum
+	 * of the number of job slots for every single queue ringbuffer.
+	 */
+
+	for (i = 0, total_slots = 0; i < group_args->queues.count; i++)
+		total_slots += (queue_args[i].ringbuf_size / (SLOTSIZE));
+
+	/*
+	 * Memory layout of group's syncobjs BO
+	 * group->syncobjs.bo {
+	 *	struct panthor_syncobj_64b sync1;
+	 *	struct panthor_syncobj_64b sync2;
+	 *		...
+	 *		As many as group_args->queues.count
+	 *		...
+	 *	struct panthor_syncobj_64b syncn;
+	 *	struct panthor_job_times queue_1slot_1
+	 *	struct panthor_job_times queue_1slot_2
+	 *		...
+	 *		As many as queue[i].ringbuf_size / SLOTSIZE
+	 *		...
+	 *	struct panthor_job_times queue_1slot_p
+	 *		...
+	 *		As many as group_args->queues.count
+	 *		...
+	 *	struct panthor_job_times queue_nslot_1
+	 *	struct panthor_job_times queue_nslot_2
+	 *		...
+	 *		As many as queue[n].ringbuf_size / SLOTSIZE
+	 *	struct panthor_job_times queue_nslot_q
+	 *
+	 *	Linearly, group->syncobjs.bo = {syncojb1,..,syncobjN,
+	 *	{queue1 = {js1,..,jsp},..,queueN = {js1,..,jsq}}}
+	 * }
+	 *
+	 */
+
+	group->syncobjs.bo = panthor_kernel_bo_create(ptdev, group->vm,
+						   (group_args->queues.count *
+						    sizeof(struct panthor_syncobj_64b))
+						   + (total_slots * sizeof(struct panthor_job_times)),
 						   DRM_PANTHOR_BO_NO_MMAP,
 						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
 						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
 						   PANTHOR_VM_KERNEL_AUTO_VA);
-	if (IS_ERR(group->syncobjs)) {
-		ret = PTR_ERR(group->syncobjs);
+	if (IS_ERR(group->syncobjs.bo)) {
+		ret = PTR_ERR(group->syncobjs.bo);
 		goto err_put_group;
 	}
 
-	ret = panthor_kernel_bo_vmap(group->syncobjs);
+	ret = panthor_kernel_bo_vmap(group->syncobjs.bo);
 	if (ret)
 		goto err_put_group;
 
-	memset(group->syncobjs->kmap, 0,
-	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
+	memset(group->syncobjs.bo->kmap, 0,
+	       (group_args->queues.count * sizeof(struct panthor_syncobj_64b)) +
+	       (total_slots * sizeof(struct panthor_job_times)));
 
-	for (i = 0; i < group_args->queues.count; i++) {
-		group->queues[i] = group_create_queue(group, &queue_args[i]);
+	group->syncobjs.times_offset =
+		group_args->queues.count * sizeof(struct panthor_syncobj_64b);
+
+	for (i = 0, total_slots = 0; i < group_args->queues.count; i++) {
+		group->queues[i] = group_create_queue(group, &queue_args[i], total_slots);
 		if (IS_ERR(group->queues[i])) {
 			ret = PTR_ERR(group->queues[i]);
 			group->queues[i] = NULL;
 			goto err_put_group;
 		}
 
+		total_slots += (queue_args[i].ringbuf_size / (SLOTSIZE));
 		group->queue_count++;
 	}
 
@@ -3133,6 +3287,9 @@ int panthor_group_create(struct panthor_file *pfile,
 	}
 	mutex_unlock(&sched->reset.lock);
 
+	group->fdinfo.data = &pfile->stats;
+	mutex_init(&group->fdinfo.lock);
+
 	return gid;
 
 err_put_group:
@@ -3172,6 +3329,10 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
 	mutex_unlock(&sched->lock);
 	mutex_unlock(&sched->reset.lock);
 
+	mutex_lock(&group->fdinfo.lock);
+	group->fdinfo.data = NULL;
+	mutex_unlock(&group->fdinfo.lock);
+
 	group_put(group);
 	return 0;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/panthor: Enable fdinfo for memory stats
  2024-03-05 21:05 [PATCH 0/2] Support fdinfo runtime and memory stats on Panthor Adrián Larumbe
  2024-03-05 21:05 ` [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements Adrián Larumbe
@ 2024-03-05 21:05 ` Adrián Larumbe
  1 sibling, 0 replies; 6+ messages in thread
From: Adrián Larumbe @ 2024-03-05 21:05 UTC (permalink / raw)
  To: boris.brezillon, steven.price, liviu.dudau, maarten.lankhorst,
	mripard, tzimmermann, airlied, daniel
  Cc: adrian.larumbe, dri-devel, linux-kernel, kernel

When vm-binding an already-created BO, the entirety of its virtual size is
then backed by system memory, so its RSS is always the same as its virtual
size.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_drv.c |  1 +
 drivers/gpu/drm/panthor/panthor_gem.c | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index fa06b9e2c6cd..a5398e161f75 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -1363,6 +1363,7 @@ static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file)
 
 	panthor_gpu_show_fdinfo(ptdev, file->driver_priv, p);
 
+	drm_show_memory_stats(p, file);
 }
 
 static const struct file_operations panthor_drm_driver_fops = {
diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
index d6483266d0c2..845724e3fd93 100644
--- a/drivers/gpu/drm/panthor/panthor_gem.c
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -143,6 +143,17 @@ panthor_gem_prime_export(struct drm_gem_object *obj, int flags)
 	return drm_gem_prime_export(obj, flags);
 }
 
+static enum drm_gem_object_status panthor_gem_status(struct drm_gem_object *obj)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(obj);
+	enum drm_gem_object_status res = 0;
+
+	if (bo->base.pages)
+		res |= DRM_GEM_OBJECT_RESIDENT;
+
+	return res;
+}
+
 static const struct drm_gem_object_funcs panthor_gem_funcs = {
 	.free = panthor_gem_free_object,
 	.print_info = drm_gem_shmem_object_print_info,
@@ -152,6 +163,7 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = {
 	.vmap = drm_gem_shmem_object_vmap,
 	.vunmap = drm_gem_shmem_object_vunmap,
 	.mmap = panthor_gem_mmap,
+	.status = panthor_gem_status,
 	.export = panthor_gem_prime_export,
 	.vm_ops = &drm_gem_shmem_vm_ops,
 };
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements
  2024-03-05 21:05 ` [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements Adrián Larumbe
@ 2024-03-08  8:47   ` kernel test robot
  2024-03-28 15:49   ` Liviu Dudau
  1 sibling, 0 replies; 6+ messages in thread
From: kernel test robot @ 2024-03-08  8:47 UTC (permalink / raw)
  To: Adrián Larumbe, boris.brezillon, steven.price, liviu.dudau,
	maarten.lankhorst, mripard, tzimmermann, airlied, daniel
  Cc: oe-kbuild-all, adrian.larumbe, dri-devel, linux-kernel, kernel

Hi Adrián,

kernel test robot noticed the following build warnings:

[auto build test WARNING on e635b7eb7062b464bbd9795308b1a80eac0b01f5]

url:    https://github.com/intel-lab-lkp/linux/commits/Adri-n-Larumbe/drm-panthor-Enable-fdinfo-for-cycle-and-time-measurements/20240306-051418
base:   e635b7eb7062b464bbd9795308b1a80eac0b01f5
patch link:    https://lore.kernel.org/r/20240305211000.659103-2-adrian.larumbe%40collabora.com
patch subject: [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements
config: loongarch-allmodconfig (https://download.01.org/0day-ci/archive/20240308/202403081627.Y50ZoOIS-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240308/202403081627.Y50ZoOIS-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202403081627.Y50ZoOIS-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/panthor/panthor_sched.c:321: warning: Excess struct member 'runnable' description in 'panthor_scheduler'
   drivers/gpu/drm/panthor/panthor_sched.c:321: warning: Excess struct member 'idle' description in 'panthor_scheduler'
   drivers/gpu/drm/panthor/panthor_sched.c:321: warning: Excess struct member 'waiting' description in 'panthor_scheduler'
   drivers/gpu/drm/panthor/panthor_sched.c:321: warning: Excess struct member 'has_ref' description in 'panthor_scheduler'
   drivers/gpu/drm/panthor/panthor_sched.c:321: warning: Excess struct member 'in_progress' description in 'panthor_scheduler'
   drivers/gpu/drm/panthor/panthor_sched.c:321: warning: Excess struct member 'stopped_groups' description in 'panthor_scheduler'
>> drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'nelem' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'mem' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'input' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'output' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'input_fw_va' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'output_fw_va' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'gpu_va' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'ref' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'gt' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'sync64' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'bo' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'offset' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'kmap' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'lock' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'id' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'seqno' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:480: warning: Excess struct member 'in_flight_jobs' description in 'panthor_queue'
   drivers/gpu/drm/panthor/panthor_sched.c:670: warning: Function parameter or struct member 'max_fragment_cores' not described in 'panthor_group'
>> drivers/gpu/drm/panthor/panthor_sched.c:670: warning: Excess struct member 'bo' description in 'panthor_group'
>> drivers/gpu/drm/panthor/panthor_sched.c:670: warning: Excess struct member 'times_offset' description in 'panthor_group'
>> drivers/gpu/drm/panthor/panthor_sched.c:670: warning: Excess struct member 'data' description in 'panthor_group'
>> drivers/gpu/drm/panthor/panthor_sched.c:670: warning: Excess struct member 'lock' description in 'panthor_group'
   drivers/gpu/drm/panthor/panthor_sched.c:800: warning: Excess struct member 'start' description in 'panthor_job'
   drivers/gpu/drm/panthor/panthor_sched.c:800: warning: Excess struct member 'size' description in 'panthor_job'
   drivers/gpu/drm/panthor/panthor_sched.c:800: warning: Excess struct member 'latest_flush' description in 'panthor_job'
   drivers/gpu/drm/panthor/panthor_sched.c:800: warning: Excess struct member 'start' description in 'panthor_job'
   drivers/gpu/drm/panthor/panthor_sched.c:800: warning: Excess struct member 'end' description in 'panthor_job'
   drivers/gpu/drm/panthor/panthor_sched.c:1043: warning: Cannot understand  * @cs_slot_reset_locked() - Reset a queue slot
    on line 1043 - I thought it was a doc line
   drivers/gpu/drm/panthor/panthor_sched.c:1645: warning: expecting prototype for panthor_sched_process_global_irq(). Prototype was for sched_process_global_irq_locked() instead
   drivers/gpu/drm/panthor/panthor_sched.c:1687: warning: Function parameter or struct member 'ptdev' not described in 'panthor_sched_report_fw_events'
   drivers/gpu/drm/panthor/panthor_sched.c:1687: warning: Function parameter or struct member 'events' not described in 'panthor_sched_report_fw_events'
   drivers/gpu/drm/panthor/panthor_sched.c:1711: warning: Cannot understand  */
    on line 1711 - I thought it was a doc line
   drivers/gpu/drm/panthor/panthor_sched.c:2583: warning: Function parameter or struct member 'ptdev' not described in 'panthor_sched_report_mmu_fault'


vim +480 drivers/gpu/drm/panthor/panthor_sched.c

de85488138247d Boris Brezillon 2024-02-29  396  
de85488138247d Boris Brezillon 2024-02-29  397  	/** @ringbuf: Command stream ring-buffer. */
93e3ba65963905 Adrián Larumbe  2024-03-05  398  	struct {
93e3ba65963905 Adrián Larumbe  2024-03-05  399  		/** @ringbuf: Kernel BO that holds ring buffer. */
93e3ba65963905 Adrián Larumbe  2024-03-05  400  		struct panthor_kernel_bo *bo;
93e3ba65963905 Adrián Larumbe  2024-03-05  401  
93e3ba65963905 Adrián Larumbe  2024-03-05  402  		/** @nelem: Number of slots in the ring buffer. */
93e3ba65963905 Adrián Larumbe  2024-03-05  403  		unsigned int nelem;
93e3ba65963905 Adrián Larumbe  2024-03-05  404  	} ringbuf;
de85488138247d Boris Brezillon 2024-02-29  405  
de85488138247d Boris Brezillon 2024-02-29  406  	/** @iface: Firmware interface. */
de85488138247d Boris Brezillon 2024-02-29  407  	struct {
de85488138247d Boris Brezillon 2024-02-29  408  		/** @mem: FW memory allocated for this interface. */
de85488138247d Boris Brezillon 2024-02-29  409  		struct panthor_kernel_bo *mem;
de85488138247d Boris Brezillon 2024-02-29  410  
de85488138247d Boris Brezillon 2024-02-29  411  		/** @input: Input interface. */
de85488138247d Boris Brezillon 2024-02-29  412  		struct panthor_fw_ringbuf_input_iface *input;
de85488138247d Boris Brezillon 2024-02-29  413  
de85488138247d Boris Brezillon 2024-02-29  414  		/** @output: Output interface. */
de85488138247d Boris Brezillon 2024-02-29  415  		const struct panthor_fw_ringbuf_output_iface *output;
de85488138247d Boris Brezillon 2024-02-29  416  
de85488138247d Boris Brezillon 2024-02-29  417  		/** @input_fw_va: FW virtual address of the input interface buffer. */
de85488138247d Boris Brezillon 2024-02-29  418  		u32 input_fw_va;
de85488138247d Boris Brezillon 2024-02-29  419  
de85488138247d Boris Brezillon 2024-02-29  420  		/** @output_fw_va: FW virtual address of the output interface buffer. */
de85488138247d Boris Brezillon 2024-02-29  421  		u32 output_fw_va;
de85488138247d Boris Brezillon 2024-02-29  422  	} iface;
de85488138247d Boris Brezillon 2024-02-29  423  
de85488138247d Boris Brezillon 2024-02-29  424  	/**
de85488138247d Boris Brezillon 2024-02-29  425  	 * @syncwait: Stores information about the synchronization object this
de85488138247d Boris Brezillon 2024-02-29  426  	 * queue is waiting on.
de85488138247d Boris Brezillon 2024-02-29  427  	 */
de85488138247d Boris Brezillon 2024-02-29  428  	struct {
de85488138247d Boris Brezillon 2024-02-29  429  		/** @gpu_va: GPU address of the synchronization object. */
de85488138247d Boris Brezillon 2024-02-29  430  		u64 gpu_va;
de85488138247d Boris Brezillon 2024-02-29  431  
de85488138247d Boris Brezillon 2024-02-29  432  		/** @ref: Reference value to compare against. */
de85488138247d Boris Brezillon 2024-02-29  433  		u64 ref;
de85488138247d Boris Brezillon 2024-02-29  434  
de85488138247d Boris Brezillon 2024-02-29  435  		/** @gt: True if this is a greater-than test. */
de85488138247d Boris Brezillon 2024-02-29  436  		bool gt;
de85488138247d Boris Brezillon 2024-02-29  437  
de85488138247d Boris Brezillon 2024-02-29  438  		/** @sync64: True if this is a 64-bit sync object. */
de85488138247d Boris Brezillon 2024-02-29  439  		bool sync64;
de85488138247d Boris Brezillon 2024-02-29  440  
de85488138247d Boris Brezillon 2024-02-29  441  		/** @bo: Buffer object holding the synchronization object. */
de85488138247d Boris Brezillon 2024-02-29  442  		struct drm_gem_object *obj;
de85488138247d Boris Brezillon 2024-02-29  443  
de85488138247d Boris Brezillon 2024-02-29  444  		/** @offset: Offset of the synchronization object inside @bo. */
de85488138247d Boris Brezillon 2024-02-29  445  		u64 offset;
de85488138247d Boris Brezillon 2024-02-29  446  
de85488138247d Boris Brezillon 2024-02-29  447  		/**
de85488138247d Boris Brezillon 2024-02-29  448  		 * @kmap: Kernel mapping of the buffer object holding the
de85488138247d Boris Brezillon 2024-02-29  449  		 * synchronization object.
de85488138247d Boris Brezillon 2024-02-29  450  		 */
de85488138247d Boris Brezillon 2024-02-29  451  		void *kmap;
de85488138247d Boris Brezillon 2024-02-29  452  	} syncwait;
de85488138247d Boris Brezillon 2024-02-29  453  
de85488138247d Boris Brezillon 2024-02-29  454  	/** @fence_ctx: Fence context fields. */
de85488138247d Boris Brezillon 2024-02-29  455  	struct {
de85488138247d Boris Brezillon 2024-02-29  456  		/** @lock: Used to protect access to all fences allocated by this context. */
de85488138247d Boris Brezillon 2024-02-29  457  		spinlock_t lock;
de85488138247d Boris Brezillon 2024-02-29  458  
de85488138247d Boris Brezillon 2024-02-29  459  		/**
de85488138247d Boris Brezillon 2024-02-29  460  		 * @id: Fence context ID.
de85488138247d Boris Brezillon 2024-02-29  461  		 *
de85488138247d Boris Brezillon 2024-02-29  462  		 * Allocated with dma_fence_context_alloc().
de85488138247d Boris Brezillon 2024-02-29  463  		 */
de85488138247d Boris Brezillon 2024-02-29  464  		u64 id;
de85488138247d Boris Brezillon 2024-02-29  465  
de85488138247d Boris Brezillon 2024-02-29  466  		/** @seqno: Sequence number of the last initialized fence. */
de85488138247d Boris Brezillon 2024-02-29  467  		atomic64_t seqno;
de85488138247d Boris Brezillon 2024-02-29  468  
de85488138247d Boris Brezillon 2024-02-29  469  		/**
de85488138247d Boris Brezillon 2024-02-29  470  		 * @in_flight_jobs: List containing all in-flight jobs.
de85488138247d Boris Brezillon 2024-02-29  471  		 *
de85488138247d Boris Brezillon 2024-02-29  472  		 * Used to keep track and signal panthor_job::done_fence when the
de85488138247d Boris Brezillon 2024-02-29  473  		 * synchronization object attached to the queue is signaled.
de85488138247d Boris Brezillon 2024-02-29  474  		 */
de85488138247d Boris Brezillon 2024-02-29  475  		struct list_head in_flight_jobs;
de85488138247d Boris Brezillon 2024-02-29  476  	} fence_ctx;
93e3ba65963905 Adrián Larumbe  2024-03-05  477  
93e3ba65963905 Adrián Larumbe  2024-03-05  478  	/** @time_offset: Offset of fdinfo stats structs in queue's syncobj. */
93e3ba65963905 Adrián Larumbe  2024-03-05  479  	unsigned long time_offset;
de85488138247d Boris Brezillon 2024-02-29 @480  };
de85488138247d Boris Brezillon 2024-02-29  481  
de85488138247d Boris Brezillon 2024-02-29  482  /**
de85488138247d Boris Brezillon 2024-02-29  483   * enum panthor_group_state - Scheduling group state.
de85488138247d Boris Brezillon 2024-02-29  484   */
de85488138247d Boris Brezillon 2024-02-29  485  enum panthor_group_state {
de85488138247d Boris Brezillon 2024-02-29  486  	/** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
de85488138247d Boris Brezillon 2024-02-29  487  	PANTHOR_CS_GROUP_CREATED,
de85488138247d Boris Brezillon 2024-02-29  488  
de85488138247d Boris Brezillon 2024-02-29  489  	/** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
de85488138247d Boris Brezillon 2024-02-29  490  	PANTHOR_CS_GROUP_ACTIVE,
de85488138247d Boris Brezillon 2024-02-29  491  
de85488138247d Boris Brezillon 2024-02-29  492  	/**
de85488138247d Boris Brezillon 2024-02-29  493  	 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
de85488138247d Boris Brezillon 2024-02-29  494  	 * inactive/suspended right now.
de85488138247d Boris Brezillon 2024-02-29  495  	 */
de85488138247d Boris Brezillon 2024-02-29  496  	PANTHOR_CS_GROUP_SUSPENDED,
de85488138247d Boris Brezillon 2024-02-29  497  
de85488138247d Boris Brezillon 2024-02-29  498  	/**
de85488138247d Boris Brezillon 2024-02-29  499  	 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
de85488138247d Boris Brezillon 2024-02-29  500  	 *
de85488138247d Boris Brezillon 2024-02-29  501  	 * Can no longer be scheduled. The only allowed action is a destruction.
de85488138247d Boris Brezillon 2024-02-29  502  	 */
de85488138247d Boris Brezillon 2024-02-29  503  	PANTHOR_CS_GROUP_TERMINATED,
de85488138247d Boris Brezillon 2024-02-29  504  };
de85488138247d Boris Brezillon 2024-02-29  505  
de85488138247d Boris Brezillon 2024-02-29  506  /**
de85488138247d Boris Brezillon 2024-02-29  507   * struct panthor_group - Scheduling group object
de85488138247d Boris Brezillon 2024-02-29  508   */
de85488138247d Boris Brezillon 2024-02-29  509  struct panthor_group {
de85488138247d Boris Brezillon 2024-02-29  510  	/** @refcount: Reference count */
de85488138247d Boris Brezillon 2024-02-29  511  	struct kref refcount;
de85488138247d Boris Brezillon 2024-02-29  512  
de85488138247d Boris Brezillon 2024-02-29  513  	/** @ptdev: Device. */
de85488138247d Boris Brezillon 2024-02-29  514  	struct panthor_device *ptdev;
de85488138247d Boris Brezillon 2024-02-29  515  
de85488138247d Boris Brezillon 2024-02-29  516  	/** @vm: VM bound to the group. */
de85488138247d Boris Brezillon 2024-02-29  517  	struct panthor_vm *vm;
de85488138247d Boris Brezillon 2024-02-29  518  
de85488138247d Boris Brezillon 2024-02-29  519  	/** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
de85488138247d Boris Brezillon 2024-02-29  520  	u64 compute_core_mask;
de85488138247d Boris Brezillon 2024-02-29  521  
de85488138247d Boris Brezillon 2024-02-29  522  	/** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
de85488138247d Boris Brezillon 2024-02-29  523  	u64 fragment_core_mask;
de85488138247d Boris Brezillon 2024-02-29  524  
de85488138247d Boris Brezillon 2024-02-29  525  	/** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
de85488138247d Boris Brezillon 2024-02-29  526  	u64 tiler_core_mask;
de85488138247d Boris Brezillon 2024-02-29  527  
de85488138247d Boris Brezillon 2024-02-29  528  	/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
de85488138247d Boris Brezillon 2024-02-29  529  	u8 max_compute_cores;
de85488138247d Boris Brezillon 2024-02-29  530  
de85488138247d Boris Brezillon 2024-02-29  531  	/** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */
de85488138247d Boris Brezillon 2024-02-29  532  	u8 max_fragment_cores;
de85488138247d Boris Brezillon 2024-02-29  533  
de85488138247d Boris Brezillon 2024-02-29  534  	/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
de85488138247d Boris Brezillon 2024-02-29  535  	u8 max_tiler_cores;
de85488138247d Boris Brezillon 2024-02-29  536  
de85488138247d Boris Brezillon 2024-02-29  537  	/** @priority: Group priority (check panthor_csg_priority). */
de85488138247d Boris Brezillon 2024-02-29  538  	u8 priority;
de85488138247d Boris Brezillon 2024-02-29  539  
de85488138247d Boris Brezillon 2024-02-29  540  	/** @blocked_queues: Bitmask reflecting the blocked queues. */
de85488138247d Boris Brezillon 2024-02-29  541  	u32 blocked_queues;
de85488138247d Boris Brezillon 2024-02-29  542  
de85488138247d Boris Brezillon 2024-02-29  543  	/** @idle_queues: Bitmask reflecting the idle queues. */
de85488138247d Boris Brezillon 2024-02-29  544  	u32 idle_queues;
de85488138247d Boris Brezillon 2024-02-29  545  
de85488138247d Boris Brezillon 2024-02-29  546  	/** @fatal_lock: Lock used to protect access to fatal fields. */
de85488138247d Boris Brezillon 2024-02-29  547  	spinlock_t fatal_lock;
de85488138247d Boris Brezillon 2024-02-29  548  
de85488138247d Boris Brezillon 2024-02-29  549  	/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
de85488138247d Boris Brezillon 2024-02-29  550  	u32 fatal_queues;
de85488138247d Boris Brezillon 2024-02-29  551  
de85488138247d Boris Brezillon 2024-02-29  552  	/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
de85488138247d Boris Brezillon 2024-02-29  553  	atomic_t tiler_oom;
de85488138247d Boris Brezillon 2024-02-29  554  
de85488138247d Boris Brezillon 2024-02-29  555  	/** @queue_count: Number of queues in this group. */
de85488138247d Boris Brezillon 2024-02-29  556  	u32 queue_count;
de85488138247d Boris Brezillon 2024-02-29  557  
de85488138247d Boris Brezillon 2024-02-29  558  	/** @queues: Queues owned by this group. */
de85488138247d Boris Brezillon 2024-02-29  559  	struct panthor_queue *queues[MAX_CS_PER_CSG];
de85488138247d Boris Brezillon 2024-02-29  560  
de85488138247d Boris Brezillon 2024-02-29  561  	/**
de85488138247d Boris Brezillon 2024-02-29  562  	 * @csg_id: ID of the FW group slot.
de85488138247d Boris Brezillon 2024-02-29  563  	 *
de85488138247d Boris Brezillon 2024-02-29  564  	 * -1 when the group is not scheduled/active.
de85488138247d Boris Brezillon 2024-02-29  565  	 */
de85488138247d Boris Brezillon 2024-02-29  566  	int csg_id;
de85488138247d Boris Brezillon 2024-02-29  567  
de85488138247d Boris Brezillon 2024-02-29  568  	/**
de85488138247d Boris Brezillon 2024-02-29  569  	 * @destroyed: True when the group has been destroyed.
de85488138247d Boris Brezillon 2024-02-29  570  	 *
de85488138247d Boris Brezillon 2024-02-29  571  	 * If a group is destroyed it becomes useless: no further jobs can be submitted
de85488138247d Boris Brezillon 2024-02-29  572  	 * to its queues. We simply wait for all references to be dropped so we can
de85488138247d Boris Brezillon 2024-02-29  573  	 * release the group object.
de85488138247d Boris Brezillon 2024-02-29  574  	 */
de85488138247d Boris Brezillon 2024-02-29  575  	bool destroyed;
de85488138247d Boris Brezillon 2024-02-29  576  
de85488138247d Boris Brezillon 2024-02-29  577  	/**
de85488138247d Boris Brezillon 2024-02-29  578  	 * @timedout: True when a timeout occurred on any of the queues owned by
de85488138247d Boris Brezillon 2024-02-29  579  	 * this group.
de85488138247d Boris Brezillon 2024-02-29  580  	 *
de85488138247d Boris Brezillon 2024-02-29  581  	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
de85488138247d Boris Brezillon 2024-02-29  582  	 * timeout situation is unrecoverable, and the group becomes useless.
de85488138247d Boris Brezillon 2024-02-29  583  	 * We simply wait for all references to be dropped so we can release the
de85488138247d Boris Brezillon 2024-02-29  584  	 * group object.
de85488138247d Boris Brezillon 2024-02-29  585  	 */
de85488138247d Boris Brezillon 2024-02-29  586  	bool timedout;
de85488138247d Boris Brezillon 2024-02-29  587  
de85488138247d Boris Brezillon 2024-02-29  588  	/**
de85488138247d Boris Brezillon 2024-02-29  589  	 * @syncobjs: Pool of per-queue synchronization objects.
de85488138247d Boris Brezillon 2024-02-29  590  	 *
de85488138247d Boris Brezillon 2024-02-29  591  	 * One sync object per queue. The position of the sync object is
de85488138247d Boris Brezillon 2024-02-29  592  	 * determined by the queue index.
de85488138247d Boris Brezillon 2024-02-29  593  	 */
93e3ba65963905 Adrián Larumbe  2024-03-05  594  
93e3ba65963905 Adrián Larumbe  2024-03-05  595  	struct {
93e3ba65963905 Adrián Larumbe  2024-03-05  596  		/** @bo: Kernel BO holding the sync objects. */
93e3ba65963905 Adrián Larumbe  2024-03-05  597  		struct panthor_kernel_bo *bo;
93e3ba65963905 Adrián Larumbe  2024-03-05  598  
93e3ba65963905 Adrián Larumbe  2024-03-05  599  		/** @times_offset: Beginning of time stats after objects of sync pool. */
93e3ba65963905 Adrián Larumbe  2024-03-05  600  		size_t times_offset;
93e3ba65963905 Adrián Larumbe  2024-03-05  601  	} syncobjs;
93e3ba65963905 Adrián Larumbe  2024-03-05  602  
93e3ba65963905 Adrián Larumbe  2024-03-05  603  	/** @fdinfo: Per-file total cycle and timestamp values reference. */
93e3ba65963905 Adrián Larumbe  2024-03-05  604  	struct {
93e3ba65963905 Adrián Larumbe  2024-03-05  605  		/** @data: Pointer to actual per-file sample data. */
93e3ba65963905 Adrián Larumbe  2024-03-05  606  		struct panthor_gpu_usage *data;
93e3ba65963905 Adrián Larumbe  2024-03-05  607  
93e3ba65963905 Adrián Larumbe  2024-03-05  608  		/**
93e3ba65963905 Adrián Larumbe  2024-03-05  609  		 * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
93e3ba65963905 Adrián Larumbe  2024-03-05  610  		 * and job post-completion processing function
93e3ba65963905 Adrián Larumbe  2024-03-05  611  		 */
93e3ba65963905 Adrián Larumbe  2024-03-05  612  		struct mutex lock;
93e3ba65963905 Adrián Larumbe  2024-03-05  613  	} fdinfo;
de85488138247d Boris Brezillon 2024-02-29  614  
de85488138247d Boris Brezillon 2024-02-29  615  	/** @state: Group state. */
de85488138247d Boris Brezillon 2024-02-29  616  	enum panthor_group_state state;
de85488138247d Boris Brezillon 2024-02-29  617  
de85488138247d Boris Brezillon 2024-02-29  618  	/**
de85488138247d Boris Brezillon 2024-02-29  619  	 * @suspend_buf: Suspend buffer.
de85488138247d Boris Brezillon 2024-02-29  620  	 *
de85488138247d Boris Brezillon 2024-02-29  621  	 * Stores the state of the group and its queues when a group is suspended.
de85488138247d Boris Brezillon 2024-02-29  622  	 * Used at resume time to restore the group in its previous state.
de85488138247d Boris Brezillon 2024-02-29  623  	 *
de85488138247d Boris Brezillon 2024-02-29  624  	 * The size of the suspend buffer is exposed through the FW interface.
de85488138247d Boris Brezillon 2024-02-29  625  	 */
de85488138247d Boris Brezillon 2024-02-29  626  	struct panthor_kernel_bo *suspend_buf;
de85488138247d Boris Brezillon 2024-02-29  627  
de85488138247d Boris Brezillon 2024-02-29  628  	/**
de85488138247d Boris Brezillon 2024-02-29  629  	 * @protm_suspend_buf: Protection mode suspend buffer.
de85488138247d Boris Brezillon 2024-02-29  630  	 *
de85488138247d Boris Brezillon 2024-02-29  631  	 * Stores the state of the group and its queues when a group that's in
de85488138247d Boris Brezillon 2024-02-29  632  	 * protection mode is suspended.
de85488138247d Boris Brezillon 2024-02-29  633  	 *
de85488138247d Boris Brezillon 2024-02-29  634  	 * Used at resume time to restore the group in its previous state.
de85488138247d Boris Brezillon 2024-02-29  635  	 *
de85488138247d Boris Brezillon 2024-02-29  636  	 * The size of the protection mode suspend buffer is exposed through the
de85488138247d Boris Brezillon 2024-02-29  637  	 * FW interface.
de85488138247d Boris Brezillon 2024-02-29  638  	 */
de85488138247d Boris Brezillon 2024-02-29  639  	struct panthor_kernel_bo *protm_suspend_buf;
de85488138247d Boris Brezillon 2024-02-29  640  
de85488138247d Boris Brezillon 2024-02-29  641  	/** @sync_upd_work: Work used to check/signal job fences. */
de85488138247d Boris Brezillon 2024-02-29  642  	struct work_struct sync_upd_work;
de85488138247d Boris Brezillon 2024-02-29  643  
de85488138247d Boris Brezillon 2024-02-29  644  	/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
de85488138247d Boris Brezillon 2024-02-29  645  	struct work_struct tiler_oom_work;
de85488138247d Boris Brezillon 2024-02-29  646  
de85488138247d Boris Brezillon 2024-02-29  647  	/** @term_work: Work used to finish the group termination procedure. */
de85488138247d Boris Brezillon 2024-02-29  648  	struct work_struct term_work;
de85488138247d Boris Brezillon 2024-02-29  649  
de85488138247d Boris Brezillon 2024-02-29  650  	/**
de85488138247d Boris Brezillon 2024-02-29  651  	 * @release_work: Work used to release group resources.
de85488138247d Boris Brezillon 2024-02-29  652  	 *
de85488138247d Boris Brezillon 2024-02-29  653  	 * We need to postpone the group release to avoid a deadlock when
de85488138247d Boris Brezillon 2024-02-29  654  	 * the last ref is released in the tick work.
de85488138247d Boris Brezillon 2024-02-29  655  	 */
de85488138247d Boris Brezillon 2024-02-29  656  	struct work_struct release_work;
de85488138247d Boris Brezillon 2024-02-29  657  
de85488138247d Boris Brezillon 2024-02-29  658  	/**
de85488138247d Boris Brezillon 2024-02-29  659  	 * @run_node: Node used to insert the group in the
de85488138247d Boris Brezillon 2024-02-29  660  	 * panthor_group::groups::{runnable,idle} and
de85488138247d Boris Brezillon 2024-02-29  661  	 * panthor_group::reset.stopped_groups lists.
de85488138247d Boris Brezillon 2024-02-29  662  	 */
de85488138247d Boris Brezillon 2024-02-29  663  	struct list_head run_node;
de85488138247d Boris Brezillon 2024-02-29  664  
de85488138247d Boris Brezillon 2024-02-29  665  	/**
de85488138247d Boris Brezillon 2024-02-29  666  	 * @wait_node: Node used to insert the group in the
de85488138247d Boris Brezillon 2024-02-29  667  	 * panthor_group::groups::waiting list.
de85488138247d Boris Brezillon 2024-02-29  668  	 */
de85488138247d Boris Brezillon 2024-02-29  669  	struct list_head wait_node;
de85488138247d Boris Brezillon 2024-02-29 @670  };
de85488138247d Boris Brezillon 2024-02-29  671  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements
  2024-03-05 21:05 ` [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements Adrián Larumbe
  2024-03-08  8:47   ` kernel test robot
@ 2024-03-28 15:49   ` Liviu Dudau
  2024-04-23 15:51     ` Adrián Larumbe
  1 sibling, 1 reply; 6+ messages in thread
From: Liviu Dudau @ 2024-03-28 15:49 UTC (permalink / raw)
  To: Adrián Larumbe
  Cc: boris.brezillon, steven.price, maarten.lankhorst, mripard,
	tzimmermann, airlied, daniel, dri-devel, linux-kernel, kernel

Hi Adrián,

Appologies for the delay in reviewing this.

On Tue, Mar 05, 2024 at 09:05:49PM +0000, Adrián Larumbe wrote:
> These values are sampled by the firmware right before jumping into the UM
> command stream and immediately after returning from it, and then kept inside a
> per-job accounting structure. That structure is held inside the group's syncobjs
> buffer object, at an offset that depends on the job's queue slot number and the
> queue's index within the group.

I think this commit message is misleadingly short compared to the size of the
changes. If I may, I would like to suggest that you split this commit into two
parts, one introducing the changes in the ringbuf and syncobjs structures and
the other exporting the statistics in the fdinfo.

> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panthor/panthor_devfreq.c |  10 +
>  drivers/gpu/drm/panthor/panthor_device.h  |  11 ++
>  drivers/gpu/drm/panthor/panthor_drv.c     |  31 ++++
>  drivers/gpu/drm/panthor/panthor_sched.c   | 217 +++++++++++++++++++---
>  4 files changed, 241 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
> index 7ac4fa290f27..51a7b734edcd 100644
> --- a/drivers/gpu/drm/panthor/panthor_devfreq.c
> +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
> @@ -91,6 +91,7 @@ static int panthor_devfreq_get_dev_status(struct device *dev,
>  	spin_lock_irqsave(&pdevfreq->lock, irqflags);
>  
>  	panthor_devfreq_update_utilization(pdevfreq);
> +	ptdev->current_frequency = status->current_frequency;
>  
>  	status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time,
>  						   pdevfreq->idle_time));
> @@ -130,6 +131,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
>  	struct panthor_devfreq *pdevfreq;
>  	struct dev_pm_opp *opp;
>  	unsigned long cur_freq;
> +	unsigned long freq = ULONG_MAX;
>  	int ret;
>  
>  	pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
> @@ -204,6 +206,14 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
>  
>  	dev_pm_opp_put(opp);
>  
> +	/* Find the fastest defined rate  */
> +	opp = dev_pm_opp_find_freq_floor(dev, &freq);
> +	if (IS_ERR(opp))
> +		return PTR_ERR(opp);
> +	ptdev->fast_rate = freq;
> +
> +	dev_pm_opp_put(opp);
> +
>  	/*
>  	 * Setup default thresholds for the simple_ondemand governor.
>  	 * The values are chosen based on experiments.
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index 51c9d61b6796..10e970921ca3 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -162,6 +162,14 @@ struct panthor_device {
>  		 */
>  		u32 *dummy_latest_flush;
>  	} pm;
> +
> +	unsigned long current_frequency;
> +	unsigned long fast_rate;
> +};
> +
> +struct panthor_gpu_usage {
> +	u64 time;
> +	u64 cycles;
>  };
>  
>  /**
> @@ -176,6 +184,9 @@ struct panthor_file {
>  
>  	/** @groups: Scheduling group pool attached to this file. */
>  	struct panthor_group_pool *groups;
> +
> +	/** @stats: cycle and timestamp measures for job execution. */
> +	struct panthor_gpu_usage stats;
>  };
>  
>  int panthor_device_init(struct panthor_device *ptdev);
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index ff484506229f..fa06b9e2c6cd 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -3,6 +3,10 @@
>  /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
>  /* Copyright 2019 Collabora ltd. */
>  
> +#ifdef CONFIG_HAVE_ARM_ARCH_TIMER
> +#include <asm/arch_timer.h>
> +#endif
> +
>  #include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/of_platform.h>
> @@ -28,6 +32,8 @@
>  #include "panthor_regs.h"
>  #include "panthor_sched.h"
>  
> +#define NS_PER_SEC      1000000000ULL
> +
>  /**
>   * DOC: user <-> kernel object copy helpers.
>   */
> @@ -1336,6 +1342,29 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
>  	return ret;
>  }
>  
> +static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev,
> +				    struct panthor_file *pfile,
> +				    struct drm_printer *p)
> +{
> +#ifdef CONFIG_HAVE_ARM_ARCH_TIMER
> +	drm_printf(p, "drm-engine-panthor:\t%llu ns\n",
> +		   DIV_ROUND_UP_ULL((pfile->stats.time * NS_PER_SEC),
> +				    arch_timer_get_cntfrq()));
> +#endif
> +	drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles);
> +	drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate);
> +	drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency);
> +}
> +
> +static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> +{
> +	struct drm_device *dev = file->minor->dev;
> +	struct panthor_device *ptdev = container_of(dev, struct panthor_device, base);
> +
> +	panthor_gpu_show_fdinfo(ptdev, file->driver_priv, p);
> +
> +}
> +
>  static const struct file_operations panthor_drm_driver_fops = {
>  	.open = drm_open,
>  	.release = drm_release,
> @@ -1345,6 +1374,7 @@ static const struct file_operations panthor_drm_driver_fops = {
>  	.read = drm_read,
>  	.llseek = noop_llseek,
>  	.mmap = panthor_mmap,
> +	.show_fdinfo = drm_show_fdinfo,
>  };
>  
>  #ifdef CONFIG_DEBUG_FS
> @@ -1363,6 +1393,7 @@ static const struct drm_driver panthor_drm_driver = {
>  			   DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
>  	.open = panthor_open,
>  	.postclose = panthor_postclose,
> +	.show_fdinfo = panthor_show_fdinfo,
>  	.ioctls = panthor_drm_driver_ioctls,
>  	.num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls),
>  	.fops = &panthor_drm_driver_fops,
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index 5f7803b6fc48..751d1453e7a1 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -93,6 +93,8 @@
>  #define MIN_CSGS				3
>  #define MAX_CSG_PRIO				0xf
>  
> +#define SLOTSIZE				(NUM_INSTRS_PER_SLOT * sizeof(u64))

Worth moving NUM_INSTRS_PER_SLOT here too?

> +
>  struct panthor_group;
>  
>  /**
> @@ -393,7 +395,13 @@ struct panthor_queue {
>  #define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
>  
>  	/** @ringbuf: Command stream ring-buffer. */
> -	struct panthor_kernel_bo *ringbuf;
> +	struct {
> +		/** @ringbuf: Kernel BO that holds ring buffer. */
> +		struct panthor_kernel_bo *bo;
> +
> +		/** @nelem: Number of slots in the ring buffer. */
> +		unsigned int nelem;

I'm not convinced this nelem is needed. The only place it is used is to check that
job->ringbuf_idx is not too big, which is something we should do in queue_run_job()
function rather than in update_fdinfo_stats(). If we don't change ringbuf to a
structure the patch slims down by more than a dozen lines.

> +	} ringbuf;
>  
>  	/** @iface: Firmware interface. */
>  	struct {
> @@ -466,6 +474,9 @@ struct panthor_queue {
>  		 */
>  		struct list_head in_flight_jobs;
>  	} fence_ctx;
> +
> +	/** @time_offset: Offset of fdinfo stats structs in queue's syncobj. */
> +	unsigned long time_offset;
>  };
>  
>  /**
> @@ -580,7 +591,26 @@ struct panthor_group {
>  	 * One sync object per queue. The position of the sync object is
>  	 * determined by the queue index.
>  	 */
> -	struct panthor_kernel_bo *syncobjs;
> +
> +	struct {
> +		/** @bo: Kernel BO holding the sync objects. */
> +		struct panthor_kernel_bo *bo;
> +
> +		/** @times_offset: Beginning of time stats after objects of sync pool. */
> +		size_t times_offset;
> +	} syncobjs;
> +
> +	/** @fdinfo: Per-file total cycle and timestamp values reference. */
> +	struct {
> +		/** @data: Pointer to actual per-file sample data. */
> +		struct panthor_gpu_usage *data;
> +
> +		/**
> +		 * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
> +		 * and job post-completion processing function
> +		 */
> +		struct mutex lock;
> +	} fdinfo;
>  
>  	/** @state: Group state. */
>  	enum panthor_group_state state;
> @@ -639,6 +669,18 @@ struct panthor_group {
>  	struct list_head wait_node;
>  };
>  
> +struct panthor_job_times {
> +	struct {
> +		u64 before;
> +		u64 after;
> +	} cycles;
> +
> +	struct {
> +		u64 before;
> +		u64 after;
> +	} time;
> +};
> +
>  /**
>   * group_queue_work() - Queue a group work
>   * @group: Group to queue the work for.
> @@ -718,6 +760,9 @@ struct panthor_job {
>  	/** @queue_idx: Index of the queue inside @group. */
>  	u32 queue_idx;
>  
> +	/** @ringbuf_idx: Index of the queue inside @queue. */

Index of the ringbuffer inside @queue.

> +	u32 ringbuf_idx;
> +
>  	/** @call_info: Information about the userspace command stream call. */
>  	struct {
>  		/** @start: GPU address of the userspace command stream. */
> @@ -814,7 +859,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
>  
>  	panthor_queue_put_syncwait_obj(queue);
>  
> -	panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
> +	panthor_kernel_bo_destroy(group->vm, queue->ringbuf.bo);
>  	panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
>  
>  	kfree(queue);
> @@ -828,12 +873,14 @@ static void group_release_work(struct work_struct *work)
>  	struct panthor_device *ptdev = group->ptdev;
>  	u32 i;
>  
> +	mutex_destroy(&group->fdinfo.lock);
> +
>  	for (i = 0; i < group->queue_count; i++)
>  		group_free_queue(group, group->queues[i]);
>  
>  	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
>  	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
> -	panthor_kernel_bo_destroy(group->vm, group->syncobjs);
> +	panthor_kernel_bo_destroy(group->vm, group->syncobjs.bo);
>  
>  	panthor_vm_put(group->vm);
>  	kfree(group);
> @@ -970,8 +1017,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
>  	queue->iface.input->extract = queue->iface.output->extract;
>  	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
>  
> -	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
> -	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
> +	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf.bo);
> +	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf.bo);
>  	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
>  	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
>  	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
> @@ -1926,7 +1973,7 @@ tick_ctx_init(struct panthor_scheduler *sched,
>  	}
>  }
>  
> -#define NUM_INSTRS_PER_SLOT		16
> +#define NUM_INSTRS_PER_SLOT		32

I guess this macro has to be a value that is a power of 2, as it used to divide the ringbuffer size, right?

>  
>  static void
>  group_term_post_processing(struct panthor_group *group)
> @@ -1964,7 +2011,7 @@ group_term_post_processing(struct panthor_group *group)
>  		spin_unlock(&queue->fence_ctx.lock);
>  
>  		/* Manually update the syncobj seqno to unblock waiters. */
> -		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
> +		syncobj = group->syncobjs.bo->kmap + (i * sizeof(*syncobj));
>  		syncobj->status = ~0;
>  		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
>  		sched_queue_work(group->ptdev->scheduler, sync_upd);
> @@ -2715,6 +2762,30 @@ void panthor_sched_post_reset(struct panthor_device *ptdev)
>  	sched_queue_work(sched, sync_upd);
>  }
>  
> +static void update_fdinfo_stats(struct panthor_job *job)
> +{
> +	struct panthor_group *group = job->group;
> +	struct panthor_queue *queue = group->queues[job->queue_idx];
> +	struct panthor_device *ptdev = group->ptdev;
> +	struct panthor_gpu_usage *fdinfo;
> +	struct panthor_job_times *times;
> +
> +	if (drm_WARN_ON(&ptdev->base, job->ringbuf_idx >= queue->ringbuf.nelem))
> +		return;
> +
> +	times = (struct panthor_job_times *)
> +		((unsigned long)group->syncobjs.bo->kmap + queue->time_offset +
> +		 (job->ringbuf_idx * sizeof(struct panthor_job_times)));
> +
> +	mutex_lock(&group->fdinfo.lock);
> +	if ((group->fdinfo.data)) {
> +		fdinfo = group->fdinfo.data;
> +		fdinfo->cycles += times->cycles.after - times->cycles.before;
> +		fdinfo->time += times->time.after - times->time.before;
> +	}
> +	mutex_unlock(&group->fdinfo.lock);
> +}
> +
>  static void group_sync_upd_work(struct work_struct *work)
>  {
>  	struct panthor_group *group =
> @@ -2732,7 +2803,7 @@ static void group_sync_upd_work(struct work_struct *work)
>  		if (!queue)
>  			continue;
>  
> -		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
> +		syncobj = group->syncobjs.bo->kmap + (queue_idx * sizeof(*syncobj));
>  
>  		spin_lock(&queue->fence_ctx.lock);
>  		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
> @@ -2750,6 +2821,7 @@ static void group_sync_upd_work(struct work_struct *work)
>  	dma_fence_end_signalling(cookie);
>  
>  	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
> +		update_fdinfo_stats(job);
>  		list_del_init(&job->node);
>  		panthor_job_put(&job->base);
>  	}
> @@ -2765,13 +2837,19 @@ queue_run_job(struct drm_sched_job *sched_job)
>  	struct panthor_queue *queue = group->queues[job->queue_idx];
>  	struct panthor_device *ptdev = group->ptdev;
>  	struct panthor_scheduler *sched = ptdev->scheduler;
> -	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
> +	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf.bo);
>  	u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
> +	u32 ringbuf_index = ringbuf_insert / (SLOTSIZE);
>  	u64 addr_reg = ptdev->csif_info.cs_reg_count -
>  		       ptdev->csif_info.unpreserved_cs_reg_count;
>  	u64 val_reg = addr_reg + 2;
> -	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
> -			job->queue_idx * sizeof(struct panthor_syncobj_64b);
> +	u64 cycle_reg = addr_reg;
> +	u64 time_reg = val_reg;
> +	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs.bo) +
> +		job->queue_idx * sizeof(struct panthor_syncobj_64b);
> +	u64 times_addr = panthor_kernel_bo_gpuva(group->syncobjs.bo) + queue->time_offset +
> +		(ringbuf_index * sizeof(struct panthor_job_times));
> +
>  	u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
>  	struct dma_fence *done_fence;
>  	int ret;
> @@ -2783,6 +2861,18 @@ queue_run_job(struct drm_sched_job *sched_job)
>  		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
>  		(36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
>  
> +		/* MOV48 rX:rX+1, cycles_offset */
> +		(1ull << 56) | (cycle_reg << 48) | (times_addr + offsetof(struct panthor_job_times, cycles.before)),
> +
> +		/* MOV48 rX:rX+1, time_offset */
> +		(1ull << 56) | (time_reg << 48) | (times_addr + offsetof(struct panthor_job_times, time.before)),
> +
> +		/* STORE_STATE cycles */
> +		(40ull << 56) |  (cycle_reg << 40) | (1ll << 32),
> +
> +		/* STORE_STATE timer */
> +		(40ull << 56) |  (time_reg << 40) | (0ll << 32),
> +
>  		/* MOV48 rX:rX+1, cs.start */
>  		(1ull << 56) | (addr_reg << 48) | job->call_info.start,
>  
> @@ -2795,6 +2885,18 @@ queue_run_job(struct drm_sched_job *sched_job)
>  		/* CALL rX:rX+1, rX+2 */
>  		(32ull << 56) | (addr_reg << 40) | (val_reg << 32),
>  
> +		/* MOV48 rX:rX+1, cycles_offset */
> +		(1ull << 56) | (cycle_reg << 48) | (times_addr + offsetof(struct panthor_job_times, cycles.after)),
> +
> +		/* MOV48 rX:rX+1, time_offset */
> +		(1ull << 56) | (time_reg << 48) | (times_addr + offsetof(struct panthor_job_times, time.after)),
> +
> +		/* STORE_STATE cycles */
> +		(40ull << 56) |  (cycle_reg << 40) | (1ll << 32),
> +
> +		/* STORE_STATE timer */
> +		(40ull << 56) |  (time_reg << 40) | (0ll << 32),
> +
>  		/* MOV48 rX:rX+1, sync_addr */
>  		(1ull << 56) | (addr_reg << 48) | sync_addr,
>  
> @@ -2839,7 +2941,7 @@ queue_run_job(struct drm_sched_job *sched_job)
>  		       queue->fence_ctx.id,
>  		       atomic64_inc_return(&queue->fence_ctx.seqno));
>  
> -	memcpy(queue->ringbuf->kmap + ringbuf_insert,
> +	memcpy(queue->ringbuf.bo->kmap + ringbuf_insert,
>  	       call_instrs, sizeof(call_instrs));
>  
>  	panthor_job_get(&job->base);
> @@ -2849,6 +2951,7 @@ queue_run_job(struct drm_sched_job *sched_job)
>  
>  	job->ringbuf.start = queue->iface.input->insert;
>  	job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
> +	job->ringbuf_idx = ringbuf_index;
>  
>  	/* Make sure the ring buffer is updated before the INSERT
>  	 * register.
> @@ -2939,7 +3042,8 @@ static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
>  
>  static struct panthor_queue *
>  group_create_queue(struct panthor_group *group,
> -		   const struct drm_panthor_queue_create *args)
> +		   const struct drm_panthor_queue_create *args,
> +		   unsigned int slots_so_far)
>  {
>  	struct drm_gpu_scheduler *drm_sched;
>  	struct panthor_queue *queue;
> @@ -2965,21 +3069,23 @@ group_create_queue(struct panthor_group *group,
>  
>  	queue->priority = args->priority;
>  
> -	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
> +	queue->ringbuf.bo = panthor_kernel_bo_create(group->ptdev, group->vm,
>  						  args->ringbuf_size,
>  						  DRM_PANTHOR_BO_NO_MMAP,
>  						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
>  						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
>  						  PANTHOR_VM_KERNEL_AUTO_VA);
> -	if (IS_ERR(queue->ringbuf)) {
> -		ret = PTR_ERR(queue->ringbuf);
> +	if (IS_ERR(queue->ringbuf.bo)) {
> +		ret = PTR_ERR(queue->ringbuf.bo);
>  		goto err_free_queue;
>  	}
>  
> -	ret = panthor_kernel_bo_vmap(queue->ringbuf);
> +	ret = panthor_kernel_bo_vmap(queue->ringbuf.bo);
>  	if (ret)
>  		goto err_free_queue;
>  
> +	queue->ringbuf.nelem = (args->ringbuf_size / (SLOTSIZE));
> +
>  	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
>  							    &queue->iface.input,
>  							    &queue->iface.output,
> @@ -2990,6 +3096,9 @@ group_create_queue(struct panthor_group *group,
>  		goto err_free_queue;
>  	}
>  
> +	queue->time_offset = group->syncobjs.times_offset +
> +		(slots_so_far * sizeof(struct panthor_job_times));
> +
>  	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
>  			     group->ptdev->scheduler->wq, 1,
>  			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),

You can use the newly added SLOTSIZE here.

> @@ -3020,6 +3129,7 @@ int panthor_group_create(struct panthor_file *pfile,
>  	struct panthor_scheduler *sched = ptdev->scheduler;
>  	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
>  	struct panthor_group *group = NULL;
> +	unsigned int total_slots;
>  	u32 gid, i, suspend_size;
>  	int ret;
>  
> @@ -3086,33 +3196,77 @@ int panthor_group_create(struct panthor_file *pfile,
>  		goto err_put_group;
>  	}
>  
> -	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
> -						   group_args->queues.count *
> -						   sizeof(struct panthor_syncobj_64b),
> +	/*
> +	 * Need to add size for the fdinfo sample structs, as many as the sum
> +	 * of the number of job slots for every single queue ringbuffer.
> +	 */
> +
> +	for (i = 0, total_slots = 0; i < group_args->queues.count; i++)
> +		total_slots += (queue_args[i].ringbuf_size / (SLOTSIZE));

Minor nit: We should pre-compute here (group_args->queues.count * sizeof(struct panthor_syncobj_64b)) + \
total_slots * sizeof(struct panthor_job_times) and then use it later as argument to panthor_kernel_bo_create()
and memset().

> +
> +	/*
> +	 * Memory layout of group's syncobjs BO
> +	 * group->syncobjs.bo {
> +	 *	struct panthor_syncobj_64b sync1;
> +	 *	struct panthor_syncobj_64b sync2;
> +	 *		...
> +	 *		As many as group_args->queues.count
> +	 *		...
> +	 *	struct panthor_syncobj_64b syncn;
> +	 *	struct panthor_job_times queue_1slot_1
> +	 *	struct panthor_job_times queue_1slot_2
> +	 *		...
> +	 *		As many as queue[i].ringbuf_size / SLOTSIZE
> +	 *		...
> +	 *	struct panthor_job_times queue_1slot_p
> +	 *		...
> +	 *		As many as group_args->queues.count
> +	 *		...
> +	 *	struct panthor_job_times queue_nslot_1
> +	 *	struct panthor_job_times queue_nslot_2
> +	 *		...
> +	 *		As many as queue[n].ringbuf_size / SLOTSIZE
> +	 *	struct panthor_job_times queue_nslot_q

Minor nit: I find it more readable the form "queue1_slotP"... "queueN_slotQ".

> +	 *
> +	 *	Linearly, group->syncobjs.bo = {syncojb1,..,syncobjN,
> +	 *	{queue1 = {js1,..,jsp},..,queueN = {js1,..,jsq}}}
> +	 * }
> +	 *
> +	 */
> +
> +	group->syncobjs.bo = panthor_kernel_bo_create(ptdev, group->vm,
> +						   (group_args->queues.count *
> +						    sizeof(struct panthor_syncobj_64b))
> +						   + (total_slots * sizeof(struct panthor_job_times)),
>  						   DRM_PANTHOR_BO_NO_MMAP,
>  						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
>  						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
>  						   PANTHOR_VM_KERNEL_AUTO_VA);
> -	if (IS_ERR(group->syncobjs)) {
> -		ret = PTR_ERR(group->syncobjs);
> +	if (IS_ERR(group->syncobjs.bo)) {
> +		ret = PTR_ERR(group->syncobjs.bo);
>  		goto err_put_group;
>  	}
>  
> -	ret = panthor_kernel_bo_vmap(group->syncobjs);
> +	ret = panthor_kernel_bo_vmap(group->syncobjs.bo);
>  	if (ret)
>  		goto err_put_group;
>  
> -	memset(group->syncobjs->kmap, 0,
> -	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
> +	memset(group->syncobjs.bo->kmap, 0,
> +	       (group_args->queues.count * sizeof(struct panthor_syncobj_64b)) +
> +	       (total_slots * sizeof(struct panthor_job_times)));
>  
> -	for (i = 0; i < group_args->queues.count; i++) {
> -		group->queues[i] = group_create_queue(group, &queue_args[i]);
> +	group->syncobjs.times_offset =
> +		group_args->queues.count * sizeof(struct panthor_syncobj_64b);
> +
> +	for (i = 0, total_slots = 0; i < group_args->queues.count; i++) {
> +		group->queues[i] = group_create_queue(group, &queue_args[i], total_slots);
>  		if (IS_ERR(group->queues[i])) {
>  			ret = PTR_ERR(group->queues[i]);
>  			group->queues[i] = NULL;
>  			goto err_put_group;
>  		}
>  
> +		total_slots += (queue_args[i].ringbuf_size / (SLOTSIZE));
>  		group->queue_count++;
>  	}
>  
> @@ -3133,6 +3287,9 @@ int panthor_group_create(struct panthor_file *pfile,
>  	}
>  	mutex_unlock(&sched->reset.lock);
>  
> +	group->fdinfo.data = &pfile->stats;
> +	mutex_init(&group->fdinfo.lock);
> +
>  	return gid;
>  
>  err_put_group:
> @@ -3172,6 +3329,10 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
>  	mutex_unlock(&sched->lock);
>  	mutex_unlock(&sched->reset.lock);
>  
> +	mutex_lock(&group->fdinfo.lock);
> +	group->fdinfo.data = NULL;
> +	mutex_unlock(&group->fdinfo.lock);
> +
>  	group_put(group);
>  	return 0;
>  }
> -- 
> 2.43.0
>

I've tried to review the patch as best as I could, specially the math. AFAICT it all checks out,
would be good for others to have a look.

Best regards,
Liviu

-- 
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements
  2024-03-28 15:49   ` Liviu Dudau
@ 2024-04-23 15:51     ` Adrián Larumbe
  0 siblings, 0 replies; 6+ messages in thread
From: Adrián Larumbe @ 2024-04-23 15:51 UTC (permalink / raw)
  To: Liviu Dudau
  Cc: boris.brezillon, steven.price, maarten.lankhorst, mripard,
	tzimmermann, airlied, daniel, dri-devel, linux-kernel, kernel

Hi Liviu,

Thanks for your review. Also want to apologise for replying so late.
Today I'll be sending a v2 of this patch series after having applied 
all your suggestions.

On 28.03.2024 15:49, Liviu Dudau wrote:
> Hi Adrián,
> 
> Appologies for the delay in reviewing this.
> 
> On Tue, Mar 05, 2024 at 09:05:49PM +0000, Adrián Larumbe wrote:
> > These values are sampled by the firmware right before jumping into the UM
> > command stream and immediately after returning from it, and then kept inside a
> > per-job accounting structure. That structure is held inside the group's syncobjs
> > buffer object, at an offset that depends on the job's queue slot number and the
> > queue's index within the group.
> 
> I think this commit message is misleadingly short compared to the size of the
> changes. If I may, I would like to suggest that you split this commit into two
> parts, one introducing the changes in the ringbuf and syncobjs structures and
> the other exporting the statistics in the fdinfo.
> 
> > 
> > Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> > ---
> >  drivers/gpu/drm/panthor/panthor_devfreq.c |  10 +
> >  drivers/gpu/drm/panthor/panthor_device.h  |  11 ++
> >  drivers/gpu/drm/panthor/panthor_drv.c     |  31 ++++
> >  drivers/gpu/drm/panthor/panthor_sched.c   | 217 +++++++++++++++++++---
> >  4 files changed, 241 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
> > index 7ac4fa290f27..51a7b734edcd 100644
> > --- a/drivers/gpu/drm/panthor/panthor_devfreq.c
> > +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
> > @@ -91,6 +91,7 @@ static int panthor_devfreq_get_dev_status(struct device *dev,
> >  	spin_lock_irqsave(&pdevfreq->lock, irqflags);
> >  
> >  	panthor_devfreq_update_utilization(pdevfreq);
> > +	ptdev->current_frequency = status->current_frequency;
> >  
> >  	status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time,
> >  						   pdevfreq->idle_time));
> > @@ -130,6 +131,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
> >  	struct panthor_devfreq *pdevfreq;
> >  	struct dev_pm_opp *opp;
> >  	unsigned long cur_freq;
> > +	unsigned long freq = ULONG_MAX;
> >  	int ret;
> >  
> >  	pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
> > @@ -204,6 +206,14 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
> >  
> >  	dev_pm_opp_put(opp);
> >  
> > +	/* Find the fastest defined rate  */
> > +	opp = dev_pm_opp_find_freq_floor(dev, &freq);
> > +	if (IS_ERR(opp))
> > +		return PTR_ERR(opp);
> > +	ptdev->fast_rate = freq;
> > +
> > +	dev_pm_opp_put(opp);
> > +
> >  	/*
> >  	 * Setup default thresholds for the simple_ondemand governor.
> >  	 * The values are chosen based on experiments.
> > diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> > index 51c9d61b6796..10e970921ca3 100644
> > --- a/drivers/gpu/drm/panthor/panthor_device.h
> > +++ b/drivers/gpu/drm/panthor/panthor_device.h
> > @@ -162,6 +162,14 @@ struct panthor_device {
> >  		 */
> >  		u32 *dummy_latest_flush;
> >  	} pm;
> > +
> > +	unsigned long current_frequency;
> > +	unsigned long fast_rate;
> > +};
> > +
> > +struct panthor_gpu_usage {
> > +	u64 time;
> > +	u64 cycles;
> >  };
> >  
> >  /**
> > @@ -176,6 +184,9 @@ struct panthor_file {
> >  
> >  	/** @groups: Scheduling group pool attached to this file. */
> >  	struct panthor_group_pool *groups;
> > +
> > +	/** @stats: cycle and timestamp measures for job execution. */
> > +	struct panthor_gpu_usage stats;
> >  };
> >  
> >  int panthor_device_init(struct panthor_device *ptdev);
> > diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> > index ff484506229f..fa06b9e2c6cd 100644
> > --- a/drivers/gpu/drm/panthor/panthor_drv.c
> > +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> > @@ -3,6 +3,10 @@
> >  /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
> >  /* Copyright 2019 Collabora ltd. */
> >  
> > +#ifdef CONFIG_HAVE_ARM_ARCH_TIMER
> > +#include <asm/arch_timer.h>
> > +#endif
> > +
> >  #include <linux/list.h>
> >  #include <linux/module.h>
> >  #include <linux/of_platform.h>
> > @@ -28,6 +32,8 @@
> >  #include "panthor_regs.h"
> >  #include "panthor_sched.h"
> >  
> > +#define NS_PER_SEC      1000000000ULL
> > +
> >  /**
> >   * DOC: user <-> kernel object copy helpers.
> >   */
> > @@ -1336,6 +1342,29 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
> >  	return ret;
> >  }
> >  
> > +static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev,
> > +				    struct panthor_file *pfile,
> > +				    struct drm_printer *p)
> > +{
> > +#ifdef CONFIG_HAVE_ARM_ARCH_TIMER
> > +	drm_printf(p, "drm-engine-panthor:\t%llu ns\n",
> > +		   DIV_ROUND_UP_ULL((pfile->stats.time * NS_PER_SEC),
> > +				    arch_timer_get_cntfrq()));
> > +#endif
> > +	drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles);
> > +	drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate);
> > +	drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency);
> > +}
> > +
> > +static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> > +{
> > +	struct drm_device *dev = file->minor->dev;
> > +	struct panthor_device *ptdev = container_of(dev, struct panthor_device, base);
> > +
> > +	panthor_gpu_show_fdinfo(ptdev, file->driver_priv, p);
> > +
> > +}
> > +
> >  static const struct file_operations panthor_drm_driver_fops = {
> >  	.open = drm_open,
> >  	.release = drm_release,
> > @@ -1345,6 +1374,7 @@ static const struct file_operations panthor_drm_driver_fops = {
> >  	.read = drm_read,
> >  	.llseek = noop_llseek,
> >  	.mmap = panthor_mmap,
> > +	.show_fdinfo = drm_show_fdinfo,
> >  };
> >  
> >  #ifdef CONFIG_DEBUG_FS
> > @@ -1363,6 +1393,7 @@ static const struct drm_driver panthor_drm_driver = {
> >  			   DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
> >  	.open = panthor_open,
> >  	.postclose = panthor_postclose,
> > +	.show_fdinfo = panthor_show_fdinfo,
> >  	.ioctls = panthor_drm_driver_ioctls,
> >  	.num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls),
> >  	.fops = &panthor_drm_driver_fops,
> > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> > index 5f7803b6fc48..751d1453e7a1 100644
> > --- a/drivers/gpu/drm/panthor/panthor_sched.c
> > +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> > @@ -93,6 +93,8 @@
> >  #define MIN_CSGS				3
> >  #define MAX_CSG_PRIO				0xf
> >  
> > +#define SLOTSIZE				(NUM_INSTRS_PER_SLOT * sizeof(u64))
> 
> Worth moving NUM_INSTRS_PER_SLOT here too?
> 
> > +
> >  struct panthor_group;
> >  
> >  /**
> > @@ -393,7 +395,13 @@ struct panthor_queue {
> >  #define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
> >  
> >  	/** @ringbuf: Command stream ring-buffer. */
> > -	struct panthor_kernel_bo *ringbuf;
> > +	struct {
> > +		/** @ringbuf: Kernel BO that holds ring buffer. */
> > +		struct panthor_kernel_bo *bo;
> > +
> > +		/** @nelem: Number of slots in the ring buffer. */
> > +		unsigned int nelem;
> 
> I'm not convinced this nelem is needed. The only place it is used is to check that
> job->ringbuf_idx is not too big, which is something we should do in queue_run_job()
> function rather than in update_fdinfo_stats(). If we don't change ringbuf to a
> structure the patch slims down by more than a dozen lines.
> 
> > +	} ringbuf;
> >  
> >  	/** @iface: Firmware interface. */
> >  	struct {
> > @@ -466,6 +474,9 @@ struct panthor_queue {
> >  		 */
> >  		struct list_head in_flight_jobs;
> >  	} fence_ctx;
> > +
> > +	/** @time_offset: Offset of fdinfo stats structs in queue's syncobj. */
> > +	unsigned long time_offset;
> >  };
> >  
> >  /**
> > @@ -580,7 +591,26 @@ struct panthor_group {
> >  	 * One sync object per queue. The position of the sync object is
> >  	 * determined by the queue index.
> >  	 */
> > -	struct panthor_kernel_bo *syncobjs;
> > +
> > +	struct {
> > +		/** @bo: Kernel BO holding the sync objects. */
> > +		struct panthor_kernel_bo *bo;
> > +
> > +		/** @times_offset: Beginning of time stats after objects of sync pool. */
> > +		size_t times_offset;
> > +	} syncobjs;
> > +
> > +	/** @fdinfo: Per-file total cycle and timestamp values reference. */
> > +	struct {
> > +		/** @data: Pointer to actual per-file sample data. */
> > +		struct panthor_gpu_usage *data;
> > +
> > +		/**
> > +		 * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
> > +		 * and job post-completion processing function
> > +		 */
> > +		struct mutex lock;
> > +	} fdinfo;
> >  
> >  	/** @state: Group state. */
> >  	enum panthor_group_state state;
> > @@ -639,6 +669,18 @@ struct panthor_group {
> >  	struct list_head wait_node;
> >  };
> >  
> > +struct panthor_job_times {
> > +	struct {
> > +		u64 before;
> > +		u64 after;
> > +	} cycles;
> > +
> > +	struct {
> > +		u64 before;
> > +		u64 after;
> > +	} time;
> > +};
> > +
> >  /**
> >   * group_queue_work() - Queue a group work
> >   * @group: Group to queue the work for.
> > @@ -718,6 +760,9 @@ struct panthor_job {
> >  	/** @queue_idx: Index of the queue inside @group. */
> >  	u32 queue_idx;
> >  
> > +	/** @ringbuf_idx: Index of the queue inside @queue. */
> 
> Index of the ringbuffer inside @queue.
> 
> > +	u32 ringbuf_idx;
> > +
> >  	/** @call_info: Information about the userspace command stream call. */
> >  	struct {
> >  		/** @start: GPU address of the userspace command stream. */
> > @@ -814,7 +859,7 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
> >  
> >  	panthor_queue_put_syncwait_obj(queue);
> >  
> > -	panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
> > +	panthor_kernel_bo_destroy(group->vm, queue->ringbuf.bo);
> >  	panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
> >  
> >  	kfree(queue);
> > @@ -828,12 +873,14 @@ static void group_release_work(struct work_struct *work)
> >  	struct panthor_device *ptdev = group->ptdev;
> >  	u32 i;
> >  
> > +	mutex_destroy(&group->fdinfo.lock);
> > +
> >  	for (i = 0; i < group->queue_count; i++)
> >  		group_free_queue(group, group->queues[i]);
> >  
> >  	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
> >  	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
> > -	panthor_kernel_bo_destroy(group->vm, group->syncobjs);
> > +	panthor_kernel_bo_destroy(group->vm, group->syncobjs.bo);
> >  
> >  	panthor_vm_put(group->vm);
> >  	kfree(group);
> > @@ -970,8 +1017,8 @@ cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
> >  	queue->iface.input->extract = queue->iface.output->extract;
> >  	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
> >  
> > -	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
> > -	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
> > +	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf.bo);
> > +	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf.bo);
> >  	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
> >  	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
> >  	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
> > @@ -1926,7 +1973,7 @@ tick_ctx_init(struct panthor_scheduler *sched,
> >  	}
> >  }
> >  
> > -#define NUM_INSTRS_PER_SLOT		16
> > +#define NUM_INSTRS_PER_SLOT		32
> 
> I guess this macro has to be a value that is a power of 2, as it used to divide the ringbuffer size, right?
> 
> >  
> >  static void
> >  group_term_post_processing(struct panthor_group *group)
> > @@ -1964,7 +2011,7 @@ group_term_post_processing(struct panthor_group *group)
> >  		spin_unlock(&queue->fence_ctx.lock);
> >  
> >  		/* Manually update the syncobj seqno to unblock waiters. */
> > -		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
> > +		syncobj = group->syncobjs.bo->kmap + (i * sizeof(*syncobj));
> >  		syncobj->status = ~0;
> >  		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
> >  		sched_queue_work(group->ptdev->scheduler, sync_upd);
> > @@ -2715,6 +2762,30 @@ void panthor_sched_post_reset(struct panthor_device *ptdev)
> >  	sched_queue_work(sched, sync_upd);
> >  }
> >  
> > +static void update_fdinfo_stats(struct panthor_job *job)
> > +{
> > +	struct panthor_group *group = job->group;
> > +	struct panthor_queue *queue = group->queues[job->queue_idx];
> > +	struct panthor_device *ptdev = group->ptdev;
> > +	struct panthor_gpu_usage *fdinfo;
> > +	struct panthor_job_times *times;
> > +
> > +	if (drm_WARN_ON(&ptdev->base, job->ringbuf_idx >= queue->ringbuf.nelem))
> > +		return;
> > +
> > +	times = (struct panthor_job_times *)
> > +		((unsigned long)group->syncobjs.bo->kmap + queue->time_offset +
> > +		 (job->ringbuf_idx * sizeof(struct panthor_job_times)));
> > +
> > +	mutex_lock(&group->fdinfo.lock);
> > +	if ((group->fdinfo.data)) {
> > +		fdinfo = group->fdinfo.data;
> > +		fdinfo->cycles += times->cycles.after - times->cycles.before;
> > +		fdinfo->time += times->time.after - times->time.before;
> > +	}
> > +	mutex_unlock(&group->fdinfo.lock);
> > +}
> > +
> >  static void group_sync_upd_work(struct work_struct *work)
> >  {
> >  	struct panthor_group *group =
> > @@ -2732,7 +2803,7 @@ static void group_sync_upd_work(struct work_struct *work)
> >  		if (!queue)
> >  			continue;
> >  
> > -		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
> > +		syncobj = group->syncobjs.bo->kmap + (queue_idx * sizeof(*syncobj));
> >  
> >  		spin_lock(&queue->fence_ctx.lock);
> >  		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
> > @@ -2750,6 +2821,7 @@ static void group_sync_upd_work(struct work_struct *work)
> >  	dma_fence_end_signalling(cookie);
> >  
> >  	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
> > +		update_fdinfo_stats(job);
> >  		list_del_init(&job->node);
> >  		panthor_job_put(&job->base);
> >  	}
> > @@ -2765,13 +2837,19 @@ queue_run_job(struct drm_sched_job *sched_job)
> >  	struct panthor_queue *queue = group->queues[job->queue_idx];
> >  	struct panthor_device *ptdev = group->ptdev;
> >  	struct panthor_scheduler *sched = ptdev->scheduler;
> > -	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
> > +	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf.bo);
> >  	u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
> > +	u32 ringbuf_index = ringbuf_insert / (SLOTSIZE);
> >  	u64 addr_reg = ptdev->csif_info.cs_reg_count -
> >  		       ptdev->csif_info.unpreserved_cs_reg_count;
> >  	u64 val_reg = addr_reg + 2;
> > -	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
> > -			job->queue_idx * sizeof(struct panthor_syncobj_64b);
> > +	u64 cycle_reg = addr_reg;
> > +	u64 time_reg = val_reg;
> > +	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs.bo) +
> > +		job->queue_idx * sizeof(struct panthor_syncobj_64b);
> > +	u64 times_addr = panthor_kernel_bo_gpuva(group->syncobjs.bo) + queue->time_offset +
> > +		(ringbuf_index * sizeof(struct panthor_job_times));
> > +
> >  	u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
> >  	struct dma_fence *done_fence;
> >  	int ret;
> > @@ -2783,6 +2861,18 @@ queue_run_job(struct drm_sched_job *sched_job)
> >  		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
> >  		(36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
> >  
> > +		/* MOV48 rX:rX+1, cycles_offset */
> > +		(1ull << 56) | (cycle_reg << 48) | (times_addr + offsetof(struct panthor_job_times, cycles.before)),
> > +
> > +		/* MOV48 rX:rX+1, time_offset */
> > +		(1ull << 56) | (time_reg << 48) | (times_addr + offsetof(struct panthor_job_times, time.before)),
> > +
> > +		/* STORE_STATE cycles */
> > +		(40ull << 56) |  (cycle_reg << 40) | (1ll << 32),
> > +
> > +		/* STORE_STATE timer */
> > +		(40ull << 56) |  (time_reg << 40) | (0ll << 32),
> > +
> >  		/* MOV48 rX:rX+1, cs.start */
> >  		(1ull << 56) | (addr_reg << 48) | job->call_info.start,
> >  
> > @@ -2795,6 +2885,18 @@ queue_run_job(struct drm_sched_job *sched_job)
> >  		/* CALL rX:rX+1, rX+2 */
> >  		(32ull << 56) | (addr_reg << 40) | (val_reg << 32),
> >  
> > +		/* MOV48 rX:rX+1, cycles_offset */
> > +		(1ull << 56) | (cycle_reg << 48) | (times_addr + offsetof(struct panthor_job_times, cycles.after)),
> > +
> > +		/* MOV48 rX:rX+1, time_offset */
> > +		(1ull << 56) | (time_reg << 48) | (times_addr + offsetof(struct panthor_job_times, time.after)),
> > +
> > +		/* STORE_STATE cycles */
> > +		(40ull << 56) |  (cycle_reg << 40) | (1ll << 32),
> > +
> > +		/* STORE_STATE timer */
> > +		(40ull << 56) |  (time_reg << 40) | (0ll << 32),
> > +
> >  		/* MOV48 rX:rX+1, sync_addr */
> >  		(1ull << 56) | (addr_reg << 48) | sync_addr,
> >  
> > @@ -2839,7 +2941,7 @@ queue_run_job(struct drm_sched_job *sched_job)
> >  		       queue->fence_ctx.id,
> >  		       atomic64_inc_return(&queue->fence_ctx.seqno));
> >  
> > -	memcpy(queue->ringbuf->kmap + ringbuf_insert,
> > +	memcpy(queue->ringbuf.bo->kmap + ringbuf_insert,
> >  	       call_instrs, sizeof(call_instrs));
> >  
> >  	panthor_job_get(&job->base);
> > @@ -2849,6 +2951,7 @@ queue_run_job(struct drm_sched_job *sched_job)
> >  
> >  	job->ringbuf.start = queue->iface.input->insert;
> >  	job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
> > +	job->ringbuf_idx = ringbuf_index;
> >  
> >  	/* Make sure the ring buffer is updated before the INSERT
> >  	 * register.
> > @@ -2939,7 +3042,8 @@ static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
> >  
> >  static struct panthor_queue *
> >  group_create_queue(struct panthor_group *group,
> > -		   const struct drm_panthor_queue_create *args)
> > +		   const struct drm_panthor_queue_create *args,
> > +		   unsigned int slots_so_far)
> >  {
> >  	struct drm_gpu_scheduler *drm_sched;
> >  	struct panthor_queue *queue;
> > @@ -2965,21 +3069,23 @@ group_create_queue(struct panthor_group *group,
> >  
> >  	queue->priority = args->priority;
> >  
> > -	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
> > +	queue->ringbuf.bo = panthor_kernel_bo_create(group->ptdev, group->vm,
> >  						  args->ringbuf_size,
> >  						  DRM_PANTHOR_BO_NO_MMAP,
> >  						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
> >  						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
> >  						  PANTHOR_VM_KERNEL_AUTO_VA);
> > -	if (IS_ERR(queue->ringbuf)) {
> > -		ret = PTR_ERR(queue->ringbuf);
> > +	if (IS_ERR(queue->ringbuf.bo)) {
> > +		ret = PTR_ERR(queue->ringbuf.bo);
> >  		goto err_free_queue;
> >  	}
> >  
> > -	ret = panthor_kernel_bo_vmap(queue->ringbuf);
> > +	ret = panthor_kernel_bo_vmap(queue->ringbuf.bo);
> >  	if (ret)
> >  		goto err_free_queue;
> >  
> > +	queue->ringbuf.nelem = (args->ringbuf_size / (SLOTSIZE));
> > +
> >  	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
> >  							    &queue->iface.input,
> >  							    &queue->iface.output,
> > @@ -2990,6 +3096,9 @@ group_create_queue(struct panthor_group *group,
> >  		goto err_free_queue;
> >  	}
> >  
> > +	queue->time_offset = group->syncobjs.times_offset +
> > +		(slots_so_far * sizeof(struct panthor_job_times));
> > +
> >  	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
> >  			     group->ptdev->scheduler->wq, 1,
> >  			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
> 
> You can use the newly added SLOTSIZE here.
> 
> > @@ -3020,6 +3129,7 @@ int panthor_group_create(struct panthor_file *pfile,
> >  	struct panthor_scheduler *sched = ptdev->scheduler;
> >  	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
> >  	struct panthor_group *group = NULL;
> > +	unsigned int total_slots;
> >  	u32 gid, i, suspend_size;
> >  	int ret;
> >  
> > @@ -3086,33 +3196,77 @@ int panthor_group_create(struct panthor_file *pfile,
> >  		goto err_put_group;
> >  	}
> >  
> > -	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
> > -						   group_args->queues.count *
> > -						   sizeof(struct panthor_syncobj_64b),
> > +	/*
> > +	 * Need to add size for the fdinfo sample structs, as many as the sum
> > +	 * of the number of job slots for every single queue ringbuffer.
> > +	 */
> > +
> > +	for (i = 0, total_slots = 0; i < group_args->queues.count; i++)
> > +		total_slots += (queue_args[i].ringbuf_size / (SLOTSIZE));
> 
> Minor nit: We should pre-compute here (group_args->queues.count * sizeof(struct panthor_syncobj_64b)) + \
> total_slots * sizeof(struct panthor_job_times) and then use it later as argument to panthor_kernel_bo_create()
> and memset().
> 
> > +
> > +	/*
> > +	 * Memory layout of group's syncobjs BO
> > +	 * group->syncobjs.bo {
> > +	 *	struct panthor_syncobj_64b sync1;
> > +	 *	struct panthor_syncobj_64b sync2;
> > +	 *		...
> > +	 *		As many as group_args->queues.count
> > +	 *		...
> > +	 *	struct panthor_syncobj_64b syncn;
> > +	 *	struct panthor_job_times queue_1slot_1
> > +	 *	struct panthor_job_times queue_1slot_2
> > +	 *		...
> > +	 *		As many as queue[i].ringbuf_size / SLOTSIZE
> > +	 *		...
> > +	 *	struct panthor_job_times queue_1slot_p
> > +	 *		...
> > +	 *		As many as group_args->queues.count
> > +	 *		...
> > +	 *	struct panthor_job_times queue_nslot_1
> > +	 *	struct panthor_job_times queue_nslot_2
> > +	 *		...
> > +	 *		As many as queue[n].ringbuf_size / SLOTSIZE
> > +	 *	struct panthor_job_times queue_nslot_q
> 
> Minor nit: I find it more readable the form "queue1_slotP"... "queueN_slotQ".
> 
> > +	 *
> > +	 *	Linearly, group->syncobjs.bo = {syncojb1,..,syncobjN,
> > +	 *	{queue1 = {js1,..,jsp},..,queueN = {js1,..,jsq}}}
> > +	 * }
> > +	 *
> > +	 */
> > +
> > +	group->syncobjs.bo = panthor_kernel_bo_create(ptdev, group->vm,
> > +						   (group_args->queues.count *
> > +						    sizeof(struct panthor_syncobj_64b))
> > +						   + (total_slots * sizeof(struct panthor_job_times)),
> >  						   DRM_PANTHOR_BO_NO_MMAP,
> >  						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
> >  						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
> >  						   PANTHOR_VM_KERNEL_AUTO_VA);
> > -	if (IS_ERR(group->syncobjs)) {
> > -		ret = PTR_ERR(group->syncobjs);
> > +	if (IS_ERR(group->syncobjs.bo)) {
> > +		ret = PTR_ERR(group->syncobjs.bo);
> >  		goto err_put_group;
> >  	}
> >  
> > -	ret = panthor_kernel_bo_vmap(group->syncobjs);
> > +	ret = panthor_kernel_bo_vmap(group->syncobjs.bo);
> >  	if (ret)
> >  		goto err_put_group;
> >  
> > -	memset(group->syncobjs->kmap, 0,
> > -	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
> > +	memset(group->syncobjs.bo->kmap, 0,
> > +	       (group_args->queues.count * sizeof(struct panthor_syncobj_64b)) +
> > +	       (total_slots * sizeof(struct panthor_job_times)));
> >  
> > -	for (i = 0; i < group_args->queues.count; i++) {
> > -		group->queues[i] = group_create_queue(group, &queue_args[i]);
> > +	group->syncobjs.times_offset =
> > +		group_args->queues.count * sizeof(struct panthor_syncobj_64b);
> > +
> > +	for (i = 0, total_slots = 0; i < group_args->queues.count; i++) {
> > +		group->queues[i] = group_create_queue(group, &queue_args[i], total_slots);
> >  		if (IS_ERR(group->queues[i])) {
> >  			ret = PTR_ERR(group->queues[i]);
> >  			group->queues[i] = NULL;
> >  			goto err_put_group;
> >  		}
> >  
> > +		total_slots += (queue_args[i].ringbuf_size / (SLOTSIZE));
> >  		group->queue_count++;
> >  	}
> >  
> > @@ -3133,6 +3287,9 @@ int panthor_group_create(struct panthor_file *pfile,
> >  	}
> >  	mutex_unlock(&sched->reset.lock);
> >  
> > +	group->fdinfo.data = &pfile->stats;
> > +	mutex_init(&group->fdinfo.lock);
> > +
> >  	return gid;
> >  
> >  err_put_group:
> > @@ -3172,6 +3329,10 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
> >  	mutex_unlock(&sched->lock);
> >  	mutex_unlock(&sched->reset.lock);
> >  
> > +	mutex_lock(&group->fdinfo.lock);
> > +	group->fdinfo.data = NULL;
> > +	mutex_unlock(&group->fdinfo.lock);
> > +
> >  	group_put(group);
> >  	return 0;
> >  }
> > -- 
> > 2.43.0
> >
> 
> I've tried to review the patch as best as I could, specially the math. AFAICT it all checks out,
> would be good for others to have a look.
> 
> Best regards,
> Liviu
> 
> -- 
> ====================
> | I would like to |
> | fix the world,  |
> | but they're not |
> | giving me the   |
>  \ source code!  /
>   ---------------
>     ¯\_(ツ)_/¯

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-04-23 15:51 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-05 21:05 [PATCH 0/2] Support fdinfo runtime and memory stats on Panthor Adrián Larumbe
2024-03-05 21:05 ` [PATCH 1/2] drm/panthor: Enable fdinfo for cycle and time measurements Adrián Larumbe
2024-03-08  8:47   ` kernel test robot
2024-03-28 15:49   ` Liviu Dudau
2024-04-23 15:51     ` Adrián Larumbe
2024-03-05 21:05 ` [PATCH 2/2] drm/panthor: Enable fdinfo for memory stats Adrián Larumbe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.