linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Add fdinfo support to Panfrost
@ 2023-08-08 22:22 Adrián Larumbe
  2023-08-08 22:22 ` [PATCH 1/2] drm/panfrost: " Adrián Larumbe
  2023-08-08 22:22 ` [PATCH 2/2] drm/panfrost: Add drm memory stats display through fdinfo Adrián Larumbe
  0 siblings, 2 replies; 6+ messages in thread
From: Adrián Larumbe @ 2023-08-08 22:22 UTC (permalink / raw)
  To: robh, steven.price, airlied, daniel
  Cc: dri-devel, kernel, linux-kernel, adrian.larumbe

This patch series adds basic fdinfo support to the Panfrost DRM driver.
It will display a series of key:value pairs under /proc/pid/fdinfo/fd
for render processes that open the Panfrost DRM file.

The pairs contain basic DRM GPU engine and memory region information that
can either be read with cat by a privileged user or accessed with IGT's
gputop utility.

Adrián Larumbe (2):
  drm/panfrost: Add fdinfo support to Panfrost
  drm/panfrost: Add drm memory stats display through fdinfo

 drivers/gpu/drm/panfrost/panfrost_device.c | 12 +++++++
 drivers/gpu/drm/panfrost/panfrost_device.h | 10 ++++++
 drivers/gpu/drm/panfrost/panfrost_drv.c    | 40 ++++++++++++++++++++--
 drivers/gpu/drm/panfrost/panfrost_gem.c    | 16 +++++++++
 drivers/gpu/drm/panfrost/panfrost_gem.h    |  1 +
 drivers/gpu/drm/panfrost/panfrost_job.c    |  6 ++++
 drivers/gpu/drm/panfrost/panfrost_job.h    |  3 ++
 7 files changed, 85 insertions(+), 3 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] drm/panfrost: Add fdinfo support to Panfrost
  2023-08-08 22:22 [PATCH 0/2] Add fdinfo support to Panfrost Adrián Larumbe
@ 2023-08-08 22:22 ` Adrián Larumbe
  2023-08-21 15:56   ` Steven Price
  2023-08-08 22:22 ` [PATCH 2/2] drm/panfrost: Add drm memory stats display through fdinfo Adrián Larumbe
  1 sibling, 1 reply; 6+ messages in thread
From: Adrián Larumbe @ 2023-08-08 22:22 UTC (permalink / raw)
  To: robh, steven.price, airlied, daniel
  Cc: dri-devel, kernel, linux-kernel, adrian.larumbe

We calculate the amount of time the GPU spends on a job with ktime samples,
and then add it to the cumulative total for the open DRM file, which is
what will be eventually exposed through the 'fdinfo' DRM file descriptor.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_device.c | 12 ++++++++
 drivers/gpu/drm/panfrost/panfrost_device.h | 10 +++++++
 drivers/gpu/drm/panfrost/panfrost_drv.c    | 32 +++++++++++++++++++++-
 drivers/gpu/drm/panfrost/panfrost_job.c    |  6 ++++
 drivers/gpu/drm/panfrost/panfrost_job.h    |  3 ++
 5 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index fa1a086a862b..67a5e894d037 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -401,6 +401,18 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
 	panfrost_job_enable_interrupts(pfdev);
 }
 
+struct drm_info_gpu panfrost_device_get_counters(struct panfrost_device *pfdev,
+						 struct panfrost_file_priv *panfrost_priv)
+{
+	struct drm_info_gpu gpu_info;
+
+	gpu_info.engine =  panfrost_priv->elapsed_ns;
+	gpu_info.cycles =  panfrost_priv->elapsed_ns * clk_get_rate(pfdev->clock);
+	gpu_info.maxfreq =  clk_get_rate(pfdev->clock);
+
+	return gpu_info;
+}
+
 static int panfrost_device_resume(struct device *dev)
 {
 	struct panfrost_device *pfdev = dev_get_drvdata(dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index b0126b9fbadc..4621a2ece1bb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -141,6 +141,14 @@ struct panfrost_file_priv {
 	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
 
 	struct panfrost_mmu *mmu;
+
+	uint64_t elapsed_ns;
+};
+
+struct drm_info_gpu {
+	unsigned long long engine;
+	unsigned long long cycles;
+	unsigned int maxfreq;
 };
 
 static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
@@ -172,6 +180,8 @@ int panfrost_unstable_ioctl_check(void);
 int panfrost_device_init(struct panfrost_device *pfdev);
 void panfrost_device_fini(struct panfrost_device *pfdev);
 void panfrost_device_reset(struct panfrost_device *pfdev);
+struct drm_info_gpu panfrost_device_get_counters(struct panfrost_device *pfdev,
+						 struct panfrost_file_priv *panfrost_priv);
 
 extern const struct dev_pm_ops panfrost_pm_ops;
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index a2ab99698ca8..65fdc0e4c7cb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -3,6 +3,7 @@
 /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
 /* Copyright 2019 Collabora ltd. */
 
+#include "drm/drm_file.h"
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/pagemap.h>
@@ -267,6 +268,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 	job->requirements = args->requirements;
 	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
 	job->mmu = file_priv->mmu;
+	job->priv = file_priv;
 
 	slot = panfrost_job_get_slot(job);
 
@@ -523,7 +525,34 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
 	PANFROST_IOCTL(MADVISE,		madvise,	DRM_RENDER_ALLOW),
 };
 
-DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
+
+static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
+				     struct panfrost_file_priv *panfrost_priv,
+				     struct drm_printer *p)
+{
+	struct drm_info_gpu gpu_info;
+
+	gpu_info = panfrost_device_get_counters(pfdev, panfrost_priv);
+
+	drm_printf(p, "drm-engine-gpu:\t%llu ns\n", gpu_info.engine);
+	drm_printf(p, "drm-cycles-gpu:\t%llu\n", gpu_info.cycles);
+	drm_printf(p, "drm-maxfreq-gpu:\t%u Hz\n", gpu_info.maxfreq);
+}
+
+static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
+{
+	struct drm_device *dev = file->minor->dev;
+	struct panfrost_device *pfdev = dev->dev_private;
+
+	panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
+
+}
+
+static const struct file_operations panfrost_drm_driver_fops = {
+	.owner = THIS_MODULE,
+	DRM_GEM_FOPS,
+	.show_fdinfo = drm_show_fdinfo,
+};
 
 /*
  * Panfrost driver version:
@@ -535,6 +564,7 @@ static const struct drm_driver panfrost_drm_driver = {
 	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
 	.open			= panfrost_open,
 	.postclose		= panfrost_postclose,
+	.show_fdinfo		= panfrost_show_fdinfo,
 	.ioctls			= panfrost_drm_driver_ioctls,
 	.num_ioctls		= ARRAY_SIZE(panfrost_drm_driver_ioctls),
 	.fops			= &panfrost_drm_driver_fops,
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index dbc597ab46fb..d0063cac9f72 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -157,6 +157,11 @@ static struct panfrost_job *
 panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
 {
 	struct panfrost_job *job = pfdev->jobs[slot][0];
+	job->priv->elapsed_ns +=
+		ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
+
+	/* Reset in case the job has to be requeued */
+	job->start_time = 0;
 
 	WARN_ON(!job);
 	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
@@ -233,6 +238,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 	subslot = panfrost_enqueue_job(pfdev, js, job);
 	/* Don't queue the job if a reset is in progress */
 	if (!atomic_read(&pfdev->reset.pending)) {
+		job->start_time = ktime_get();
 		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
 		dev_dbg(pfdev->dev,
 			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index 8becc1ba0eb9..b4318e476694 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -32,6 +32,9 @@ struct panfrost_job {
 
 	/* Fence to be signaled by drm-sched once its done with the job */
 	struct dma_fence *render_done_fence;
+
+	struct panfrost_file_priv *priv;
+	ktime_t start_time;
 };
 
 int panfrost_job_init(struct panfrost_device *pfdev);
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/panfrost: Add drm memory stats display through fdinfo
  2023-08-08 22:22 [PATCH 0/2] Add fdinfo support to Panfrost Adrián Larumbe
  2023-08-08 22:22 ` [PATCH 1/2] drm/panfrost: " Adrián Larumbe
@ 2023-08-08 22:22 ` Adrián Larumbe
  2023-08-21 15:56   ` Steven Price
  1 sibling, 1 reply; 6+ messages in thread
From: Adrián Larumbe @ 2023-08-08 22:22 UTC (permalink / raw)
  To: robh, steven.price, airlied, daniel
  Cc: dri-devel, kernel, linux-kernel, adrian.larumbe

For drm_show_memory_stats to produce a more accurate report, provide a new
Panfrost DRM object callback that decides whether an object is resident in
memory or eligible for purging.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c |  8 ++++++--
 drivers/gpu/drm/panfrost/panfrost_gem.c | 16 ++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_gem.h |  1 +
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 65fdc0e4c7cb..46e8e69479c0 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -441,11 +441,14 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
 	args->retained = drm_gem_shmem_madvise(&bo->base, args->madv);
 
 	if (args->retained) {
-		if (args->madv == PANFROST_MADV_DONTNEED)
+		if (args->madv == PANFROST_MADV_DONTNEED) {
 			list_move_tail(&bo->base.madv_list,
 				       &pfdev->shrinker_list);
-		else if (args->madv == PANFROST_MADV_WILLNEED)
+			bo->is_purgable = true;
+		} else if (args->madv == PANFROST_MADV_WILLNEED) {
 			list_del_init(&bo->base.madv_list);
+			bo->is_purgable = false;
+		}
 	}
 
 out_unlock_mappings:
@@ -546,6 +549,7 @@ static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
 
 	panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
 
+	drm_show_memory_stats(p, file);
 }
 
 static const struct file_operations panfrost_drm_driver_fops = {
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 3c812fbd126f..80ab1521a14e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -195,6 +195,21 @@ static int panfrost_gem_pin(struct drm_gem_object *obj)
 	return drm_gem_shmem_pin(&bo->base);
 }
 
+static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj)
+{
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+	struct panfrost_device *pfdev = obj->dev->dev_private;
+	unsigned int res = 0;
+
+	mutex_lock(&pfdev->shrinker_lock);
+	res |= (bo->is_purgable) ? DRM_GEM_OBJECT_PURGEABLE : 0;
+	mutex_unlock(&pfdev->shrinker_lock);
+
+	res |= (bo->base.pages) ? DRM_GEM_OBJECT_RESIDENT : 0;
+
+	return res;
+}
+
 static const struct drm_gem_object_funcs panfrost_gem_funcs = {
 	.free = panfrost_gem_free_object,
 	.open = panfrost_gem_open,
@@ -206,6 +221,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
 	.vmap = drm_gem_shmem_object_vmap,
 	.vunmap = drm_gem_shmem_object_vunmap,
 	.mmap = drm_gem_shmem_object_mmap,
+	.status = panfrost_gem_status,
 	.vm_ops = &drm_gem_shmem_vm_ops,
 };
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index ad2877eeeccd..e06f7ceb8f73 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -38,6 +38,7 @@ struct panfrost_gem_object {
 
 	bool noexec		:1;
 	bool is_heap		:1;
+	bool is_purgable	:1;
 };
 
 struct panfrost_gem_mapping {
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/panfrost: Add fdinfo support to Panfrost
  2023-08-08 22:22 ` [PATCH 1/2] drm/panfrost: " Adrián Larumbe
@ 2023-08-21 15:56   ` Steven Price
  2023-08-23 12:55     ` Adrián Larumbe
  0 siblings, 1 reply; 6+ messages in thread
From: Steven Price @ 2023-08-21 15:56 UTC (permalink / raw)
  To: Adrián Larumbe, robh, airlied, daniel
  Cc: dri-devel, kernel, linux-kernel

On 08/08/2023 23:22, Adrián Larumbe wrote:
> We calculate the amount of time the GPU spends on a job with ktime samples,
> and then add it to the cumulative total for the open DRM file, which is
> what will be eventually exposed through the 'fdinfo' DRM file descriptor.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_device.c | 12 ++++++++
>  drivers/gpu/drm/panfrost/panfrost_device.h | 10 +++++++
>  drivers/gpu/drm/panfrost/panfrost_drv.c    | 32 +++++++++++++++++++++-
>  drivers/gpu/drm/panfrost/panfrost_job.c    |  6 ++++
>  drivers/gpu/drm/panfrost/panfrost_job.h    |  3 ++
>  5 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> index fa1a086a862b..67a5e894d037 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> @@ -401,6 +401,18 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>  	panfrost_job_enable_interrupts(pfdev);
>  }
>  
> +struct drm_info_gpu panfrost_device_get_counters(struct panfrost_device *pfdev,
> +						 struct panfrost_file_priv *panfrost_priv)
> +{
> +	struct drm_info_gpu gpu_info;
> +
> +	gpu_info.engine =  panfrost_priv->elapsed_ns;
> +	gpu_info.cycles =  panfrost_priv->elapsed_ns * clk_get_rate(pfdev->clock);
> +	gpu_info.maxfreq =  clk_get_rate(pfdev->clock);

First, calling clk_get_rate() twice here is inefficient.

Second, I'm not sure it's really worth producing these derived values.
As I understand it the purpose of cycles/maxfreq is to be able to
provide a utilisation value which accounts for DVFS. I.e. if the GPU is
clocked down the utilisation of cycles/maxfreq is low even if the GPU is
active for the whole sample period.

What we therefore need to report is the *maximum* frequency in
clk_get_rate(). Also rather than just multiplying elapsed_ns by the
current clock rate, we need to sum up cycles over time as the clock
frequency changes. Alternatively it might be possible to use the actual
GPU register (CYCLE_COUNT_LO/CYCLE_COUNT_HI at offset 0x90,0x94) -
although note that this is reset when the GPU is reset.

Finally I doubt elapsed_ns is actually what user space is expecting. The
GPU has multiple job slots (3, but only 2 are used in almost all cases)
so can be running more than one job at a time. So there's going to be
some double counting going on here.

Sorry to poke holes in this, I think this would be a good feature. But
if we're going to return information we want it to be at least
reasonably correct.

Thanks,

Steve

> +
> +	return gpu_info;
> +}
> +
>  static int panfrost_device_resume(struct device *dev)
>  {
>  	struct panfrost_device *pfdev = dev_get_drvdata(dev);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index b0126b9fbadc..4621a2ece1bb 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -141,6 +141,14 @@ struct panfrost_file_priv {
>  	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
>  
>  	struct panfrost_mmu *mmu;
> +
> +	uint64_t elapsed_ns;
> +};
> +
> +struct drm_info_gpu {
> +	unsigned long long engine;
> +	unsigned long long cycles;
> +	unsigned int maxfreq;
>  };
>  
>  static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
> @@ -172,6 +180,8 @@ int panfrost_unstable_ioctl_check(void);
>  int panfrost_device_init(struct panfrost_device *pfdev);
>  void panfrost_device_fini(struct panfrost_device *pfdev);
>  void panfrost_device_reset(struct panfrost_device *pfdev);
> +struct drm_info_gpu panfrost_device_get_counters(struct panfrost_device *pfdev,
> +						 struct panfrost_file_priv *panfrost_priv);
>  
>  extern const struct dev_pm_ops panfrost_pm_ops;
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index a2ab99698ca8..65fdc0e4c7cb 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -3,6 +3,7 @@
>  /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
>  /* Copyright 2019 Collabora ltd. */
>  
> +#include "drm/drm_file.h"
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/pagemap.h>
> @@ -267,6 +268,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>  	job->requirements = args->requirements;
>  	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
>  	job->mmu = file_priv->mmu;
> +	job->priv = file_priv;
>  
>  	slot = panfrost_job_get_slot(job);
>  
> @@ -523,7 +525,34 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
>  	PANFROST_IOCTL(MADVISE,		madvise,	DRM_RENDER_ALLOW),
>  };
>  
> -DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
> +
> +static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> +				     struct panfrost_file_priv *panfrost_priv,
> +				     struct drm_printer *p)
> +{
> +	struct drm_info_gpu gpu_info;
> +
> +	gpu_info = panfrost_device_get_counters(pfdev, panfrost_priv);
> +
> +	drm_printf(p, "drm-engine-gpu:\t%llu ns\n", gpu_info.engine);
> +	drm_printf(p, "drm-cycles-gpu:\t%llu\n", gpu_info.cycles);
> +	drm_printf(p, "drm-maxfreq-gpu:\t%u Hz\n", gpu_info.maxfreq);
> +}
> +
> +static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> +{
> +	struct drm_device *dev = file->minor->dev;
> +	struct panfrost_device *pfdev = dev->dev_private;
> +
> +	panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
> +
> +}
> +
> +static const struct file_operations panfrost_drm_driver_fops = {
> +	.owner = THIS_MODULE,
> +	DRM_GEM_FOPS,
> +	.show_fdinfo = drm_show_fdinfo,
> +};
>  
>  /*
>   * Panfrost driver version:
> @@ -535,6 +564,7 @@ static const struct drm_driver panfrost_drm_driver = {
>  	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
>  	.open			= panfrost_open,
>  	.postclose		= panfrost_postclose,
> +	.show_fdinfo		= panfrost_show_fdinfo,
>  	.ioctls			= panfrost_drm_driver_ioctls,
>  	.num_ioctls		= ARRAY_SIZE(panfrost_drm_driver_ioctls),
>  	.fops			= &panfrost_drm_driver_fops,
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index dbc597ab46fb..d0063cac9f72 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -157,6 +157,11 @@ static struct panfrost_job *
>  panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
>  {
>  	struct panfrost_job *job = pfdev->jobs[slot][0];
> +	job->priv->elapsed_ns +=
> +		ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
> +
> +	/* Reset in case the job has to be requeued */
> +	job->start_time = 0;
>  
>  	WARN_ON(!job);
>  	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
> @@ -233,6 +238,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  	subslot = panfrost_enqueue_job(pfdev, js, job);
>  	/* Don't queue the job if a reset is in progress */
>  	if (!atomic_read(&pfdev->reset.pending)) {
> +		job->start_time = ktime_get();
>  		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
>  		dev_dbg(pfdev->dev,
>  			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index 8becc1ba0eb9..b4318e476694 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -32,6 +32,9 @@ struct panfrost_job {
>  
>  	/* Fence to be signaled by drm-sched once its done with the job */
>  	struct dma_fence *render_done_fence;
> +
> +	struct panfrost_file_priv *priv;
> +	ktime_t start_time;
>  };
>  
>  int panfrost_job_init(struct panfrost_device *pfdev);


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] drm/panfrost: Add drm memory stats display through fdinfo
  2023-08-08 22:22 ` [PATCH 2/2] drm/panfrost: Add drm memory stats display through fdinfo Adrián Larumbe
@ 2023-08-21 15:56   ` Steven Price
  0 siblings, 0 replies; 6+ messages in thread
From: Steven Price @ 2023-08-21 15:56 UTC (permalink / raw)
  To: Adrián Larumbe, robh, airlied, daniel
  Cc: dri-devel, kernel, linux-kernel

On 08/08/2023 23:22, Adrián Larumbe wrote:
> For drm_show_memory_stats to produce a more accurate report, provide a new
> Panfrost DRM object callback that decides whether an object is resident in
> memory or eligible for purging.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  8 ++++++--
>  drivers/gpu/drm/panfrost/panfrost_gem.c | 16 ++++++++++++++++
>  drivers/gpu/drm/panfrost/panfrost_gem.h |  1 +
>  3 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 65fdc0e4c7cb..46e8e69479c0 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -441,11 +441,14 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
>  	args->retained = drm_gem_shmem_madvise(&bo->base, args->madv);
>  
>  	if (args->retained) {
> -		if (args->madv == PANFROST_MADV_DONTNEED)
> +		if (args->madv == PANFROST_MADV_DONTNEED) {
>  			list_move_tail(&bo->base.madv_list,
>  				       &pfdev->shrinker_list);
> -		else if (args->madv == PANFROST_MADV_WILLNEED)
> +			bo->is_purgable = true;
> +		} else if (args->madv == PANFROST_MADV_WILLNEED) {
>  			list_del_init(&bo->base.madv_list);
> +			bo->is_purgable = false;
> +		}
>  	}
>  
>  out_unlock_mappings:
> @@ -546,6 +549,7 @@ static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
>  
>  	panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
>  
> +	drm_show_memory_stats(p, file);
>  }
>  
>  static const struct file_operations panfrost_drm_driver_fops = {
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index 3c812fbd126f..80ab1521a14e 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -195,6 +195,21 @@ static int panfrost_gem_pin(struct drm_gem_object *obj)
>  	return drm_gem_shmem_pin(&bo->base);
>  }
>  
> +static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj)
> +{
> +	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> +	struct panfrost_device *pfdev = obj->dev->dev_private;
> +	unsigned int res = 0;
> +
> +	mutex_lock(&pfdev->shrinker_lock);

This function is called by drm_show_memory_stats() while holding a
spin_lock, so we can't take the mutex here.

However, given this is racy anyway (the status could change before the
data is returned to user space), I don't think we need to have the mutex
held anyway.

Otherwise this looks good.

Steve

> +	res |= (bo->is_purgable) ? DRM_GEM_OBJECT_PURGEABLE : 0;
> +	mutex_unlock(&pfdev->shrinker_lock);
> +
> +	res |= (bo->base.pages) ? DRM_GEM_OBJECT_RESIDENT : 0;
> +
> +	return res;
> +}
> +
>  static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.free = panfrost_gem_free_object,
>  	.open = panfrost_gem_open,
> @@ -206,6 +221,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>  	.vmap = drm_gem_shmem_object_vmap,
>  	.vunmap = drm_gem_shmem_object_vunmap,
>  	.mmap = drm_gem_shmem_object_mmap,
> +	.status = panfrost_gem_status,
>  	.vm_ops = &drm_gem_shmem_vm_ops,
>  };
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> index ad2877eeeccd..e06f7ceb8f73 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> @@ -38,6 +38,7 @@ struct panfrost_gem_object {
>  
>  	bool noexec		:1;
>  	bool is_heap		:1;
> +	bool is_purgable	:1;
>  };
>  
>  struct panfrost_gem_mapping {


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/panfrost: Add fdinfo support to Panfrost
  2023-08-21 15:56   ` Steven Price
@ 2023-08-23 12:55     ` Adrián Larumbe
  0 siblings, 0 replies; 6+ messages in thread
From: Adrián Larumbe @ 2023-08-23 12:55 UTC (permalink / raw)
  To: Steven Price
  Cc: robh, airlied, daniel, dri-devel, kernel, linux-kernel,
	Boris Brezillon, healych

Hi Steven, thanks for your feedback.

On 21.08.2023 16:56, Steven Price wrote:
>> We calculate the amount of time the GPU spends on a job with ktime samples,
>> and then add it to the cumulative total for the open DRM file, which is
>> what will be eventually exposed through the 'fdinfo' DRM file descriptor.
>> 
>> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
>> ---
>>  drivers/gpu/drm/panfrost/panfrost_device.c | 12 ++++++++
>>  drivers/gpu/drm/panfrost/panfrost_device.h | 10 +++++++
>>  drivers/gpu/drm/panfrost/panfrost_drv.c    | 32 +++++++++++++++++++++-
>>  drivers/gpu/drm/panfrost/panfrost_job.c    |  6 ++++
>>  drivers/gpu/drm/panfrost/panfrost_job.h    |  3 ++
>>  5 files changed, 62 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
>> index fa1a086a862b..67a5e894d037 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
>> @@ -401,6 +401,18 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>>  	panfrost_job_enable_interrupts(pfdev);
>>  }
>>  
>> +struct drm_info_gpu panfrost_device_get_counters(struct panfrost_device *pfdev,
>> +						 struct panfrost_file_priv *panfrost_priv)
>> +{
>> +	struct drm_info_gpu gpu_info;
>> +
>> +	gpu_info.engine =  panfrost_priv->elapsed_ns;
>> +	gpu_info.cycles =  panfrost_priv->elapsed_ns * clk_get_rate(pfdev->clock);
>> +	gpu_info.maxfreq =  clk_get_rate(pfdev->clock);
>
>First, calling clk_get_rate() twice here is inefficient.
>
>Second, I'm not sure it's really worth producing these derived values.
>As I understand it the purpose of cycles/maxfreq is to be able to
>provide a utilisation value which accounts for DVFS. I.e. if the GPU is
>clocked down the utilisation of cycles/maxfreq is low even if the GPU is
>active for the whole sample period.

>What we therefore need to report is the *maximum* frequency in
>clk_get_rate(). Also rather than just multiplying elapsed_ns by the
>current clock rate, we need to sum up cycles over time as the clock
>frequency changes. Alternatively it might be possible to use the actual
>GPU register (CYCLE_COUNT_LO/CYCLE_COUNT_HI at offset 0x90,0x94) -
>although note that this is reset when the GPU is reset.

I've fixed this in a second version of the patch and now calculate the maximum
operating frequency during the driver initialisation stage in the following way:

unsigned long freq = ULONG_MAX;

/* Find the fastest defined rate  */
opp = dev_pm_opp_find_freq_floor(dev, &freq);
if (IS_ERR(opp))
	return PTR_ERR(opp);
pfdev->features.fast_rate = freq;

dev_pm_opp_put(opp);

Regarding the number of cycles, sampling CYCLE_COUNT would give us the most
accurate figure, however fdinfo must return values that are relative to the file
being queried, whereas that register would give us a raw count for all queues.

There's also the problem of clock frequencies being variable over time because
of DVFS. To get an accurate value for the number of cycles spent in a given
job, we would have to store clock frequencies together with their timestamps
every time there's a rate change, and then in the job dequeue function traverse
that list, find the intervals that overlap the job and multiply every frequency
by the length of its interval. This sounds like too much work, so I think until
I can come up with something less complex I'm going to drop reporting of the
drm-cycles value altogether.

Although come to think of it, maybe I could sample the number of cycles both at
the beginning and end of a job and add the difference to an overall per-file
tally.

>Finally I doubt elapsed_ns is actually what user space is expecting. The
>GPU has multiple job slots (3, but only 2 are used in almost all cases)
>so can be running more than one job at a time. So there's going to be
>some double counting going on here.
>
>Sorry to poke holes in this, I think this would be a good feature. But
>if we're going to return information we want it to be at least
>reasonably correct.

Thanks for pointing this out, I hadn't considered that the same file could
have two jobs running at the same time.

I've checked what other drivers do for reporting these values, and they print a
separate drm-engine value for each of their execution units (render, copy,
compute, etc).  In our case, because there are 2 or 3 queues, perhaps we should
do the same.

>Thanks,
>
>Steve
>
>> +
>> +	return gpu_info;
>> +}
>> +
>>  static int panfrost_device_resume(struct device *dev)
>>  {
>>  	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>> index b0126b9fbadc..4621a2ece1bb 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>> @@ -141,6 +141,14 @@ struct panfrost_file_priv {
>>  	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
>>  
>>  	struct panfrost_mmu *mmu;
>> +
>> +	uint64_t elapsed_ns;
>> +};
>> +
>> +struct drm_info_gpu {
>> +	unsigned long long engine;
>> +	unsigned long long cycles;
>> +	unsigned int maxfreq;
>>  };
>>  
>>  static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev)
>> @@ -172,6 +180,8 @@ int panfrost_unstable_ioctl_check(void);
>>  int panfrost_device_init(struct panfrost_device *pfdev);
>>  void panfrost_device_fini(struct panfrost_device *pfdev);
>>  void panfrost_device_reset(struct panfrost_device *pfdev);
>> +struct drm_info_gpu panfrost_device_get_counters(struct panfrost_device *pfdev,
>> +						 struct panfrost_file_priv *panfrost_priv);
>>  
>>  extern const struct dev_pm_ops panfrost_pm_ops;
>>  
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
>> index a2ab99698ca8..65fdc0e4c7cb 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
>> @@ -3,6 +3,7 @@
>>  /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
>>  /* Copyright 2019 Collabora ltd. */
>>  
>> +#include "drm/drm_file.h"
>>  #include <linux/module.h>
>>  #include <linux/of.h>
>>  #include <linux/pagemap.h>
>> @@ -267,6 +268,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>>  	job->requirements = args->requirements;
>>  	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
>>  	job->mmu = file_priv->mmu;
>> +	job->priv = file_priv;
>>  
>>  	slot = panfrost_job_get_slot(job);
>>  
>> @@ -523,7 +525,34 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
>>  	PANFROST_IOCTL(MADVISE,		madvise,	DRM_RENDER_ALLOW),
>>  };
>>  
>> -DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
>> +
>> +static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
>> +				     struct panfrost_file_priv *panfrost_priv,
>> +				     struct drm_printer *p)
>> +{
>> +	struct drm_info_gpu gpu_info;
>> +
>> +	gpu_info = panfrost_device_get_counters(pfdev, panfrost_priv);
>> +
>> +	drm_printf(p, "drm-engine-gpu:\t%llu ns\n", gpu_info.engine);
>> +	drm_printf(p, "drm-cycles-gpu:\t%llu\n", gpu_info.cycles);
>> +	drm_printf(p, "drm-maxfreq-gpu:\t%u Hz\n", gpu_info.maxfreq);
>> +}
>> +
>> +static void panfrost_show_fdinfo(struct drm_printer *p, struct drm_file *file)
>> +{
>> +	struct drm_device *dev = file->minor->dev;
>> +	struct panfrost_device *pfdev = dev->dev_private;
>> +
>> +	panfrost_gpu_show_fdinfo(pfdev, file->driver_priv, p);
>> +
>> +}
>> +
>> +static const struct file_operations panfrost_drm_driver_fops = {
>> +	.owner = THIS_MODULE,
>> +	DRM_GEM_FOPS,
>> +	.show_fdinfo = drm_show_fdinfo,
>> +};
>>  
>>  /*
>>   * Panfrost driver version:
>> @@ -535,6 +564,7 @@ static const struct drm_driver panfrost_drm_driver = {
>>  	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
>>  	.open			= panfrost_open,
>>  	.postclose		= panfrost_postclose,
>> +	.show_fdinfo		= panfrost_show_fdinfo,
>>  	.ioctls			= panfrost_drm_driver_ioctls,
>>  	.num_ioctls		= ARRAY_SIZE(panfrost_drm_driver_ioctls),
>>  	.fops			= &panfrost_drm_driver_fops,
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>> index dbc597ab46fb..d0063cac9f72 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>> @@ -157,6 +157,11 @@ static struct panfrost_job *
>>  panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
>>  {
>>  	struct panfrost_job *job = pfdev->jobs[slot][0];
>> +	job->priv->elapsed_ns +=
>> +		ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
>> +
>> +	/* Reset in case the job has to be requeued */
>> +	job->start_time = 0;
>>  
>>  	WARN_ON(!job);
>>  	pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
>> @@ -233,6 +238,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>>  	subslot = panfrost_enqueue_job(pfdev, js, job);
>>  	/* Don't queue the job if a reset is in progress */
>>  	if (!atomic_read(&pfdev->reset.pending)) {
>> +		job->start_time = ktime_get();
>>  		job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
>>  		dev_dbg(pfdev->dev,
>>  			"JS: Submitting atom %p to js[%d][%d] with head=0x%llx AS %d",
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
>> index 8becc1ba0eb9..b4318e476694 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
>> @@ -32,6 +32,9 @@ struct panfrost_job {
>>  
>>  	/* Fence to be signaled by drm-sched once its done with the job */
>>  	struct dma_fence *render_done_fence;
>> +
>> +	struct panfrost_file_priv *priv;
>> +	ktime_t start_time;
>>  };
>>  
>>  int panfrost_job_init(struct panfrost_device *pfdev);

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-08-23 12:56 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-08 22:22 [PATCH 0/2] Add fdinfo support to Panfrost Adrián Larumbe
2023-08-08 22:22 ` [PATCH 1/2] drm/panfrost: " Adrián Larumbe
2023-08-21 15:56   ` Steven Price
2023-08-23 12:55     ` Adrián Larumbe
2023-08-08 22:22 ` [PATCH 2/2] drm/panfrost: Add drm memory stats display through fdinfo Adrián Larumbe
2023-08-21 15:56   ` Steven Price

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).