* [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
@ 2019-09-13 11:17 Boris Brezillon
  2019-09-13 11:17 ` [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl Boris Brezillon
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Boris Brezillon @ 2019-09-13 11:17 UTC (permalink / raw)
  To: Rob Herring, Tomeu Vizoso, Alyssa Rosenzweig, Steven Price
  Cc: Boris Brezillon, dri-devel

The READ/WRITE flags are particularly useful if we want to avoid
serialization of jobs that read from the same BO but never write to it.
The NO_IMPLICIT_FENCE flag might be useful when the user knows the BO is
shared but jobs are using different portions of the buffer.
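
To illustrate the intended usage from userspace, here is a minimal
sketch (hypothetical handles and addresses, not part of this patch;
assumes libdrm's drmIoctl() and the uapi additions below):

	struct drm_panfrost_submit_bo bos[2] = {
		{ .handle = tex_handle, .flags = PANFROST_SUBMIT_BO_READ },
		{ .handle = rt_handle,  .flags = PANFROST_SUBMIT_BO_RW },
	};
	struct drm_panfrost_submit submit = {
		.jc = jc_gpu_va,
		.bo_descs = (__u64)(uintptr_t)bos,
		.bo_desc_count = 2,
	};

	/* errno carries the kernel error (EINVAL, ENOENT, ...) on failure */
	if (drmIoctl(fd, DRM_IOCTL_PANFROST_SUBMIT, &submit))
		perror("PANFROST_SUBMIT");

Two jobs submitted this way with BO_READ on the same texture no longer
serialize against each other.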

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
 drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
 drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
 include/uapi/drm/panfrost_drm.h         |  41 ++++++
 4 files changed, 247 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d74442d71048..08082fd557c3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
 		  struct drm_panfrost_submit *args,
 		  struct panfrost_job *job)
 {
-	job->bo_count = args->bo_handle_count;
+	struct drm_panfrost_submit_bo *bo_descs = NULL;
+	u32 *handles = NULL;
+	u32 i, bo_count;
+	int ret = 0;
 
-	if (!job->bo_count)
+	bo_count = args->bo_desc_count ?
+		   args->bo_desc_count : args->bo_handle_count;
+	if (!bo_count)
 		return 0;
 
-	job->implicit_fences = kvmalloc_array(job->bo_count,
-				  sizeof(struct dma_fence *),
+	job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
 				  GFP_KERNEL | __GFP_ZERO);
-	if (!job->implicit_fences)
+	if (!job->bos)
 		return -ENOMEM;
 
-	return drm_gem_objects_lookup(file_priv,
-				      (void __user *)(uintptr_t)args->bo_handles,
-				      job->bo_count, &job->bos);
+	job->bo_count = bo_count;
+	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!bo_descs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (!args->bo_desc_count) {
+		handles = kvmalloc_array(bo_count, sizeof(*handles),
+					 GFP_KERNEL);
+		if (!handles) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		if (copy_from_user(handles,
+				   (void __user *)(uintptr_t)args->bo_handles,
+				   job->bo_count * sizeof(*handles))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		for (i = 0; i < job->bo_count; i++) {
+			bo_descs[i].handle = handles[i];
+			bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
+					    PANFROST_SUBMIT_BO_READ;
+		}
+	} else if (copy_from_user(bo_descs,
+				  (void __user *)(uintptr_t)args->bo_descs,
+				  job->bo_count * sizeof(*bo_descs))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	for (i = 0; i < job->bo_count; i++) {
+		if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
+		    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		job->bos[i].flags = bo_descs[i].flags;
+		job->bos[i].obj = drm_gem_object_lookup(file_priv,
+							bo_descs[i].handle);
+		if (!job->bos[i].obj) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+out:
+	kvfree(handles);
+	kvfree(bo_descs);
+	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 05c85f45a0de..e4b74fde9339 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 	pm_runtime_put_autosuspend(pfdev->dev);
 }
 
-static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
-					   int bo_count,
-					   struct dma_fence **implicit_fences)
+static int panfrost_acquire_object_fences(struct panfrost_job *job)
 {
-	int i;
+	int i, ret;
 
-	for (i = 0; i < bo_count; i++)
-		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
+	for (i = 0; i < job->bo_count; i++) {
+		struct panfrost_job_bo_desc *bo = &job->bos[i];
+		struct dma_resv *robj = bo->obj->resv;
+
+		if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
+			ret = dma_resv_reserve_shared(robj, 1);
+			if (ret)
+				return ret;
+		}
+
+		if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
+			continue;
+
+		if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
+			ret = dma_resv_get_fences_rcu(robj, &bo->excl,
+						      &bo->shared_count,
+						      &bo->shared);
+			if (ret)
+				return ret;
+		} else {
+			bo->excl = dma_resv_get_excl_rcu(robj);
+		}
+	}
+
+	return 0;
 }
 
-static void panfrost_attach_object_fences(struct drm_gem_object **bos,
-					  int bo_count,
-					  struct dma_fence *fence)
+static void panfrost_attach_object_fences(struct panfrost_job *job)
 {
 	int i;
 
-	for (i = 0; i < bo_count; i++)
-		dma_resv_add_excl_fence(bos[i]->resv, fence);
+	for (i = 0; i < job->bo_count; i++) {
+		struct drm_gem_object *obj = job->bos[i].obj;
+
+		if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
+			dma_resv_add_excl_fence(obj->resv,
+						job->render_done_fence);
+		else
+			dma_resv_add_shared_fence(obj->resv,
+						  job->render_done_fence);
+	}
+}
+
+static int panfrost_job_lock_bos(struct panfrost_job *job,
+				 struct ww_acquire_ctx *acquire_ctx)
+{
+	int contended = -1;
+	int i, ret;
+
+	ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+	if (contended != -1) {
+		struct drm_gem_object *obj = job->bos[contended].obj;
+
+		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
+						       acquire_ctx);
+		if (ret) {
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < job->bo_count; i++) {
+		if (i == contended)
+			continue;
+
+		ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
+						  acquire_ctx);
+		if (ret) {
+			int j;
+
+			for (j = 0; j < i; j++)
+				ww_mutex_unlock(&job->bos[j].obj->resv->lock);
+
+			if (contended != -1 && contended >= i) {
+				struct drm_gem_object *contended_obj;
+
+				contended_obj = job->bos[contended].obj;
+				ww_mutex_unlock(&contended_obj->resv->lock);
+			}
+
+			if (ret == -EDEADLK) {
+				contended = i;
+				goto retry;
+			}
+
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	ww_acquire_done(acquire_ctx);
+
+	return 0;
+}
+
+static void panfrost_job_unlock_bos(struct panfrost_job *job,
+				    struct ww_acquire_ctx *acquire_ctx)
+{
+	int i;
+
+	for (i = 0; i < job->bo_count; i++)
+		ww_mutex_unlock(&job->bos[i].obj->resv->lock);
+
+	ww_acquire_fini(acquire_ctx);
 }
 
 int panfrost_job_push(struct panfrost_job *job)
@@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
 
 	mutex_lock(&pfdev->sched_lock);
 
-	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
-					    &acquire_ctx);
+	ret = panfrost_job_lock_bos(job, &acquire_ctx);
 	if (ret) {
 		mutex_unlock(&pfdev->sched_lock);
 		return ret;
@@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
 
 	kref_get(&job->refcount); /* put by scheduler job completion */
 
-	panfrost_acquire_object_fences(job->bos, job->bo_count,
-				       job->implicit_fences);
+	panfrost_acquire_object_fences(job);
 
 	drm_sched_entity_push_job(&job->base, entity);
 
 	mutex_unlock(&pfdev->sched_lock);
 
-	panfrost_attach_object_fences(job->bos, job->bo_count,
-				      job->render_done_fence);
+	panfrost_attach_object_fences(job);
 
 unlock:
-	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
+	panfrost_job_unlock_bos(job, &acquire_ctx);
 
 	return ret;
 }
@@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref)
 			dma_fence_put(job->in_fences[i]);
 		kvfree(job->in_fences);
 	}
-	if (job->implicit_fences) {
-		for (i = 0; i < job->bo_count; i++)
-			dma_fence_put(job->implicit_fences[i]);
-		kvfree(job->implicit_fences);
+
+	for (i = 0; i < job->bo_count; i++) {
+		unsigned int j;
+
+		dma_fence_put(job->bos[i].excl);
+		for (j = 0; j < job->bos[i].shared_count; j++)
+			dma_fence_put(job->bos[i].shared[j]);
 	}
+
 	dma_fence_put(job->done_fence);
 	dma_fence_put(job->render_done_fence);
 
-	if (job->bos) {
-		for (i = 0; i < job->bo_count; i++)
-			drm_gem_object_put_unlocked(job->bos[i]);
-		kvfree(job->bos);
-	}
+	for (i = 0; i < job->bo_count; i++)
+		drm_gem_object_put_unlocked(job->bos[i].obj);
 
+	kvfree(job->bos);
 	kfree(job);
 }
 
@@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job
 		}
 	}
 
-	/* Implicit fences, max. one per BO */
+	/* Implicit fences */
 	for (i = 0; i < job->bo_count; i++) {
-		if (job->implicit_fences[i]) {
-			fence = job->implicit_fences[i];
-			job->implicit_fences[i] = NULL;
+		unsigned int j;
+
+		if (job->bos[i].excl) {
+			fence = job->bos[i].excl;
+			job->bos[i].excl = NULL;
+			return fence;
+		}
+
+		for (j = 0; j < job->bos[i].shared_count; j++) {
+			if (!job->bos[i].shared[j])
+				continue;
+
+			fence = job->bos[i].shared[j];
+			job->bos[i].shared[j] = NULL;
 			return fence;
 		}
 	}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index 62454128a792..53440640a6e3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -11,6 +11,14 @@ struct panfrost_device;
 struct panfrost_gem_object;
 struct panfrost_file_priv;
 
+struct panfrost_job_bo_desc {
+	struct drm_gem_object *obj;
+	struct dma_fence *excl;
+	struct dma_fence **shared;
+	unsigned int shared_count;
+	u32 flags;
+};
+
 struct panfrost_job {
 	struct drm_sched_job base;
 
@@ -31,8 +39,7 @@ struct panfrost_job {
 	__u32 flush_id;
 
 	/* Exclusive fences we have taken from the BOs to wait for */
-	struct dma_fence **implicit_fences;
-	struct drm_gem_object **bos;
+	struct panfrost_job_bo_desc *bos;
 	u32 bo_count;
 
 	/* Fence to be signaled by drm-sched once its done with the job */
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index ec19db1eead8..029c6ae1b1f0 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -40,6 +40,28 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
 
 #define PANFROST_JD_REQ_FS (1 << 0)
+
+#define PANFROST_SUBMIT_BO_READ			(1 << 0)
+#define PANFROST_SUBMIT_BO_WRITE		(1 << 1)
+#define PANFROST_SUBMIT_BO_RW			(PANFROST_SUBMIT_BO_READ | \
+						 PANFROST_SUBMIT_BO_WRITE)
+#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE	(1 << 2)
+#define PANFROST_SUBMIT_BO_VALID_FLAGS		\
+	(PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
+
+/**
+ * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
+ *
+ * Useful to give detailed information about BOs used by a given job.
+ *
+ * @handle: the GEM handle
+ * @flags: a combination of PANFROST_SUBMIT_BO_*
+ */
+struct drm_panfrost_submit_bo {
+	__u32 handle;
+	__u32 flags;
+};
+
 /**
  * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
  * engine.
@@ -68,6 +90,25 @@ struct drm_panfrost_submit {
 
 	/** A combination of PANFROST_JD_REQ_* */
 	__u32 requirements;
+
+	/**
+	 * Pointer to an array of &drm_panfrost_submit_bo objects. This
+	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
+	 * not provide enough information to relax synchronization between
+	 * jobs that only only read the BO they share. When both
+	 * jobs that only read the BO they share. When both
+	 * are provided, drm_panfrost_submit.bo_handles is ignored.
+	 */
+	__u64 bo_descs;
+
+	/**
+	 * Number of BO descriptors passed in (size is that times
+	 * sizeof(drm_panfrost_submit_bo_desc)).
+	 * sizeof(struct drm_panfrost_submit_bo)).
+	__u32 bo_desc_count;
+
+	/** Padding, must be 0. */
+	__u32 pad;
 };
 
 /**
-- 
2.21.0


* [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl
  2019-09-13 11:17 [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Boris Brezillon
@ 2019-09-13 11:17 ` Boris Brezillon
  2019-09-13 13:46   ` Steven Price
  2019-09-13 13:44 ` [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Steven Price
  2019-09-16 22:20 ` Rob Herring
  2 siblings, 1 reply; 11+ messages in thread
From: Boris Brezillon @ 2019-09-13 11:17 UTC (permalink / raw)
  To: Rob Herring, Tomeu Vizoso, Alyssa Rosenzweig, Steven Price
  Cc: Boris Brezillon, dri-devel

So we can choose to wait for all BO users, or just for writers.
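
A minimal userspace sketch (bo_handle and abs_timeout_ns are
hypothetical values; assumes libdrm's drmIoctl()):

	struct drm_panfrost_wait_bo wait = {
		.handle = bo_handle,
		.timeout_ns = abs_timeout_ns,		/* absolute */
		.flags = PANFROST_WAIT_BO_WRITERS,	/* ignore readers */
	};

	/* fails with errno ETIMEDOUT/EBUSY while a writer is still queued */
	ret = drmIoctl(fd, DRM_IOCTL_PANFROST_WAIT_BO, &wait);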

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 8 ++++++--
 include/uapi/drm/panfrost_drm.h         | 4 ++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 08082fd557c3..6a94aea2147f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -322,16 +322,20 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
 	struct drm_panfrost_wait_bo *args = data;
 	struct drm_gem_object *gem_obj;
 	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+	bool wait_all = !(args->flags & PANFROST_WAIT_BO_WRITERS);
 
 	if (args->pad)
 		return -EINVAL;
 
+	if (args->flags & ~PANFROST_WAIT_BO_WRITERS)
+		return -EINVAL;
+
 	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
 	if (!gem_obj)
 		return -ENOENT;
 
-	ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true,
-						  true, timeout);
+	ret = dma_resv_wait_timeout_rcu(gem_obj->resv, wait_all,
+					true, timeout);
 	if (!ret)
 		ret = timeout ? -ETIMEDOUT : -EBUSY;
 
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 029c6ae1b1f0..ac4facbcee47 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -111,6 +111,9 @@ struct drm_panfrost_submit {
 	__u32 pad;
 };
 
+#define PANFROST_WAIT_ALL_BO_USERS	(0 << 0)
+#define PANFROST_WAIT_BO_WRITERS	(1 << 0)
+
 /**
  * struct drm_panfrost_wait_bo - ioctl argument for waiting for
  * completion of the last DRM_PANFROST_SUBMIT on a BO.
@@ -123,6 +126,7 @@ struct drm_panfrost_wait_bo {
 	__u32 handle;
 	__u32 pad;
 	__s64 timeout_ns;	/* absolute */
+	__u64 flags;
 };
 
 #define PANFROST_BO_NOEXEC	1
-- 
2.21.0


* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-13 11:17 [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Boris Brezillon
  2019-09-13 11:17 ` [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl Boris Brezillon
@ 2019-09-13 13:44 ` Steven Price
  2019-09-16 12:11   ` Alyssa Rosenzweig
                     ` (2 more replies)
  2019-09-16 22:20 ` Rob Herring
  2 siblings, 3 replies; 11+ messages in thread
From: Steven Price @ 2019-09-13 13:44 UTC (permalink / raw)
  To: Boris Brezillon, Rob Herring, Tomeu Vizoso, Alyssa Rosenzweig; +Cc: dri-devel

On 13/09/2019 12:17, Boris Brezillon wrote:
> The READ/WRITE flags are particularly useful if we want to avoid
> serialization of jobs that read from the same BO but never write to it.
> The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> shared but jobs are using different portions of the buffer.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

Good feature - we could do with an (easy) way of the user driver
detecting this - so it might be worth bumping the driver version for this?
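
E.g. something like this on the userspace side (sketch only - the
actual minor number would depend on where the bump lands):

	drmVersionPtr ver = drmGetVersion(fd);
	int has_bo_descs = ver && (ver->version_major > 1 ||
				   (ver->version_major == 1 &&
				    ver->version_minor >= 2));
	drmFreeVersion(ver);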

Some more comments below.

> ---
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
>  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
>  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
>  include/uapi/drm/panfrost_drm.h         |  41 ++++++
>  4 files changed, 247 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index d74442d71048..08082fd557c3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
>  		  struct drm_panfrost_submit *args,
>  		  struct panfrost_job *job)
>  {
> -	job->bo_count = args->bo_handle_count;
> +	struct drm_panfrost_submit_bo *bo_descs = NULL;
> +	u32 *handles = NULL;
> +	u32 i, bo_count;
> +	int ret = 0;
>  
> -	if (!job->bo_count)
> +	bo_count = args->bo_desc_count ?
> +		   args->bo_desc_count : args->bo_handle_count;
> +	if (!bo_count)
>  		return 0;
>  
> -	job->implicit_fences = kvmalloc_array(job->bo_count,
> -				  sizeof(struct dma_fence *),
> +	job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
>  				  GFP_KERNEL | __GFP_ZERO);
> -	if (!job->implicit_fences)
> +	if (!job->bos)
>  		return -ENOMEM;
>  
> -	return drm_gem_objects_lookup(file_priv,
> -				      (void __user *)(uintptr_t)args->bo_handles,
> -				      job->bo_count, &job->bos);
> +	job->bo_count = bo_count;
> +	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> +				  GFP_KERNEL | __GFP_ZERO);
> +	if (!bo_descs) {
> +		ret = -ENOMEM;
> +		goto out;

This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.

> +	}
> +
> +	if (!args->bo_desc_count) {
> +		handles = kvmalloc_array(bo_count, sizeof(*handles),
> +					 GFP_KERNEL);
> +		if (!handles) {
> +			ret =-ENOMEM;
> +			goto out;
> +		}
> +
> +		if (copy_from_user(handles,
> +				   (void __user *)(uintptr_t)args->bo_handles,
> +				   job->bo_count * sizeof(*handles))) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < job->bo_count; i++) {
> +			bo_descs[i].handle = handles[i];
> +			bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> +					    PANFROST_SUBMIT_BO_READ;

You can use PANFROST_SUBMIT_BO_RW here.

> +		}
> +	} else if (copy_from_user(bo_descs,
> +				  (void __user *)(uintptr_t)args->bo_descs,
> +				  job->bo_count * sizeof(*bo_descs))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> +                    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		job->bos[i].flags = bo_descs[i].flags;
> +		job->bos[i].obj = drm_gem_object_lookup(file_priv,
> +							bo_descs[i].handle);
> +		if (!job->bos[i].obj) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	kvfree(handles);
> +	kvfree(bo_descs);
> +	return ret;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 05c85f45a0de..e4b74fde9339 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  	pm_runtime_put_autosuspend(pfdev->dev);
>  }
>  
> -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> -					   int bo_count,
> -					   struct dma_fence **implicit_fences)
> +static int panfrost_acquire_object_fences(struct panfrost_job *job)
>  {
> -	int i;
> +	int i, ret;
>  
> -	for (i = 0; i < bo_count; i++)
> -		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct panfrost_job_bo_desc *bo = &job->bos[i];
> +		struct dma_resv *robj = bo->obj->resv;
> +
> +		if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> +			ret = dma_resv_reserve_shared(robj, 1);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +			continue;
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> +			ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> +						      &bo->shared_count,
> +						      &bo->shared);
> +			if (ret)
> +				return ret;
> +		} else {
> +			bo->excl = dma_resv_get_excl_rcu(robj);
> +		}

The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ
| NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't
understand why.

> +	}
> +
> +	return 0;
>  }
>  
> -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> -					  int bo_count,
> -					  struct dma_fence *fence)
> +static void panfrost_attach_object_fences(struct panfrost_job *job)
>  {
>  	int i;
>  
> -	for (i = 0; i < bo_count; i++)
> -		dma_resv_add_excl_fence(bos[i]->resv, fence);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct drm_gem_object *obj = job->bos[i].obj;
> +
> +		if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> +			dma_resv_add_excl_fence(obj->resv,
> +						job->render_done_fence);
> +		else
> +			dma_resv_add_shared_fence(obj->resv,
> +						  job->render_done_fence);
> +	}
> +}
> +
> +static int panfrost_job_lock_bos(struct panfrost_job *job,
> +				 struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int contended = -1;
> +	int i, ret;
> +
> +	ww_acquire_init(acquire_ctx, &reservation_ww_class);
> +
> +retry:
> +	if (contended != -1) {
> +		struct drm_gem_object *obj = job->bos[contended].obj;
> +
> +		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> +						       acquire_ctx);

dma_resv_lock_slow_interruptible()?

> +		if (ret) {
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if (i == contended)
> +			continue;
> +
> +		ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> +						  acquire_ctx);

dma_resv_lock_interruptible()?

> +		if (ret) {
> +			int j;
> +
> +			for (j = 0; j < i; j++)
> +				ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> +
> +			if (contended != -1 && contended >= i) {
> +				struct drm_gem_object *contended_obj;
> +
> +				contended_obj = job->bos[contended].obj;
> +				ww_mutex_unlock(&contended_obj->resv->lock);
> +			}
> +
> +			if (ret == -EDEADLK) {
> +				contended = i;
> +				goto retry;
> +			}
> +
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	ww_acquire_done(acquire_ctx);
> +
> +	return 0;
> +}

This looks like a copy of drm_gem_lock_reservations(). The only reason
for it as far as I can see is because we now have an array of struct
panfrost_job_bo_desc rather than a direct array of struct
drm_gem_object. I'm not sure having everything neatly in one structure
is worth this cost?
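
E.g. a thin wrapper could rebuild a plain GEM array and keep using the
core helper (untested sketch):

	static int panfrost_job_lock_bos(struct panfrost_job *job,
					 struct ww_acquire_ctx *acquire_ctx)
	{
		struct drm_gem_object **objs;
		int i, ret;

		objs = kvmalloc_array(job->bo_count, sizeof(*objs),
				      GFP_KERNEL);
		if (!objs)
			return -ENOMEM;

		for (i = 0; i < job->bo_count; i++)
			objs[i] = job->bos[i].obj;

		/* all the ww_mutex retry logic stays in the core helper */
		ret = drm_gem_lock_reservations(objs, job->bo_count,
						acquire_ctx);
		kvfree(objs);
		return ret;
	}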

> +
> +static void panfrost_job_unlock_bos(struct panfrost_job *job,
> +				    struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int i;
> +
> +	for (i = 0; i < job->bo_count; i++)
> +		ww_mutex_unlock(&job->bos[i].obj->resv->lock);
> +
> +	ww_acquire_fini(acquire_ctx);
>  }
> 
>  int panfrost_job_push(struct panfrost_job *job)
> @@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
>  
>  	mutex_lock(&pfdev->sched_lock);
>  
> -	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
> -					    &acquire_ctx);
> +	ret = panfrost_job_lock_bos(job, &acquire_ctx);
>  	if (ret) {
>  		mutex_unlock(&pfdev->sched_lock);
>  		return ret;
> @@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
>  
>  	kref_get(&job->refcount); /* put by scheduler job completion */
>  
> -	panfrost_acquire_object_fences(job->bos, job->bo_count,
> -				       job->implicit_fences);
> +	panfrost_acquire_object_fences(job);
>  
>  	drm_sched_entity_push_job(&job->base, entity);
>  
>  	mutex_unlock(&pfdev->sched_lock);
>  
> -	panfrost_attach_object_fences(job->bos, job->bo_count,
> -				      job->render_done_fence);
> +	panfrost_attach_object_fences(job);
>  
>  unlock:
> -	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
> +	panfrost_job_unlock_bos(job, &acquire_ctx);
>  
>  	return ret;
>  }
> @@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref)
>  			dma_fence_put(job->in_fences[i]);
>  		kvfree(job->in_fences);
>  	}
> -	if (job->implicit_fences) {
> -		for (i = 0; i < job->bo_count; i++)
> -			dma_fence_put(job->implicit_fences[i]);
> -		kvfree(job->implicit_fences);
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		unsigned int j;
> +
> +		dma_fence_put(job->bos[i].excl);
> +		for (j = 0; j < job->bos[i].shared_count; j++)
> +			dma_fence_put(job->bos[i].shared[j]);
>  	}
> +
>  	dma_fence_put(job->done_fence);
>  	dma_fence_put(job->render_done_fence);
>  
> -	if (job->bos) {
> -		for (i = 0; i < job->bo_count; i++)
> -			drm_gem_object_put_unlocked(job->bos[i]);
> -		kvfree(job->bos);
> -	}
> +	for (i = 0; i < job->bo_count; i++)
> +		drm_gem_object_put_unlocked(job->bos[i].obj);
>  
> +	kvfree(job->bos);
>  	kfree(job);
>  }
>  
> @@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job
>  		}
>  	}
>  
> -	/* Implicit fences, max. one per BO */
> +	/* Implicit fences */
>  	for (i = 0; i < job->bo_count; i++) {
> -		if (job->implicit_fences[i]) {
> -			fence = job->implicit_fences[i];
> -			job->implicit_fences[i] = NULL;
> +		unsigned int j;
> +
> +		if (job->bos[i].excl) {
> +			fence = job->bos[i].excl;
> +			job->bos[i].excl = NULL;
> +			return fence;
> +		}
> +
> +		for (j = 0; j < job->bos[i].shared_count; j++) {
> +			if (!job->bos[i].shared[j])
> +				continue;
> +
> +			fence = job->bos[i].shared[j];
> +			job->bos[i].shared[j] = NULL;
>  			return fence;
>  		}
>  	}
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index 62454128a792..53440640a6e3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -11,6 +11,14 @@ struct panfrost_device;
>  struct panfrost_gem_object;
>  struct panfrost_file_priv;
>  
> +struct panfrost_job_bo_desc {
> +	struct drm_gem_object *obj;
> +	struct dma_fence *excl;
> +	struct dma_fence **shared;
> +	unsigned int shared_count;
> +	u32 flags;
> +};
> +
>  struct panfrost_job {
>  	struct drm_sched_job base;
>  
> @@ -31,8 +39,7 @@ struct panfrost_job {
>  	__u32 flush_id;
>  
>  	/* Exclusive fences we have taken from the BOs to wait for */

Comment needs updating

> -	struct dma_fence **implicit_fences;
> -	struct drm_gem_object **bos;
> +	struct panfrost_job_bo_desc *bos;
>  	u32 bo_count;
>  
>  	/* Fence to be signaled by drm-sched once its done with the job */
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index ec19db1eead8..029c6ae1b1f0 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -40,6 +40,28 @@ extern "C" {
>  #define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
>  
>  #define PANFROST_JD_REQ_FS (1 << 0)
> +
> +#define PANFROST_SUBMIT_BO_READ			(1 << 0)
> +#define PANFROST_SUBMIT_BO_WRITE		(1 << 1)
> +#define PANFROST_SUBMIT_BO_RW			(PANFROST_SUBMIT_BO_READ | \
> +						 PANFROST_SUBMIT_BO_WRITE)
> +#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE	(1 << 2)
> +#define PANFROST_SUBMIT_BO_VALID_FLAGS		\
> +	(PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +
> +/**
> + * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
> + *
> + * Useful to give detailed information about BOs used by a given job.
> + *
> + * @handle: the GEM handle
> + * @flags: a combination of PANFROST_SUBMIT_BO_*
> + */
> +struct drm_panfrost_submit_bo {
> +	__u32 handle;
> +	__u32 flags;
> +};
> +
>  /**
>   * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
>   * engine.
> @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
>  
>  	/** A combination of PANFROST_JD_REQ_* */
>  	__u32 requirements;
> +
> +	/**
> +	 * Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
> +	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
> +	 * not provide enough information to relax synchronization between
> +	 * jobs that only only read the BO they share. When both
> +	 * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> +	 * are provided, drm_panfrost_submit.bo_handles is ignored.
> +	 */
> +	__u64 bo_descs;
> +
> +	/**
> +	 * Number of BO descriptors passed in (size is that times
> +	 * sizeof(drm_panfrost_submit_bo_desc)).
> +	 */
> +	__u32 bo_desc_count;

We don't really need another count field. bo_handle_count could be
re-used. Indeed this could even be handled with just a flags field with
a new flag specifying that bo_handles no longer points to handles but to
bo_desc objects instead.
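
E.g. (hypothetical flag name, sketch only):

	#define PANFROST_JD_REQ_BO_DESCS	(1 << 1)

	/* in panfrost_lookup_bos(): one pointer/count pair, two layouts */
	if (args->requirements & PANFROST_JD_REQ_BO_DESCS) {
		/* bo_handles points to struct drm_panfrost_submit_bo */
		if (copy_from_user(bo_descs,
				   (void __user *)(uintptr_t)args->bo_handles,
				   args->bo_handle_count * sizeof(*bo_descs)))
			return -EFAULT;
	} else {
		/* legacy layout: plain __u32 handles, implied RW access */
		if (copy_from_user(handles,
				   (void __user *)(uintptr_t)args->bo_handles,
				   args->bo_handle_count * sizeof(*handles)))
			return -EFAULT;
	}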

Steve

> +
> +	/** Padding, must be 0. */
> +	__u32 pad;
>  };
>  
>  /**
> 


* Re: [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl
  2019-09-13 11:17 ` [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl Boris Brezillon
@ 2019-09-13 13:46   ` Steven Price
  2019-09-16 12:55     ` Boris Brezillon
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Price @ 2019-09-13 13:46 UTC (permalink / raw)
  To: Boris Brezillon, Rob Herring, Tomeu Vizoso, Alyssa Rosenzweig; +Cc: dri-devel

On 13/09/2019 12:17, Boris Brezillon wrote:
> So we can choose to wait for all BO users, or just for writers.
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

Looks good to me:

Reviewed-by: Steven Price <steven.price@arm.com>

Although I don't know if the term "writers" should be in the API or if
"exclusive" is the preferred term?

Steve

> ---
>  drivers/gpu/drm/panfrost/panfrost_drv.c | 8 ++++++--
>  include/uapi/drm/panfrost_drm.h         | 4 ++++
>  2 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 08082fd557c3..6a94aea2147f 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -322,16 +322,20 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
>  	struct drm_panfrost_wait_bo *args = data;
>  	struct drm_gem_object *gem_obj;
>  	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
> +	bool wait_all = !(args->flags & PANFROST_WAIT_BO_WRITERS);
>  
>  	if (args->pad)
>  		return -EINVAL;
>  
> +	if (args->flags & ~PANFROST_WAIT_BO_WRITERS)
> +		return -EINVAL;
> +
>  	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
>  	if (!gem_obj)
>  		return -ENOENT;
>  
> -	ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true,
> -						  true, timeout);
> +	ret = dma_resv_wait_timeout_rcu(gem_obj->resv, wait_all,
> +					true, timeout);
>  	if (!ret)
>  		ret = timeout ? -ETIMEDOUT : -EBUSY;
>  
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index 029c6ae1b1f0..ac4facbcee47 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -111,6 +111,9 @@ struct drm_panfrost_submit {
>  	__u32 pad;
>  };
>  
> +#define PANFROST_WAIT_ALL_BO_USERS	(0 << 0)
> +#define PANFROST_WAIT_BO_WRITERS	(1 << 0)
> +
>  /**
>   * struct drm_panfrost_wait_bo - ioctl argument for waiting for
>   * completion of the last DRM_PANFROST_SUBMIT on a BO.
> @@ -123,6 +126,7 @@ struct drm_panfrost_wait_bo {
>  	__u32 handle;
>  	__u32 pad;
>  	__s64 timeout_ns;	/* absolute */
> +	__u64 flags;
>  };
>  
>  #define PANFROST_BO_NOEXEC	1
> 


* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-13 13:44 ` [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Steven Price
@ 2019-09-16 12:11   ` Alyssa Rosenzweig
  2019-09-16 13:11   ` Boris Brezillon
  2019-09-16 22:28   ` Rob Herring
  2 siblings, 0 replies; 11+ messages in thread
From: Alyssa Rosenzweig @ 2019-09-16 12:11 UTC (permalink / raw)
  To: Steven Price; +Cc: dri-devel, Boris Brezillon, Rob Herring

> > +	/**
> > +	 * Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
> > +	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
> > +	 * not provide enough information to relax synchronization between
> > +	 * jobs that only read the BO they share. When both
> > +	 * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> > +	 * are provided, drm_panfrost_submit.bo_handles is ignored.
> > +	 */
> > +	__u64 bo_descs;
> > +
> > +	/**
> > +	 * Number of BO descriptors passed in (size is that times
> > +	 * sizeof(drm_panfrost_submit_bo_desc)).
> > +	 */
> > +	__u32 bo_desc_count;
> 
> We don't really need another count field. bo_handle_count could be
> re-used. Indeed this could even be handled with just a flags field with
> a new flag specifying that bo_handles no longer points to handles but to
> bo_desc objects instead.

This seems like the cleanest approach (also bumping the ABI version).

* Re: [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl
  2019-09-13 13:46   ` Steven Price
@ 2019-09-16 12:55     ` Boris Brezillon
  0 siblings, 0 replies; 11+ messages in thread
From: Boris Brezillon @ 2019-09-16 12:55 UTC (permalink / raw)
  To: Steven Price; +Cc: dri-devel, Rob Herring, Alyssa Rosenzweig

On Fri, 13 Sep 2019 14:46:46 +0100
Steven Price <steven.price@arm.com> wrote:

> On 13/09/2019 12:17, Boris Brezillon wrote:
> > So we can choose to wait for all BO users, or just for writers.
> > 
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>  
> 
> Looks good to me:
> 
> Reviewed-by: Steven Price <steven.price@arm.com>
> 
> Although I don't know if the term "writers" should be in the API or if
> "exclusive" is the preferred term?

I'll go for PANFROST_WAIT_BO_EXCLUSIVE then.

Thanks,

Boris

> 
> Steve
> 
> > ---
> >  drivers/gpu/drm/panfrost/panfrost_drv.c | 8 ++++++--
> >  include/uapi/drm/panfrost_drm.h         | 4 ++++
> >  2 files changed, 10 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index 08082fd557c3..6a94aea2147f 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -322,16 +322,20 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
> >  	struct drm_panfrost_wait_bo *args = data;
> >  	struct drm_gem_object *gem_obj;
> >  	unsigned long timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
> > +	bool wait_all = !(args->flags & PANFROST_WAIT_BO_WRITERS);
> >  
> >  	if (args->pad)
> >  		return -EINVAL;
> >  
> > +	if (args->flags & ~PANFROST_WAIT_BO_WRITERS)
> > +		return -EINVAL;
> > +
> >  	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
> >  	if (!gem_obj)
> >  		return -ENOENT;
> >  
> > -	ret = dma_resv_wait_timeout_rcu(gem_obj->resv, true,
> > -						  true, timeout);
> > +	ret = dma_resv_wait_timeout_rcu(gem_obj->resv, wait_all,
> > +					true, timeout);
> >  	if (!ret)
> >  		ret = timeout ? -ETIMEDOUT : -EBUSY;
> >  
> > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> > index 029c6ae1b1f0..ac4facbcee47 100644
> > --- a/include/uapi/drm/panfrost_drm.h
> > +++ b/include/uapi/drm/panfrost_drm.h
> > @@ -111,6 +111,9 @@ struct drm_panfrost_submit {
> >  	__u32 pad;
> >  };
> >  
> > +#define PANFROST_WAIT_ALL_BO_USERS	(0 << 0)
> > +#define PANFROST_WAIT_BO_WRITERS	(1 << 0)
> > +
> >  /**
> >   * struct drm_panfrost_wait_bo - ioctl argument for waiting for
> >   * completion of the last DRM_PANFROST_SUBMIT on a BO.
> > @@ -123,6 +126,7 @@ struct drm_panfrost_wait_bo {
> >  	__u32 handle;
> >  	__u32 pad;
> >  	__s64 timeout_ns;	/* absolute */
> > +	__u64 flags;
> >  };
> >  
> >  #define PANFROST_BO_NOEXEC	1
> >   
> 


* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-13 13:44 ` [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Steven Price
  2019-09-16 12:11   ` Alyssa Rosenzweig
@ 2019-09-16 13:11   ` Boris Brezillon
  2019-09-16 22:28   ` Rob Herring
  2 siblings, 0 replies; 11+ messages in thread
From: Boris Brezillon @ 2019-09-16 13:11 UTC (permalink / raw)
  To: Steven Price; +Cc: Rob Herring, dri-devel, Alyssa Rosenzweig

On Fri, 13 Sep 2019 14:44:14 +0100
Steven Price <steven.price@arm.com> wrote:

> On 13/09/2019 12:17, Boris Brezillon wrote:
> > The READ/WRITE flags are particularly useful if we want to avoid
> > serialization of jobs that read from the same BO but never write to it.
> > The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> > shared but jobs are using different portions of the buffer.
> > 
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>  
> 
> Good feature - we could do with an (easy) way of the user driver
> detecting this - so it might be worth bumping the driver version for this?

I was trying to support this feature without adding a new feature
flag or bumping the minor version, but I guess there's no good
reason to do that.

> 
> Some more comments below.
> 
> > ---
> >  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
> >  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
> >  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
> >  include/uapi/drm/panfrost_drm.h         |  41 ++++++
> >  4 files changed, 247 insertions(+), 41 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index d74442d71048..08082fd557c3 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
> >  		  struct drm_panfrost_submit *args,
> >  		  struct panfrost_job *job)
> >  {
> > -	job->bo_count = args->bo_handle_count;
> > +	struct drm_panfrost_submit_bo *bo_descs = NULL;
> > +	u32 *handles = NULL;
> > +	u32 i, bo_count;
> > +	int ret = 0;
> >  
> > -	if (!job->bo_count)
> > +	bo_count = args->bo_desc_count ?
> > +		   args->bo_desc_count : args->bo_handle_count;
> > +	if (!bo_count)
> >  		return 0;
> >  
> > -	job->implicit_fences = kvmalloc_array(job->bo_count,
> > -				  sizeof(struct dma_fence *),
> > +	job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
> >  				  GFP_KERNEL | __GFP_ZERO);
> > -	if (!job->implicit_fences)
> > +	if (!job->bos)
> >  		return -ENOMEM;
> >  
> > -	return drm_gem_objects_lookup(file_priv,
> > -				      (void __user *)(uintptr_t)args->bo_handles,
> > -				      job->bo_count, &job->bos);
> > +	job->bo_count = bo_count;
> > +	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> > +				  GFP_KERNEL | __GFP_ZERO);
> > +	if (!bo_descs) {
> > +		ret = -ENOMEM;
> > +		goto out;  
> 
> This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.

Will fix that.

> 
> > +	}
> > +
> > +	if (!args->bo_desc_count) {
> > +		handles = kvmalloc_array(bo_count, sizeof(*handles),
> > +					 GFP_KERNEL);
> > +		if (!handles) {
> > +			ret =-ENOMEM;
> > +			goto out;
> > +		}
> > +
> > +		if (copy_from_user(handles,
> > +				   (void __user *)(uintptr_t)args->bo_handles,
> > +				   job->bo_count * sizeof(*handles))) {
> > +			ret = -EFAULT;
> > +			goto out;
> > +		}
> > +
> > +		for (i = 0; i < job->bo_count; i++) {
> > +			bo_descs[i].handle = handles[i];
> > +			bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> > +					    PANFROST_SUBMIT_BO_READ;  
> 
> You can use PANFROST_SUBMIT_BO_RW here.

That as well.

> 
> > +		}
> > +	} else if (copy_from_user(bo_descs,
> > +				  (void __user *)(uintptr_t)args->bo_descs,
> > +				  job->bo_count * sizeof(*bo_descs))) {
> > +		ret = -EFAULT;
> > +		goto out;
> > +	}
> > +
> > +	for (i = 0; i < job->bo_count; i++) {
> > +		if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> > +                    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		job->bos[i].flags = bo_descs[i].flags;
> > +		job->bos[i].obj = drm_gem_object_lookup(file_priv,
> > +							bo_descs[i].handle);
> > +		if (!job->bos[i].obj) {
> > +			ret = -ENOENT;
> > +			goto out;
> > +		}
> > +	}
> > +
> > +out:
> > +	kvfree(handles);
> > +	kvfree(bo_descs);
> > +	return ret;
> >  }
> >  
> >  /**
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 05c85f45a0de..e4b74fde9339 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >  	pm_runtime_put_autosuspend(pfdev->dev);
> >  }
> >  
> > -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> > -					   int bo_count,
> > -					   struct dma_fence **implicit_fences)
> > +static int panfrost_acquire_object_fences(struct panfrost_job *job)
> >  {
> > -	int i;
> > +	int i, ret;
> >  
> > -	for (i = 0; i < bo_count; i++)
> > -		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> > +	for (i = 0; i < job->bo_count; i++) {
> > +		struct panfrost_job_bo_desc *bo = &job->bos[i];
> > +		struct dma_resv *robj = bo->obj->resv;
> > +
> > +		if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> > +			ret = dma_resv_reserve_shared(robj, 1);
> > +			if (ret)
> > +				return ret;
> > +		}
> > +
> > +		if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> > +			continue;
> > +
> > +		if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> > +			ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> > +						      &bo->shared_count,
> > +						      &bo->shared);
> > +			if (ret)
> > +				return ret;
> > +		} else {
> > +			bo->excl = dma_resv_get_excl_rcu(robj);
> > +		}  
> 
> The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ
> | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't
> understand why.

The NO_IMPLICIT_FENCE flag is telling the core we don't want to wait on
the fence installed by other jobs on this BO, but others might want to
wait on our render-done fence (when CPU accesses are required, or simply
because other jobs didn't pass the NO_IMPLICIT_FENCE flag).
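
In other words, the intended decision table looks like this (sketch,
hypothetical helpers):

	/* what a job waits on vs. what it installs, per BO */
	static bool bo_waits_on_implicit_fences(u32 flags)
	{
		return !(flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE);
	}

	static bool bo_installs_excl_fence(u32 flags)
	{
		return flags & PANFROST_SUBMIT_BO_WRITE;
	}

	/*
	 * dma_resv_reserve_shared() is thus needed for every !WRITE BO,
	 * even with NO_IMPLICIT_FENCE, because we always install a fence
	 * that others may want to wait on.
	 */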

> 
> > +	}
> > +
> > +	return 0;
> >  }
> >  
> > -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> > -					  int bo_count,
> > -					  struct dma_fence *fence)
> > +static void panfrost_attach_object_fences(struct panfrost_job *job)
> >  {
> >  	int i;
> >  
> > -	for (i = 0; i < bo_count; i++)
> > -		dma_resv_add_excl_fence(bos[i]->resv, fence);
> > +	for (i = 0; i < job->bo_count; i++) {
> > +		struct drm_gem_object *obj = job->bos[i].obj;
> > +
> > +		if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> > +			dma_resv_add_excl_fence(obj->resv,
> > +						job->render_done_fence);
> > +		else
> > +			dma_resv_add_shared_fence(obj->resv,
> > +						  job->render_done_fence);
> > +	}
> > +}
> > +
> > +static int panfrost_job_lock_bos(struct panfrost_job *job,
> > +				 struct ww_acquire_ctx *acquire_ctx)
> > +{
> > +	int contended = -1;
> > +	int i, ret;
> > +
> > +	ww_acquire_init(acquire_ctx, &reservation_ww_class);
> > +
> > +retry:
> > +	if (contended != -1) {
> > +		struct drm_gem_object *obj = job->bos[contended].obj;
> > +
> > +		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> > +						       acquire_ctx);  
> 
> dma_resv_lock_slow_interruptible()?

Yep (I started this on an older kernel version where dma_resv_ helpers
didn't exist and apparently forgot to convert a few things when
rebasing).

> 
> > +		if (ret) {
> > +			ww_acquire_done(acquire_ctx);
> > +			return ret;
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < job->bo_count; i++) {
> > +		if (i == contended)
> > +			continue;
> > +
> > +		ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> > +						  acquire_ctx);  
> 
> dma_resv_lock_interruptible()?

Ditto

> 
> > +		if (ret) {
> > +			int j;
> > +
> > +			for (j = 0; j < i; j++)
> > +				ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> > +
> > +			if (contended != -1 && contended >= i) {
> > +				struct drm_gem_object *contended_obj;
> > +
> > +				contended_obj = job->bos[contended].obj;
> > +				ww_mutex_unlock(&contended_obj->resv->lock);
> > +			}
> > +
> > +			if (ret == -EDEADLK) {
> > +				contended = i;
> > +				goto retry;
> > +			}
> > +
> > +			ww_acquire_done(acquire_ctx);
> > +			return ret;
> > +		}
> > +	}
> > +
> > +	ww_acquire_done(acquire_ctx);
> > +
> > +	return 0;
> > +}  
> 
> This looks like a copy of drm_gem_lock_reservations(). The only reason
> for it as far as I can see is because we now have an array of struct
> panfrost_job_bo_desc rather than a direct array of struct
> drm_gem_object. I'm not sure having everything neatly in one structure
> is worth this cost?

I really like the idea of having all BO-related fields put in a
separate structure, but okay.


> >  /**
> >   * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
> >   * engine.
> > @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
> >  
> >  	/** A combination of PANFROST_JD_REQ_* */
> >  	__u32 requirements;
> > +
> > +	/**
> > +	 * Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
> > +	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
> > +	 * not provide enough information to relax synchronization between
> > +	 * jobs that only read the BO they share. When both
> > +	 * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> > +	 * are provided, drm_panfrost_submit.bo_handles is ignored.
> > +	 */
> > +	__u64 bo_descs;
> > +
> > +	/**
> > +	 * Number of BO descriptors passed in (size is that times
> > +	 * sizeof(drm_panfrost_submit_bo_desc)).
> > +	 */
> > +	__u32 bo_desc_count;  
> 
> We don't really need another count field. bo_handle_count could be
> re-used. Indeed this could even be handled with just a flags field with
> a new flag specifying that bo_handles no longer points to handles but to
> bo_desc objects instead.

As said above, I was trying to avoid bumping the minor version or
adding a feature flag, hence the decision to add 2 separate fields for
the BO desc array. I agree, it's probably not the best solution, so
I'll add a new _REQ_ flag and bump the minor version as you suggest.

* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-13 11:17 [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Boris Brezillon
  2019-09-13 11:17 ` [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl Boris Brezillon
  2019-09-13 13:44 ` [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Steven Price
@ 2019-09-16 22:20 ` Rob Herring
  2019-09-17  7:15   ` Boris Brezillon
  2 siblings, 1 reply; 11+ messages in thread
From: Rob Herring @ 2019-09-16 22:20 UTC (permalink / raw)
  To: Boris Brezillon; +Cc: dri-devel, Alyssa Rosenzweig, Steven Price

On Fri, Sep 13, 2019 at 6:17 AM Boris Brezillon
<boris.brezillon@collabora.com> wrote:
>
> The READ/WRITE flags are particularly useful if we want to avoid
> serialization of jobs that read from the same BO but never write to it.

Any data on performance differences?

> The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> shared but jobs are using different portions of the buffer.

Why don't we add this when it is useful rather than might be?

>
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-13 13:44 ` [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Steven Price
  2019-09-16 12:11   ` Alyssa Rosenzweig
  2019-09-16 13:11   ` Boris Brezillon
@ 2019-09-16 22:28   ` Rob Herring
  2 siblings, 0 replies; 11+ messages in thread
From: Rob Herring @ 2019-09-16 22:28 UTC (permalink / raw)
  To: Steven Price; +Cc: dri-devel, Boris Brezillon, Alyssa Rosenzweig

On Fri, Sep 13, 2019 at 8:44 AM Steven Price <steven.price@arm.com> wrote:
>
> On 13/09/2019 12:17, Boris Brezillon wrote:
> > The READ/WRITE flags are particularly useful if we want to avoid
> > serialization of jobs that read from the same BO but never write to it.
> > The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> > shared but jobs are using different portions of the buffer.
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
>
> Good feature - we could do with an (easy) way of the user driver
> detecting this - so it might be worth bumping the driver version for this?
>
> Some more comments below.
>
> > ---
> >  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
> >  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
> >  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
> >  include/uapi/drm/panfrost_drm.h         |  41 ++++++
> >  4 files changed, 247 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index d74442d71048..08082fd557c3 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
> >                 struct drm_panfrost_submit *args,
> >                 struct panfrost_job *job)
> >  {
> > -     job->bo_count = args->bo_handle_count;
> > +     struct drm_panfrost_submit_bo *bo_descs = NULL;
> > +     u32 *handles = NULL;
> > +     u32 i, bo_count;
> > +     int ret = 0;
> >
> > -     if (!job->bo_count)
> > +     bo_count = args->bo_desc_count ?
> > +                args->bo_desc_count : args->bo_handle_count;
> > +     if (!bo_count)
> >               return 0;
> >
> > -     job->implicit_fences = kvmalloc_array(job->bo_count,
> > -                               sizeof(struct dma_fence *),
> > +     job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
> >                                 GFP_KERNEL | __GFP_ZERO);
> > -     if (!job->implicit_fences)
> > +     if (!job->bos)
> >               return -ENOMEM;
> >
> > -     return drm_gem_objects_lookup(file_priv,
> > -                                   (void __user *)(uintptr_t)args->bo_handles,
> > -                                   job->bo_count, &job->bos);
> > +     job->bo_count = bo_count;
> > +     bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> > +                               GFP_KERNEL | __GFP_ZERO);
> > +     if (!bo_descs) {
> > +             ret = -ENOMEM;
> > +             goto out;
>
> This can be just "return -ENOMEM" - both handles and bo_descs will be NULL.
>
> > +     }
> > +
> > +     if (!args->bo_desc_count) {
> > +             handles = kvmalloc_array(bo_count, sizeof(*handles),
> > +                                      GFP_KERNEL);
> > +             if (!handles) {
> > +                     ret =-ENOMEM;
> > +                     goto out;
> > +             }
> > +
> > +             if (copy_from_user(handles,
> > +                                (void __user *)(uintptr_t)args->bo_handles,
> > +                                job->bo_count * sizeof(*handles))) {
> > +                     ret = -EFAULT;
> > +                     goto out;
> > +             }
> > +
> > +             for (i = 0; i < job->bo_count; i++) {
> > +                     bo_descs[i].handle = handles[i];
> > +                     bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> > +                                         PANFROST_SUBMIT_BO_READ;
>
> You can use PANFROST_SUBMIT_BO_RW here.
>
> > +             }
> > +     } else if (copy_from_user(bo_descs,
> > +                               (void __user *)(uintptr_t)args->bo_descs,
> > +                               job->bo_count * sizeof(*bo_descs))) {
> > +             ret = -EFAULT;
> > +             goto out;
> > +     }
> > +
> > +     for (i = 0; i < job->bo_count; i++) {
> > +             if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> > +                    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> > +                     ret = -EINVAL;
> > +                     goto out;
> > +             }
> > +
> > +             job->bos[i].flags = bo_descs[i].flags;
> > +             job->bos[i].obj = drm_gem_object_lookup(file_priv,
> > +                                                     bo_descs[i].handle);
> > +             if (!job->bos[i].obj) {
> > +                     ret = -ENOENT;
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +out:
> > +     kvfree(handles);
> > +     kvfree(bo_descs);
> > +     return ret;
> >  }
> >
> >  /**
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 05c85f45a0de..e4b74fde9339 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >       pm_runtime_put_autosuspend(pfdev->dev);
> >  }
> >
> > -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> > -                                        int bo_count,
> > -                                        struct dma_fence **implicit_fences)
> > +static int panfrost_acquire_object_fences(struct panfrost_job *job)
> >  {
> > -     int i;
> > +     int i, ret;
> >
> > -     for (i = 0; i < bo_count; i++)
> > -             implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> > +     for (i = 0; i < job->bo_count; i++) {
> > +             struct panfrost_job_bo_desc *bo = &job->bos[i];
> > +             struct dma_resv *robj = bo->obj->resv;
> > +
> > +             if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> > +                     ret = dma_resv_reserve_shared(robj, 1);
> > +                     if (ret)
> > +                             return ret;
> > +             }
> > +
> > +             if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> > +                     continue;
> > +
> > +             if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> > +                     ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> > +                                                   &bo->shared_count,
> > +                                                   &bo->shared);
> > +                     if (ret)
> > +                             return ret;
> > +             } else {
> > +                     bo->excl = dma_resv_get_excl_rcu(robj);
> > +             }
>
> The implementation of NO_IMPLICIT_FENCE seems a bit strange to me: READ
> | NO_IMPLICIT_FENCE still reserves space for a shared fence. I don't
> understand why.
>
> > +     }
> > +
> > +     return 0;
> >  }
> >
> > -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> > -                                       int bo_count,
> > -                                       struct dma_fence *fence)
> > +static void panfrost_attach_object_fences(struct panfrost_job *job)
> >  {
> >       int i;
> >
> > -     for (i = 0; i < bo_count; i++)
> > -             dma_resv_add_excl_fence(bos[i]->resv, fence);
> > +     for (i = 0; i < job->bo_count; i++) {
> > +             struct drm_gem_object *obj = job->bos[i].obj;
> > +
> > +             if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> > +                     dma_resv_add_excl_fence(obj->resv,
> > +                                             job->render_done_fence);
> > +             else
> > +                     dma_resv_add_shared_fence(obj->resv,
> > +                                               job->render_done_fence);
> > +     }
> > +}
> > +
> > +static int panfrost_job_lock_bos(struct panfrost_job *job,
> > +                              struct ww_acquire_ctx *acquire_ctx)
> > +{
> > +     int contended = -1;
> > +     int i, ret;
> > +
> > +     ww_acquire_init(acquire_ctx, &reservation_ww_class);
> > +
> > +retry:
> > +     if (contended != -1) {
> > +             struct drm_gem_object *obj = job->bos[contended].obj;
> > +
> > +             ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> > +                                                    acquire_ctx);
>
> dma_resv_lock_slow_interruptible()?
>
> > +             if (ret) {
> > +                     ww_acquire_done(acquire_ctx);
> > +                     return ret;
> > +             }
> > +     }
> > +
> > +     for (i = 0; i < job->bo_count; i++) {
> > +             if (i == contended)
> > +                     continue;
> > +
> > +             ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> > +                                               acquire_ctx);
>
> dma_resv_lock_interruptible()?
>
> > +             if (ret) {
> > +                     int j;
> > +
> > +                     for (j = 0; j < i; j++)
> > +                             ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> > +
> > +                     if (contended != -1 && contended >= i) {
> > +                             struct drm_gem_object *contended_obj;
> > +
> > +                             contended_obj = job->bos[contended].obj;
> > +                             ww_mutex_unlock(&contended_obj->resv->lock);
> > +                     }
> > +
> > +                     if (ret == -EDEADLK) {
> > +                             contended = i;
> > +                             goto retry;
> > +                     }
> > +
> > +                     ww_acquire_done(acquire_ctx);
> > +                     return ret;
> > +             }
> > +     }
> > +
> > +     ww_acquire_done(acquire_ctx);
> > +
> > +     return 0;
> > +}
>
> This looks like a copy of drm_gem_lock_reservations(). The only reason
> for it as far as I can see is because we now have an array of struct
> panfrost_job_bo_desc rather than a direct array of struct
> drm_gem_object. I'm not sure having everything neatly in one structure
> is worth this cost?

I'm not thrilled about this either. If not a separate array, we could
change the common code to work on a common struct instead.

To put it another way, this is all copy-n-paste from elsewhere that I
don't really understand and don't want to maintain in the driver.
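
For illustration, a minimal sketch of how the descriptor array could
still feed the existing helper (drm_gem_lock_reservations() and its
signature are real DRM core API; the wrapper itself is hypothetical
and untested):

static int panfrost_job_lock_bos(struct panfrost_job *job,
				 struct ww_acquire_ctx *acquire_ctx)
{
	struct drm_gem_object **objs;
	int i, ret;

	/* Gather the GEM objects out of the bo_desc array so the
	 * common helper can operate on a plain pointer array. */
	objs = kvmalloc_array(job->bo_count, sizeof(*objs), GFP_KERNEL);
	if (!objs)
		return -ENOMEM;

	for (i = 0; i < job->bo_count; i++)
		objs[i] = job->bos[i].obj;

	/* The common helper already implements the ww_mutex
	 * lock/backoff/retry dance this patch duplicates. */
	ret = drm_gem_lock_reservations(objs, job->bo_count, acquire_ctx);
	kvfree(objs);
	return ret;
}

The unlock path would need the same array again for
drm_gem_unlock_reservations(), which is presumably part of why the
patch open-codes the whole thing instead.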

Rob

* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-16 22:20 ` Rob Herring
@ 2019-09-17  7:15   ` Boris Brezillon
  2019-09-23 13:41     ` Steven Price
  0 siblings, 1 reply; 11+ messages in thread
From: Boris Brezillon @ 2019-09-17  7:15 UTC (permalink / raw)
  To: Rob Herring; +Cc: dri-devel, Alyssa Rosenzweig, Steven Price

On Mon, 16 Sep 2019 17:20:28 -0500
Rob Herring <robh@kernel.org> wrote:

> On Fri, Sep 13, 2019 at 6:17 AM Boris Brezillon
> <boris.brezillon@collabora.com> wrote:
> >
> > The READ/WRITE flags are particularly useful if we want to avoid
> > serialization of jobs that read from the same BO but never write to it.  
> 
> Any data on performance differences?

Unfortunately no. When I initially added support for BO flags I thought
it would fix a regression I had on one glmark2 test (ideas), but the
problem ended up being something completely different (overhead of
calling ioctl(WAIT_BO) on already idle BOs).

I just ran glmark2 again, and there doesn't seem to be a noticeable
improvement with those 2 patches applied (and Mesa patched to use the
new flags). That said, the improvement is likely to be
workload-dependent, so I wouldn't consider these patches useless, but
I'm fine with putting them on hold until we see a real need.

Maybe Steven has some real use cases that could help outline the
benefit of these patches.

> 
> > The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> > shared but jobs are using different portions of the buffer.  
> 
> Why don't we add this when it is useful rather than might be?

I don't have a need for that one yet, but etnaviv has it in place so I
thought I'd add both at the same time.
Note that it could help us reduce the number of fences returned by
panfrost_job_dependency(), but I'm not sure it makes a difference in
terms of performance.
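
As a rough sketch of the intended usage from userspace (the flag and
field names are from the uAPI in patch 1; the handles and the rest of
the submit setup are made up for illustration):

struct drm_panfrost_submit_bo bo_descs[2] = {
	/* Sampled texture: read-only, so readers don't serialize
	 * against each other, only against the last writer. */
	{ .handle = tex_handle, .flags = PANFROST_SUBMIT_BO_READ },
	/* Shared BO where each job touches a disjoint range: opt out
	 * of implicit fencing and synchronize in userspace instead. */
	{ .handle = shared_handle,
	  .flags = PANFROST_SUBMIT_BO_WRITE |
		   PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE },
};

submit.bo_descs = (__u64)(uintptr_t)bo_descs;
submit.bo_desc_count = 2;
ioctl(fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);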

* Re: [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job
  2019-09-17  7:15   ` Boris Brezillon
@ 2019-09-23 13:41     ` Steven Price
  0 siblings, 0 replies; 11+ messages in thread
From: Steven Price @ 2019-09-23 13:41 UTC (permalink / raw)
  To: Boris Brezillon, Rob Herring; +Cc: Alyssa Rosenzweig, dri-devel

On 17/09/2019 08:15, Boris Brezillon wrote:
> On Mon, 16 Sep 2019 17:20:28 -0500
> Rob Herring <robh@kernel.org> wrote:
> 
>> On Fri, Sep 13, 2019 at 6:17 AM Boris Brezillon
>> <boris.brezillon@collabora.com> wrote:
>>>
>>> The READ/WRITE flags are particularly useful if we want to avoid
>>> serialization of jobs that read from the same BO but never write to it.  
>>
>> Any data on performance differences?
> 
> Unfortunately no. When I initially added support for BO flags I thought
> it would fix a regression I had on one glmark2 test (ideas), but the
> problem ended up being something completely different (overhead of
> calling ioctl(WAIT_BO) on already idle BOs).
> 
> I just ran glmark2 again, and there doesn't seem to be a noticeable
> improvement with those 2 patches applied (and Mesa patched to use the
> new flags). That said, the improvement is likely to be
> workload-dependent, so I wouldn't consider these patches useless, but
> I'm fine with putting them on hold until we see a real need.
> 
> Maybe Steven has some real use cases that could help outline the
> benefit of these patches.

To be honest I don't really know. The DDK internally does track this
read/write information - but then it doesn't involve the kernel in
tracking it. I was presuming that Mesa (because it exports the buffer
usage to the kernel) would benefit from being able to have multiple
readers - for example of a buffer being used as a texture for multiple
render passes. It's possible we don't see this benefit because we
haven't got the queuing of jobs merged yet?

There might also be some benefits when it comes to interaction with
other drivers, but I don't have any concrete examples.

>>
>>> The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
>>> shared but jobs are using different portions of the buffer.  
>>
>> Why don't we add this when it is useful rather than might be?
> 
> I don't have a need for that one yet, but etnaviv has it in place so I
> thought I'd add both at the same time.
> Note that it could help us reduce the number of fences returned by
> panfrost_job_dependency(), but I'm not sure it makes a difference in
> terms of performance.

I'm not aware of any reason for NO_IMPLICIT_FENCE. I found it somewhat
odd that it is effectively one-way (it doesn't wait for fences, but
others could be waiting on the fence added by this usage). If we don't
have a use yet in Mesa then it's probably best to wait until we know how
this is actually going to be used.
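
Condensing the two hunks from the patch to show that asymmetry (the
comments are added for illustration):

/* panfrost_acquire_object_fences(): the flag only skips *collecting*
 * implicit fences, so this job won't wait on prior users... */
if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
	continue;

/* ...while panfrost_attach_object_fences() still adds the job's
 * fence unconditionally, so later submitters that don't pass the
 * flag will wait on this job. */
dma_resv_add_shared_fence(obj->resv, job->render_done_fence);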

There is of course already the option of simply omitting the BO in the
job to prevent any dependencies being created :)

Steve

end of thread

Thread overview: 11+ messages
2019-09-13 11:17 [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Boris Brezillon
2019-09-13 11:17 ` [PATCH 2/2] drm/panfrost: Extend the bo_wait() ioctl Boris Brezillon
2019-09-13 13:46   ` Steven Price
2019-09-16 12:55     ` Boris Brezillon
2019-09-13 13:44 ` [PATCH 1/2] drm/panfrost: Allow passing extra information about BOs used by a job Steven Price
2019-09-16 12:11   ` Alyssa Rosenzweig
2019-09-16 13:11   ` Boris Brezillon
2019-09-16 22:28   ` Rob Herring
2019-09-16 22:20 ` Rob Herring
2019-09-17  7:15   ` Boris Brezillon
2019-09-23 13:41     ` Steven Price
