All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] drm/msm: Un-break multi-context gl
@ 2021-10-01 17:58 Rob Clark
  2021-10-01 17:58 ` [PATCH 1/2] drm/msm: A bit more docs + cleanup Rob Clark
  2021-10-01 17:58 ` [PATCH 2/2] drm/msm: One sched entity per process per priority Rob Clark
  0 siblings, 2 replies; 3+ messages in thread
From: Rob Clark @ 2021-10-01 17:58 UTC (permalink / raw)
  To: dri-devel
  Cc: freedreno, linux-arm-msm, Jordan Crouse, Akhil P Oommen,
	Rob Clark, open list

From: Rob Clark <robdclark@chromium.org>

Userspace is expecting that a single thread doing rendering against
multiple contexts does not need additional synchronization between those
contexts beyond ensuring work is flushed to the kernel in the correct
order.  But if we have a sched-entity per-context, and are not using
implicit sync, GPU jobs from different contexts can execute in a
different order than they were flushed to the kernel.

To solve that, share sched-entities for a given priority level between
submitqueues (which map to gl contexts).

Rob Clark (2):
  drm/msm: A bit more docs + cleanup
  drm/msm: One sched entity per process per priority

 drivers/gpu/drm/msm/msm_drv.h         | 44 -----------------
 drivers/gpu/drm/msm/msm_gem_submit.c  |  2 +-
 drivers/gpu/drm/msm/msm_gpu.h         | 66 +++++++++++++++++++++++++-
 drivers/gpu/drm/msm/msm_submitqueue.c | 68 +++++++++++++++++++++++----
 4 files changed, 123 insertions(+), 57 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] drm/msm: A bit more docs + cleanup
  2021-10-01 17:58 [PATCH 0/2] drm/msm: Un-break multi-context gl Rob Clark
@ 2021-10-01 17:58 ` Rob Clark
  2021-10-01 17:58 ` [PATCH 2/2] drm/msm: One sched entity per process per priority Rob Clark
  1 sibling, 0 replies; 3+ messages in thread
From: Rob Clark @ 2021-10-01 17:58 UTC (permalink / raw)
  To: dri-devel
  Cc: freedreno, linux-arm-msm, Jordan Crouse, Akhil P Oommen,
	Rob Clark, Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
	open list

From: Rob Clark <robdclark@chromium.org>

msm_file_private is more gpu related, and in the next commit it will
need access to other GPU specific #defines.  While we're at it, add
some comments.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_drv.h | 44 --------------------------
 drivers/gpu/drm/msm/msm_gpu.h | 58 ++++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 8633d0059a3e..31b39c27156d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -53,15 +53,6 @@ struct msm_disp_state;
 
 #define FRAC_16_16(mult, div)    (((mult) << 16) / (div))
 
-struct msm_file_private {
-	rwlock_t queuelock;
-	struct list_head submitqueues;
-	int queueid;
-	struct msm_gem_address_space *aspace;
-	struct kref ref;
-	int seqno;
-};
-
 enum msm_mdp_plane_property {
 	PLANE_PROP_ZPOS,
 	PLANE_PROP_ALPHA,
@@ -511,41 +502,6 @@ void msm_hrtimer_work_init(struct msm_hrtimer_work *work,
 			   clockid_t clock_id,
 			   enum hrtimer_mode mode);
 
-struct msm_gpu_submitqueue;
-int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
-struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
-		u32 id);
-int msm_submitqueue_create(struct drm_device *drm,
-		struct msm_file_private *ctx,
-		u32 prio, u32 flags, u32 *id);
-int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
-		struct drm_msm_submitqueue_query *args);
-int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
-void msm_submitqueue_close(struct msm_file_private *ctx);
-
-void msm_submitqueue_destroy(struct kref *kref);
-
-static inline void __msm_file_private_destroy(struct kref *kref)
-{
-	struct msm_file_private *ctx = container_of(kref,
-		struct msm_file_private, ref);
-
-	msm_gem_address_space_put(ctx->aspace);
-	kfree(ctx);
-}
-
-static inline void msm_file_private_put(struct msm_file_private *ctx)
-{
-	kref_put(&ctx->ref, __msm_file_private_destroy);
-}
-
-static inline struct msm_file_private *msm_file_private_get(
-	struct msm_file_private *ctx)
-{
-	kref_get(&ctx->ref);
-	return ctx;
-}
-
 #define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__)
 #define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__)
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 2fcb6c195865..592334cb9a0b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -272,6 +272,26 @@ struct msm_gpu_perfcntr {
  */
 #define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN)
 
+/**
+ * struct msm_file_private - per-drm_file context
+ *
+ * @queuelock:    synchronizes access to submitqueues list
+ * @submitqueues: list of &msm_gpu_submitqueue created by userspace
+ * @queueid:      counter incremented each time a submitqueue is created,
+ *                used to assign &msm_gpu_submitqueue.id
+ * @aspace:       the per-process GPU address-space
+ * @ref:          reference count
+ * @seqno:        unique per process seqno
+ */
+struct msm_file_private {
+	rwlock_t queuelock;
+	struct list_head submitqueues;
+	int queueid;
+	struct msm_gem_address_space *aspace;
+	struct kref ref;
+	int seqno;
+};
+
 /**
  * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
  *
@@ -319,6 +339,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
 }
 
 /**
+ * struct msm_gpu_submitqueues - Userspace created context.
+ *
  * A submitqueue is associated with a gl context or vk queue (or equiv)
  * in userspace.
  *
@@ -336,7 +358,7 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
  *             seqno, protected by submitqueue lock
  * @lock:      submitqueue lock
  * @ref:       reference count
- * @entity: the submit job-queue
+ * @entity:    the submit job-queue
  */
 struct msm_gpu_submitqueue {
 	int id;
@@ -436,6 +458,40 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
 int msm_gpu_pm_resume(struct msm_gpu *gpu);
 
+int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
+struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
+		u32 id);
+int msm_submitqueue_create(struct drm_device *drm,
+		struct msm_file_private *ctx,
+		u32 prio, u32 flags, u32 *id);
+int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
+		struct drm_msm_submitqueue_query *args);
+int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
+void msm_submitqueue_close(struct msm_file_private *ctx);
+
+void msm_submitqueue_destroy(struct kref *kref);
+
+static inline void __msm_file_private_destroy(struct kref *kref)
+{
+	struct msm_file_private *ctx = container_of(kref,
+		struct msm_file_private, ref);
+
+	msm_gem_address_space_put(ctx->aspace);
+	kfree(ctx);
+}
+
+static inline void msm_file_private_put(struct msm_file_private *ctx)
+{
+	kref_put(&ctx->ref, __msm_file_private_destroy);
+}
+
+static inline struct msm_file_private *msm_file_private_get(
+	struct msm_file_private *ctx)
+{
+	kref_get(&ctx->ref);
+	return ctx;
+}
+
 void msm_devfreq_init(struct msm_gpu *gpu);
 void msm_devfreq_cleanup(struct msm_gpu *gpu);
 void msm_devfreq_resume(struct msm_gpu *gpu);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] drm/msm: One sched entity per process per priority
  2021-10-01 17:58 [PATCH 0/2] drm/msm: Un-break multi-context gl Rob Clark
  2021-10-01 17:58 ` [PATCH 1/2] drm/msm: A bit more docs + cleanup Rob Clark
@ 2021-10-01 17:58 ` Rob Clark
  1 sibling, 0 replies; 3+ messages in thread
From: Rob Clark @ 2021-10-01 17:58 UTC (permalink / raw)
  To: dri-devel
  Cc: freedreno, linux-arm-msm, Jordan Crouse, Akhil P Oommen,
	Rob Clark, Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
	open list

From: Rob Clark <robdclark@chromium.org>

Some userspace apps make assumptions that rendering against multiple
contexts within the same process (from the same thread, with appropriate
MakeCurrent() calls) provides sufficient synchronization without any
external synchronization (ie. glFenceSync()/glWaitSync()).  Since a
submitqueue maps to a gl/vk context, having multiple sched entities of
the same priority only works with implicit sync enabled.

To fix this, limit things to a single sched entity per priority level
per process.

An alternative would be sharing submitqueues between contexts in
userspace, but tracking of per-context faults (ie. GL_EXT_robustness)
is already done at the submitqueue level, so this is not an option.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
Unfortunately, due to a finch experiment (a sort of A/B experiment)
all my testing of the drm/scheduler with chrome(ium) was using
SkiaRenderer which does not trigger this bug.  It wasn't until folks
started reporting misrendering on dev channel, and I tracked it down
to legacy GLRenderer vs SkiaRenderer, that I realized the problem :-(

 drivers/gpu/drm/msm/msm_gem_submit.c  |  2 +-
 drivers/gpu/drm/msm/msm_gpu.h         | 24 ++++++----
 drivers/gpu/drm/msm/msm_submitqueue.c | 68 +++++++++++++++++++++++----
 3 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 924b01b9c105..34ed56b24224 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -46,7 +46,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 	if (!submit)
 		return ERR_PTR(-ENOMEM);
 
-	ret = drm_sched_job_init(&submit->base, &queue->entity, queue);
+	ret = drm_sched_job_init(&submit->base, queue->entity, queue);
 	if (ret) {
 		kfree(submit);
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 592334cb9a0b..d72b1de3cb1f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -290,6 +290,19 @@ struct msm_file_private {
 	struct msm_gem_address_space *aspace;
 	struct kref ref;
 	int seqno;
+
+	/**
+	 * entities:
+	 *
+	 * Table of per-priority-level sched entities used by submitqueues
+	 * associated with this &drm_file.  Because some userspace apps
+	 * make assumptions about rendering from multiple gl contexts
+	 * (of the same priority) within the process happening in FIFO
+	 * order without requiring any fencing beyond MakeCurrent(), we
+	 * create at most one &drm_sched_entity per-process per-priority-
+	 * level.
+	 */
+	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
 };
 
 /**
@@ -370,7 +383,7 @@ struct msm_gpu_submitqueue {
 	struct idr fence_idr;
 	struct mutex lock;
 	struct kref ref;
-	struct drm_sched_entity entity;
+	struct drm_sched_entity *entity;
 };
 
 struct msm_gpu_state_bo {
@@ -471,14 +484,7 @@ void msm_submitqueue_close(struct msm_file_private *ctx);
 
 void msm_submitqueue_destroy(struct kref *kref);
 
-static inline void __msm_file_private_destroy(struct kref *kref)
-{
-	struct msm_file_private *ctx = container_of(kref,
-		struct msm_file_private, ref);
-
-	msm_gem_address_space_put(ctx->aspace);
-	kfree(ctx);
-}
+void __msm_file_private_destroy(struct kref *kref);
 
 static inline void msm_file_private_put(struct msm_file_private *ctx)
 {
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 7ce0771b5582..b8621c6e0554 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -7,6 +7,24 @@
 
 #include "msm_gpu.h"
 
+void __msm_file_private_destroy(struct kref *kref)
+{
+	struct msm_file_private *ctx = container_of(kref,
+		struct msm_file_private, ref);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->entities); i++) {
+		if (!ctx->entities[i])
+			continue;
+
+		drm_sched_entity_destroy(ctx->entities[i]);
+		kfree(ctx->entities[i]);
+	}
+
+	msm_gem_address_space_put(ctx->aspace);
+	kfree(ctx);
+}
+
 void msm_submitqueue_destroy(struct kref *kref)
 {
 	struct msm_gpu_submitqueue *queue = container_of(kref,
@@ -14,8 +32,6 @@ void msm_submitqueue_destroy(struct kref *kref)
 
 	idr_destroy(&queue->fence_idr);
 
-	drm_sched_entity_destroy(&queue->entity);
-
 	msm_file_private_put(queue->ctx);
 
 	kfree(queue);
@@ -61,13 +77,47 @@ void msm_submitqueue_close(struct msm_file_private *ctx)
 	}
 }
 
+static struct drm_sched_entity *
+get_sched_entity(struct msm_file_private *ctx, struct msm_ringbuffer *ring,
+		 unsigned ring_nr, enum drm_sched_priority sched_prio)
+{
+	static DEFINE_MUTEX(entity_lock);
+	unsigned idx = (ring_nr * NR_SCHED_PRIORITIES) + sched_prio;
+
+	/* We should have already validated that the requested priority is
+	 * valid by the time we get here.
+	 */
+	if (WARN_ON(idx >= ARRAY_SIZE(ctx->entities)))
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&entity_lock);
+
+	if (!ctx->entities[idx]) {
+		struct drm_sched_entity *entity;
+		struct drm_gpu_scheduler *sched = &ring->sched;
+		int ret;
+
+		entity = kzalloc(sizeof(*ctx->entities[idx]), GFP_KERNEL);
+
+		ret = drm_sched_entity_init(entity, sched_prio, &sched, 1, NULL);
+		if (ret) {
+			kfree(entity);
+			return ERR_PTR(ret);
+		}
+
+		ctx->entities[idx] = entity;
+	}
+
+	mutex_unlock(&entity_lock);
+
+	return ctx->entities[idx];
+}
+
 int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 		u32 prio, u32 flags, u32 *id)
 {
 	struct msm_drm_private *priv = drm->dev_private;
 	struct msm_gpu_submitqueue *queue;
-	struct msm_ringbuffer *ring;
-	struct drm_gpu_scheduler *sched;
 	enum drm_sched_priority sched_prio;
 	unsigned ring_nr;
 	int ret;
@@ -91,12 +141,10 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 	queue->flags = flags;
 	queue->ring_nr = ring_nr;
 
-	ring = priv->gpu->rb[ring_nr];
-	sched = &ring->sched;
-
-	ret = drm_sched_entity_init(&queue->entity,
-			sched_prio, &sched, 1, NULL);
-	if (ret) {
+	queue->entity = get_sched_entity(ctx, priv->gpu->rb[ring_nr],
+					 ring_nr, sched_prio);
+	if (IS_ERR(queue->entity)) {
+		ret = PTR_ERR(queue->entity);
 		kfree(queue);
 		return ret;
 	}
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-10-01 17:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-01 17:58 [PATCH 0/2] drm/msm: Un-break multi-context gl Rob Clark
2021-10-01 17:58 ` [PATCH 1/2] drm/msm: A bit more docs + cleanup Rob Clark
2021-10-01 17:58 ` [PATCH 2/2] drm/msm: One sched entity per process per priority Rob Clark

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.