linux-media.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] drm/msm: Reduce fence signal latency
@ 2021-07-26 14:43 Rob Clark
  2021-07-26 14:43 ` [PATCH 1/2] drm/msm: Let fences read directly from memptrs Rob Clark
  0 siblings, 1 reply; 2+ messages in thread
From: Rob Clark @ 2021-07-26 14:43 UTC (permalink / raw)
  To: dri-devel
  Cc: Rob Clark, open list:DRM DRIVER FOR MSM ADRENO GPU,
	moderated list:DMA BUFFER SHARING FRAMEWORK,
	open list:DRM DRIVER FOR MSM ADRENO GPU, open list,
	open list:DMA BUFFER SHARING FRAMEWORK

From: Rob Clark <robdclark@chromium.org>

A couple of tweaks to reduce fence signal latency.

Rob Clark (2):
  drm/msm: Let fences read directly from memptrs
  drm/msm: Signal fences sooner

 drivers/gpu/drm/msm/msm_fence.c      | 11 +++++--
 drivers/gpu/drm/msm/msm_fence.h      | 41 +++++++++++++++++++++++---
 drivers/gpu/drm/msm/msm_gpu.c        | 44 ++++++++++++++++------------
 drivers/gpu/drm/msm/msm_ringbuffer.c |  2 +-
 4 files changed, 73 insertions(+), 25 deletions(-)

-- 
2.31.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH 1/2] drm/msm: Let fences read directly from memptrs
  2021-07-26 14:43 [PATCH 0/2] drm/msm: Reduce fence signal latency Rob Clark
@ 2021-07-26 14:43 ` Rob Clark
  0 siblings, 0 replies; 2+ messages in thread
From: Rob Clark @ 2021-07-26 14:43 UTC (permalink / raw)
  To: dri-devel
  Cc: Rob Clark, Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
	Sumit Semwal, Christian König,
	open list:DRM DRIVER FOR MSM ADRENO GPU,
	open list:DRM DRIVER FOR MSM ADRENO GPU, open list,
	open list:DMA BUFFER SHARING FRAMEWORK,
	moderated list:DMA BUFFER SHARING FRAMEWORK

From: Rob Clark <robdclark@chromium.org>

Let dma_fence::signaled, etc., read directly from the address that the hw
updates with the completed fence seqno, so we can potentially notice
sooner that a fence has signaled.

Also add kernel-doc comments for struct msm_fence_context.

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_fence.c      | 11 ++++++--
 drivers/gpu/drm/msm/msm_fence.h      | 41 +++++++++++++++++++++++++---
 drivers/gpu/drm/msm/msm_ringbuffer.c |  2 +-
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index cd59a5918038..b92a9091a1e2 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -11,7 +11,8 @@
 
 
 struct msm_fence_context *
-msm_fence_context_alloc(struct drm_device *dev, const char *name)
+msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr,
+		const char *name)
 {
 	struct msm_fence_context *fctx;
 
@@ -22,6 +23,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name)
 	fctx->dev = dev;
 	strncpy(fctx->name, name, sizeof(fctx->name));
 	fctx->context = dma_fence_context_alloc(1);
+	fctx->fenceptr = fenceptr;
 	init_waitqueue_head(&fctx->event);
 	spin_lock_init(&fctx->spinlock);
 
@@ -35,7 +37,12 @@ void msm_fence_context_free(struct msm_fence_context *fctx)
 
 static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fence)
 {
-	return (int32_t)(fctx->completed_fence - fence) >= 0;
+	/*
+	 * Note: Check completed_fence first, as fenceptr is in a write-combine
+	 * mapping, so it will be more expensive to read.
+	 */
+	return (int32_t)(fctx->completed_fence - fence) >= 0 ||
+		(int32_t)(*fctx->fenceptr - fence) >= 0;
 }
 
 /* legacy path for WAIT_FENCE ioctl: */
diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
index 2d9af66dcca5..6ab97062ff1a 100644
--- a/drivers/gpu/drm/msm/msm_fence.h
+++ b/drivers/gpu/drm/msm/msm_fence.h
@@ -9,19 +9,52 @@
 
 #include "msm_drv.h"
 
+/**
+ * struct msm_fence_context - fence context for gpu
+ *
+ * Each ringbuffer has a single fence context, with the GPU writing an
+ * incrementing fence seqno at the end of each submit
+ */
 struct msm_fence_context {
 	struct drm_device *dev;
+	/** @name: human readable name for fence timeline */
 	char name[32];
+	/** @context: see dma_fence_context_alloc() */
 	unsigned context;
-	/* last_fence == completed_fence --> no pending work */
-	uint32_t last_fence;          /* last assigned fence */
-	uint32_t completed_fence;     /* last completed fence */
+
+	/**
+	 * @last_fence:
+	 *
+	 * Last assigned fence, incremented each time a fence is created
+	 * on this fence context.  If last_fence == completed_fence,
+	 * there is no remaining pending work
+	 */
+	uint32_t last_fence;
+
+	/**
+	 * @completed_fence:
+	 *
+	 * The last completed fence, updated from the CPU after interrupt
+	 * from GPU
+	 */
+	uint32_t completed_fence;
+
+	/**
+	 * @fenceptr:
+	 *
+	 * The address that the GPU directly writes with completed fence
+	 * seqno.  This can be ahead of completed_fence.  We can peek at
+	 * this to see if a fence has already signaled but the CPU hasn't
+	 * gotten around to handling the irq and updating completed_fence
+	 */
+	volatile uint32_t *fenceptr;
+
 	wait_queue_head_t event;
 	spinlock_t spinlock;
 };
 
 struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev,
-		const char *name);
+		volatile uint32_t *fenceptr, const char *name);
 void msm_fence_context_free(struct msm_fence_context *fctx);
 
 int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 4d2a2a4abef8..7e92d9532454 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -51,7 +51,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 
 	snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
 
-	ring->fctx = msm_fence_context_alloc(gpu->dev, name);
+	ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name);
 
 	return ring;
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-07-26 14:39 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-26 14:43 [PATCH 0/2] drm/msm: Reduce fence signal latency Rob Clark
2021-07-26 14:43 ` [PATCH 1/2] drm/msm: Let fences read directly from memptrs Rob Clark

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).