All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/v3d: Document cache flushing ABI.
@ 2018-12-01  0:57 Eric Anholt
  2018-12-01  0:57   ` Eric Anholt
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-kernel, Dave Emett, Thomas Spurden, Eric Anholt

Right now, userspace doesn't do any L2T writes, but we should lay out
our expectations for how it works.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 include/uapi/drm/v3d_drm.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 35c7d813c66e..95b8f8e82ea5 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -52,6 +52,13 @@ extern "C" {
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The caches (L1T, slice, and L2T) will be flushed before the job
+ * executes.  The TLB writes are guaranteed to have been flushed by
+ * the time the render done IRQ happens, which is the trigger for
+ * out_sync.  Any dirtying of cachelines by the job (only possible
+ * using TMU writes) must be flushed by the caller using the CL's
+ * cache flush commands.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
-- 
2.20.0.rc1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/6] drm/v3d: Drop unused v3d_flush_caches().
  2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
@ 2018-12-01  0:57   ` Eric Anholt
  2018-12-01  0:57   ` Eric Anholt
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-kernel, Dave Emett, Thomas Spurden, Eric Anholt

Now that I've specified how the end-of-pipeline flushing should work,
we're never going to use this function.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/v3d/v3d_drv.h |  1 -
 drivers/gpu/drm/v3d/v3d_gem.c | 21 ---------------------
 2 files changed, 22 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index bcd3d567bec2..239b56d76f3e 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -314,7 +314,6 @@ void v3d_exec_put(struct v3d_exec_info *exec);
 void v3d_tfu_job_put(struct v3d_tfu_job *exec);
 void v3d_reset(struct v3d_dev *v3d);
 void v3d_invalidate_caches(struct v3d_dev *v3d);
-void v3d_flush_caches(struct v3d_dev *v3d);
 
 /* v3d_irq.c */
 void v3d_irq_init(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 8b4af512450f..34103205b7cb 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -175,20 +175,6 @@ v3d_invalidate_slices(struct v3d_dev *v3d, int core)
 		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
 }
 
-/* Invalidates texture L2 cachelines */
-static void
-v3d_invalidate_l2t(struct v3d_dev *v3d, int core)
-{
-	V3D_CORE_WRITE(core,
-		       V3D_CTL_L2TCACTL,
-		       V3D_L2TCACTL_L2TFLS |
-		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM));
-	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-		       V3D_L2TCACTL_L2TFLS), 100)) {
-		DRM_ERROR("Timeout waiting for L2T invalidate\n");
-	}
-}
-
 void
 v3d_invalidate_caches(struct v3d_dev *v3d)
 {
@@ -199,13 +185,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
 	v3d_flush_l2t(v3d, 0);
 }
 
-void
-v3d_flush_caches(struct v3d_dev *v3d)
-{
-	v3d_invalidate_l1td(v3d, 0);
-	v3d_invalidate_l2t(v3d, 0);
-}
-
 static void
 v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
 			 struct dma_fence *fence)
-- 
2.20.0.rc1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/6] drm/v3d: Drop unused v3d_flush_caches().
@ 2018-12-01  0:57   ` Eric Anholt
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: Dave Emett, Thomas Spurden, linux-kernel

Now that I've specified how the end-of-pipeline flushing should work,
we're never going to use this function.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/v3d/v3d_drv.h |  1 -
 drivers/gpu/drm/v3d/v3d_gem.c | 21 ---------------------
 2 files changed, 22 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index bcd3d567bec2..239b56d76f3e 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -314,7 +314,6 @@ void v3d_exec_put(struct v3d_exec_info *exec);
 void v3d_tfu_job_put(struct v3d_tfu_job *exec);
 void v3d_reset(struct v3d_dev *v3d);
 void v3d_invalidate_caches(struct v3d_dev *v3d);
-void v3d_flush_caches(struct v3d_dev *v3d);
 
 /* v3d_irq.c */
 void v3d_irq_init(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 8b4af512450f..34103205b7cb 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -175,20 +175,6 @@ v3d_invalidate_slices(struct v3d_dev *v3d, int core)
 		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
 }
 
-/* Invalidates texture L2 cachelines */
-static void
-v3d_invalidate_l2t(struct v3d_dev *v3d, int core)
-{
-	V3D_CORE_WRITE(core,
-		       V3D_CTL_L2TCACTL,
-		       V3D_L2TCACTL_L2TFLS |
-		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM));
-	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-		       V3D_L2TCACTL_L2TFLS), 100)) {
-		DRM_ERROR("Timeout waiting for L2T invalidate\n");
-	}
-}
-
 void
 v3d_invalidate_caches(struct v3d_dev *v3d)
 {
@@ -199,13 +185,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
 	v3d_flush_l2t(v3d, 0);
 }
 
-void
-v3d_flush_caches(struct v3d_dev *v3d)
-{
-	v3d_invalidate_l1td(v3d, 0);
-	v3d_invalidate_l2t(v3d, 0);
-}
-
 static void
 v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
 			 struct dma_fence *fence)
-- 
2.20.0.rc1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/6] drm/v3d: Don't bother flushing L1TD at job start.
  2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
@ 2018-12-01  0:57   ` Eric Anholt
  2018-12-01  0:57   ` Eric Anholt
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-kernel, Dave Emett, Thomas Spurden, Eric Anholt

This is the write combiner for TMU writes.  You're supposed to flush
that at job end if you had dirtied any cachelines.  Flushing it at job
start then doesn't make any sense.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
---
 drivers/gpu/drm/v3d/v3d_gem.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 34103205b7cb..cc4d025b01e0 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -139,22 +139,10 @@ v3d_invalidate_l2(struct v3d_dev *v3d, int core)
 		       V3D_L2CACTL_L2CENA);
 }
 
-static void
-v3d_invalidate_l1td(struct v3d_dev *v3d, int core)
-{
-	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
-	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-		       V3D_L2TCACTL_L2TFLS), 100)) {
-		DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
-	}
-}
-
 /* Invalidates texture L2 cachelines */
 static void
 v3d_flush_l2t(struct v3d_dev *v3d, int core)
 {
-	v3d_invalidate_l1td(v3d, core);
-
 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 		       V3D_L2TCACTL_L2TFLS |
 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
-- 
2.20.0.rc1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/6] drm/v3d: Don't bother flushing L1TD at job start.
@ 2018-12-01  0:57   ` Eric Anholt
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: Dave Emett, Thomas Spurden, linux-kernel

This is the write combiner for TMU writes.  You're supposed to flush
that at job end if you had dirtied any cachelines.  Flushing it at job
start then doesn't make any sense.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
---
 drivers/gpu/drm/v3d/v3d_gem.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 34103205b7cb..cc4d025b01e0 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -139,22 +139,10 @@ v3d_invalidate_l2(struct v3d_dev *v3d, int core)
 		       V3D_L2CACTL_L2CENA);
 }
 
-static void
-v3d_invalidate_l1td(struct v3d_dev *v3d, int core)
-{
-	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
-	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-		       V3D_L2TCACTL_L2TFLS), 100)) {
-		DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
-	}
-}
-
 /* Invalidates texture L2 cachelines */
 static void
 v3d_flush_l2t(struct v3d_dev *v3d, int core)
 {
-	v3d_invalidate_l1td(v3d, core);
-
 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 		       V3D_L2TCACTL_L2TFLS |
 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
-- 
2.20.0.rc1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/6] drm/v3d: Drop the wait for L2T flush to complete.
  2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
  2018-12-01  0:57   ` Eric Anholt
  2018-12-01  0:57   ` Eric Anholt
@ 2018-12-01  0:57 ` Eric Anholt
  2018-12-03 17:59   ` Dave Emett
  2018-12-01  0:57 ` [PATCH 5/6] drm/v3d: Add more tracepoints for V3D GPU rendering Eric Anholt
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-kernel, Dave Emett, Thomas Spurden, Eric Anholt

According to Dave, once you've started an L2T flush, all L2T accesses
will be blocked until the flush completes.  This fixes a consistent
3-4ms stall between the ioctl and running the job, and 3DMMES Taiji
goes from 27fps to 110fps.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
---
 drivers/gpu/drm/v3d/v3d_gem.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index cc4d025b01e0..0bd6892e3044 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -146,10 +146,6 @@ v3d_flush_l2t(struct v3d_dev *v3d, int core)
 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 		       V3D_L2TCACTL_L2TFLS |
 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
-	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-		       V3D_L2TCACTL_L2TFLS), 100)) {
-		DRM_ERROR("Timeout waiting for L2T flush\n");
-	}
 }
 
 /* Invalidates the slice caches.  These are read-only caches. */
-- 
2.20.0.rc1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/6] drm/v3d: Add more tracepoints for V3D GPU rendering.
  2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
                   ` (2 preceding siblings ...)
  2018-12-01  0:57 ` [PATCH 4/6] drm/v3d: Drop the wait for L2T flush to complete Eric Anholt
@ 2018-12-01  0:57 ` Eric Anholt
  2018-12-03 17:41   ` Dave Emett
  2018-12-01  0:57 ` [PATCH 6/6] drm/v3d: Add missing fence timeline name for TFU Eric Anholt
  2018-12-03 17:22 ` [PATCH 1/6] drm/v3d: Document cache flushing ABI Dave Emett
  5 siblings, 1 reply; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-kernel, Dave Emett, Thomas Spurden, Eric Anholt

The core scheduler tells us when the job is pushed to the scheduler's
queue, and I had the job_run functions saying when they actually queue
the job to the hardware.  By adding tracepoints for the very top of
the ioctls and the IRQs signaling job completion, "perf record -a -e
v3d:.\* -e gpu_scheduler:.\* <job>; perf script" gets you a pretty
decent timeline.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/v3d/v3d_gem.c   |   4 ++
 drivers/gpu/drm/v3d/v3d_irq.c   |  19 +++++-
 drivers/gpu/drm/v3d/v3d_trace.h | 101 ++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 0bd6892e3044..2f82e2724b1f 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -484,6 +484,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct drm_syncobj *sync_out;
 	int ret = 0;
 
+	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
+
 	if (args->pad != 0) {
 		DRM_INFO("pad must be zero: %d\n", args->pad);
 		return -EINVAL;
@@ -611,6 +613,8 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	int ret = 0;
 	int bo_count;
 
+	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
+
 	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
 	if (!job)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index dd7a7b0bd5a1..69338da70ddc 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -15,6 +15,7 @@
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
+#include "v3d_trace.h"
 
 #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM |	\
 			     V3D_INT_FLDONE |	\
@@ -88,12 +89,20 @@ v3d_irq(int irq, void *arg)
 	}
 
 	if (intsts & V3D_INT_FLDONE) {
-		dma_fence_signal(v3d->bin_job->bin.done_fence);
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->bin_job->bin.done_fence);
+
+		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
 	if (intsts & V3D_INT_FRDONE) {
-		dma_fence_signal(v3d->render_job->render.done_fence);
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->render_job->render.done_fence);
+
+		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
@@ -119,7 +128,11 @@ v3d_hub_irq(int irq, void *arg)
 	V3D_WRITE(V3D_HUB_INT_CLR, intsts);
 
 	if (intsts & V3D_HUB_INT_TFUC) {
-		dma_fence_signal(v3d->tfu_job->done_fence);
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->tfu_job->done_fence);
+
+		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
index f54ed9cd3444..edd984afa33f 100644
--- a/drivers/gpu/drm/v3d/v3d_trace.h
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -12,6 +12,28 @@
 #define TRACE_SYSTEM v3d
 #define TRACE_INCLUDE_FILE v3d_trace
 
+TRACE_EVENT(v3d_submit_cl_ioctl,
+	    TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea),
+	    TP_ARGS(dev, ct1qba, ct1qea),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, ct1qba)
+			     __field(u32, ct1qea)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->ct1qba = ct1qba;
+			   __entry->ct1qea = ct1qea;
+			   ),
+
+	    TP_printk("dev=%u, RCL 0x%08x..0x%08x",
+		      __entry->dev,
+		      __entry->ct1qba,
+		      __entry->ct1qea)
+);
+
 TRACE_EVENT(v3d_submit_cl,
 	    TP_PROTO(struct drm_device *dev, bool is_render,
 		     uint64_t seqno,
@@ -42,6 +64,85 @@ TRACE_EVENT(v3d_submit_cl,
 		      __entry->ctnqea)
 );
 
+TRACE_EVENT(v3d_bcl_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_rcl_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_tfu_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_submit_tfu_ioctl,
+	    TP_PROTO(struct drm_device *dev, u32 iia),
+	    TP_ARGS(dev, iia),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, iia)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->iia = iia;
+			   ),
+
+	    TP_printk("dev=%u, IIA 0x%08x",
+		      __entry->dev,
+		      __entry->iia)
+);
+
 TRACE_EVENT(v3d_submit_tfu,
 	    TP_PROTO(struct drm_device *dev,
 		     uint64_t seqno),
-- 
2.20.0.rc1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 6/6] drm/v3d: Add missing fence timeline name for TFU.
  2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
                   ` (3 preceding siblings ...)
  2018-12-01  0:57 ` [PATCH 5/6] drm/v3d: Add more tracepoints for V3D GPU rendering Eric Anholt
@ 2018-12-01  0:57 ` Eric Anholt
  2018-12-03 18:03   ` Dave Emett
  2018-12-03 17:22 ` [PATCH 1/6] drm/v3d: Document cache flushing ABI Dave Emett
  5 siblings, 1 reply; 16+ messages in thread
From: Eric Anholt @ 2018-12-01  0:57 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-kernel, Dave Emett, Thomas Spurden, Eric Anholt

We shouldn't be returning v3d-render in for our new queue.

Signed-off-by: Eric Anholt <eric@anholt.net>
Fixes: 83d5139982db ("drm/v3d: Add support for submitting jobs to the TFU.")
---
 drivers/gpu/drm/v3d/v3d_fence.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index 50bfcf9a8a1a..b0a2a1ae2eb1 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -29,10 +29,16 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
 {
 	struct v3d_fence *f = to_v3d_fence(fence);
 
-	if (f->queue == V3D_BIN)
+	switch (f->queue) {
+	case V3D_BIN:
 		return "v3d-bin";
-	else
+	case V3D_RENDER:
 		return "v3d-render";
+	case V3D_TFU:
+		return "v3d-tfu";
+	default:
+		return NULL;
+	}
 }
 
 const struct dma_fence_ops v3d_fence_ops = {
-- 
2.20.0.rc1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/6] drm/v3d: Document cache flushing ABI.
  2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
                   ` (4 preceding siblings ...)
  2018-12-01  0:57 ` [PATCH 6/6] drm/v3d: Add missing fence timeline name for TFU Eric Anholt
@ 2018-12-03 17:22 ` Dave Emett
  2018-12-03 17:28   ` Dave Emett
  5 siblings, 1 reply; 16+ messages in thread
From: Dave Emett @ 2018-12-03 17:22 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

> + * The caches (L1T, slice, and L2T) will be flushed before the job
> + * executes.  The TLB writes are guaranteed to have been flushed by

I would say before *each* job executes, as the caches are flushed
before both bin and render.
I wouldn't say "the caches" as not all of the V3D caches are flushed
before executing a control list. In particular, the VCD cache is not
cleared by the kernel driver (not even sure if there is a register
interface to do this); it is expected that the control list will do
this itself (using the CLEAR_VCD_CACHE instruction).
On 3.3 and earlier there is a separate L2C for instructions/uniforms
and a GCA. These do need to be flushed, and it looks like they are, so
they should be mentioned here.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/6] drm/v3d: Document cache flushing ABI.
  2018-12-03 17:22 ` [PATCH 1/6] drm/v3d: Document cache flushing ABI Dave Emett
@ 2018-12-03 17:28   ` Dave Emett
  2018-12-03 20:39     ` Eric Anholt
  0 siblings, 1 reply; 16+ messages in thread
From: Dave Emett @ 2018-12-03 17:28 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

On Mon, 3 Dec 2018 at 17:22, Dave Emett <david.emett@broadcom.com> wrote:
>
> > + * The caches (L1T, slice, and L2T) will be flushed before the job
> > + * executes.  The TLB writes are guaranteed to have been flushed by
>
> I would say before *each* job executes, as the caches are flushed
> before both bin and render.
> I wouldn't say "the caches" as not all of the V3D caches are flushed
> before executing a control list. In particular, the VCD cache is not
> cleared by the kernel driver (not even sure if there is a register
> interface to do this); it is expected that the control list will do
> this itself (using the CLEAR_VCD_CACHE instruction).
> On 3.3 and earlier there is a separate L2C for instructions/uniforms
> and a GCA. These do need to be flushed, and it looks like they are, so
> they should be mentioned here.

Correction: on *3.2* and earlier there is a separate L2C for
instructions/uniforms. On 3.3 and earlier there is a GCA.

It looks like we're currently unconditionally writing the L2C clear
registers. We should really only be doing this on 3.2 and earlier.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/6] drm/v3d: Don't bother flushing L1TD at job start.
  2018-12-01  0:57   ` Eric Anholt
  (?)
@ 2018-12-03 17:36   ` Dave Emett
  -1 siblings, 0 replies; 16+ messages in thread
From: Dave Emett @ 2018-12-03 17:36 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

On Sat, 1 Dec 2018 at 00:58, Eric Anholt <eric@anholt.net> wrote:
>
> This is the write combiner for TMU writes.  You're supposed to flush
> that at job end if you had dirtied any cachelines.  Flushing it at job
> start then doesn't make any sense.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

Reviewed-by: Dave Emett <david.emett@broadcom.com>

> Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
> ---
>  drivers/gpu/drm/v3d/v3d_gem.c | 12 ------------
>  1 file changed, 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 34103205b7cb..cc4d025b01e0 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -139,22 +139,10 @@ v3d_invalidate_l2(struct v3d_dev *v3d, int core)
>                        V3D_L2CACTL_L2CENA);
>  }
>
> -static void
> -v3d_invalidate_l1td(struct v3d_dev *v3d, int core)
> -{
> -       V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
> -       if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
> -                      V3D_L2TCACTL_L2TFLS), 100)) {
> -               DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
> -       }
> -}
> -
>  /* Invalidates texture L2 cachelines */
>  static void
>  v3d_flush_l2t(struct v3d_dev *v3d, int core)
>  {
> -       v3d_invalidate_l1td(v3d, core);
> -
>         V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
>                        V3D_L2TCACTL_L2TFLS |
>                        V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
> --
> 2.20.0.rc1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/6] drm/v3d: Add more tracepoints for V3D GPU rendering.
  2018-12-01  0:57 ` [PATCH 5/6] drm/v3d: Add more tracepoints for V3D GPU rendering Eric Anholt
@ 2018-12-03 17:41   ` Dave Emett
  0 siblings, 0 replies; 16+ messages in thread
From: Dave Emett @ 2018-12-03 17:41 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

On Sat, 1 Dec 2018 at 00:58, Eric Anholt <eric@anholt.net> wrote:
>
> The core scheduler tells us when the job is pushed to the scheduler's
> queue, and I had the job_run functions saying when they actually queue
> the job to the hardware.  By adding tracepoints for the very top of
> the ioctls and the IRQs signaling job completion, "perf record -a -e
> v3d:.\* -e gpu_scheduler:.\* <job>; perf script" gets you a pretty
> decent timeline.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

Reviewed-by: Dave Emett <david.emett@broadcom.com>

> ---
>  drivers/gpu/drm/v3d/v3d_gem.c   |   4 ++
>  drivers/gpu/drm/v3d/v3d_irq.c   |  19 +++++-
>  drivers/gpu/drm/v3d/v3d_trace.h | 101 ++++++++++++++++++++++++++++++++
>  3 files changed, 121 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 0bd6892e3044..2f82e2724b1f 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -484,6 +484,8 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
>         struct drm_syncobj *sync_out;
>         int ret = 0;
>
> +       trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
> +
>         if (args->pad != 0) {
>                 DRM_INFO("pad must be zero: %d\n", args->pad);
>                 return -EINVAL;
> @@ -611,6 +613,8 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
>         int ret = 0;
>         int bo_count;
>
> +       trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
> +
>         job = kcalloc(1, sizeof(*job), GFP_KERNEL);
>         if (!job)
>                 return -ENOMEM;
> diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
> index dd7a7b0bd5a1..69338da70ddc 100644
> --- a/drivers/gpu/drm/v3d/v3d_irq.c
> +++ b/drivers/gpu/drm/v3d/v3d_irq.c
> @@ -15,6 +15,7 @@
>
>  #include "v3d_drv.h"
>  #include "v3d_regs.h"
> +#include "v3d_trace.h"
>
>  #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
>                              V3D_INT_FLDONE |   \
> @@ -88,12 +89,20 @@ v3d_irq(int irq, void *arg)
>         }
>
>         if (intsts & V3D_INT_FLDONE) {
> -               dma_fence_signal(v3d->bin_job->bin.done_fence);
> +               struct v3d_fence *fence =
> +                       to_v3d_fence(v3d->bin_job->bin.done_fence);
> +
> +               trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
> +               dma_fence_signal(&fence->base);
>                 status = IRQ_HANDLED;
>         }
>
>         if (intsts & V3D_INT_FRDONE) {
> -               dma_fence_signal(v3d->render_job->render.done_fence);
> +               struct v3d_fence *fence =
> +                       to_v3d_fence(v3d->render_job->render.done_fence);
> +
> +               trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
> +               dma_fence_signal(&fence->base);
>                 status = IRQ_HANDLED;
>         }
>
> @@ -119,7 +128,11 @@ v3d_hub_irq(int irq, void *arg)
>         V3D_WRITE(V3D_HUB_INT_CLR, intsts);
>
>         if (intsts & V3D_HUB_INT_TFUC) {
> -               dma_fence_signal(v3d->tfu_job->done_fence);
> +               struct v3d_fence *fence =
> +                       to_v3d_fence(v3d->tfu_job->done_fence);
> +
> +               trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
> +               dma_fence_signal(&fence->base);
>                 status = IRQ_HANDLED;
>         }
>
> diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
> index f54ed9cd3444..edd984afa33f 100644
> --- a/drivers/gpu/drm/v3d/v3d_trace.h
> +++ b/drivers/gpu/drm/v3d/v3d_trace.h
> @@ -12,6 +12,28 @@
>  #define TRACE_SYSTEM v3d
>  #define TRACE_INCLUDE_FILE v3d_trace
>
> +TRACE_EVENT(v3d_submit_cl_ioctl,
> +           TP_PROTO(struct drm_device *dev, u32 ct1qba, u32 ct1qea),
> +           TP_ARGS(dev, ct1qba, ct1qea),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u32, ct1qba)
> +                            __field(u32, ct1qea)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->ct1qba = ct1qba;
> +                          __entry->ct1qea = ct1qea;
> +                          ),
> +
> +           TP_printk("dev=%u, RCL 0x%08x..0x%08x",
> +                     __entry->dev,
> +                     __entry->ct1qba,
> +                     __entry->ct1qea)
> +);
> +
>  TRACE_EVENT(v3d_submit_cl,
>             TP_PROTO(struct drm_device *dev, bool is_render,
>                      uint64_t seqno,
> @@ -42,6 +64,85 @@ TRACE_EVENT(v3d_submit_cl,
>                       __entry->ctnqea)
>  );
>
> +TRACE_EVENT(v3d_bcl_irq,
> +           TP_PROTO(struct drm_device *dev,
> +                    uint64_t seqno),
> +           TP_ARGS(dev, seqno),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u64, seqno)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->seqno = seqno;
> +                          ),
> +
> +           TP_printk("dev=%u, seqno=%llu",
> +                     __entry->dev,
> +                     __entry->seqno)
> +);
> +
> +TRACE_EVENT(v3d_rcl_irq,
> +           TP_PROTO(struct drm_device *dev,
> +                    uint64_t seqno),
> +           TP_ARGS(dev, seqno),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u64, seqno)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->seqno = seqno;
> +                          ),
> +
> +           TP_printk("dev=%u, seqno=%llu",
> +                     __entry->dev,
> +                     __entry->seqno)
> +);
> +
> +TRACE_EVENT(v3d_tfu_irq,
> +           TP_PROTO(struct drm_device *dev,
> +                    uint64_t seqno),
> +           TP_ARGS(dev, seqno),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u64, seqno)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->seqno = seqno;
> +                          ),
> +
> +           TP_printk("dev=%u, seqno=%llu",
> +                     __entry->dev,
> +                     __entry->seqno)
> +);
> +
> +TRACE_EVENT(v3d_submit_tfu_ioctl,
> +           TP_PROTO(struct drm_device *dev, u32 iia),
> +           TP_ARGS(dev, iia),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(u32, iia)
> +                            ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = dev->primary->index;
> +                          __entry->iia = iia;
> +                          ),
> +
> +           TP_printk("dev=%u, IIA 0x%08x",
> +                     __entry->dev,
> +                     __entry->iia)
> +);
> +
>  TRACE_EVENT(v3d_submit_tfu,
>             TP_PROTO(struct drm_device *dev,
>                      uint64_t seqno),
> --
> 2.20.0.rc1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/6] drm/v3d: Drop unused v3d_flush_caches().
  2018-12-01  0:57   ` Eric Anholt
  (?)
@ 2018-12-03 17:44   ` Dave Emett
  -1 siblings, 0 replies; 16+ messages in thread
From: Dave Emett @ 2018-12-03 17:44 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

It will be needed for the CSD, which does not have a way to clean the
L1T/L2T before reporting completion.
In any case it's unused now, so I don't see a problem with removing it.

On Sat, 1 Dec 2018 at 00:58, Eric Anholt <eric@anholt.net> wrote:
>
> Now that I've specified how the end-of-pipeline flushing should work,
> we're never going to use this function.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

Reviewed-by: Dave Emett <david.emett@broadcom.com>

> ---
>  drivers/gpu/drm/v3d/v3d_drv.h |  1 -
>  drivers/gpu/drm/v3d/v3d_gem.c | 21 ---------------------
>  2 files changed, 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index bcd3d567bec2..239b56d76f3e 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -314,7 +314,6 @@ void v3d_exec_put(struct v3d_exec_info *exec);
>  void v3d_tfu_job_put(struct v3d_tfu_job *exec);
>  void v3d_reset(struct v3d_dev *v3d);
>  void v3d_invalidate_caches(struct v3d_dev *v3d);
> -void v3d_flush_caches(struct v3d_dev *v3d);
>
>  /* v3d_irq.c */
>  void v3d_irq_init(struct v3d_dev *v3d);
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 8b4af512450f..34103205b7cb 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -175,20 +175,6 @@ v3d_invalidate_slices(struct v3d_dev *v3d, int core)
>                        V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
>  }
>
> -/* Invalidates texture L2 cachelines */
> -static void
> -v3d_invalidate_l2t(struct v3d_dev *v3d, int core)
> -{
> -       V3D_CORE_WRITE(core,
> -                      V3D_CTL_L2TCACTL,
> -                      V3D_L2TCACTL_L2TFLS |
> -                      V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM));
> -       if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
> -                      V3D_L2TCACTL_L2TFLS), 100)) {
> -               DRM_ERROR("Timeout waiting for L2T invalidate\n");
> -       }
> -}
> -
>  void
>  v3d_invalidate_caches(struct v3d_dev *v3d)
>  {
> @@ -199,13 +185,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
>         v3d_flush_l2t(v3d, 0);
>  }
>
> -void
> -v3d_flush_caches(struct v3d_dev *v3d)
> -{
> -       v3d_invalidate_l1td(v3d, 0);
> -       v3d_invalidate_l2t(v3d, 0);
> -}
> -
>  static void
>  v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
>                          struct dma_fence *fence)
> --
> 2.20.0.rc1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/6] drm/v3d: Drop the wait for L2T flush to complete.
  2018-12-01  0:57 ` [PATCH 4/6] drm/v3d: Drop the wait for L2T flush to complete Eric Anholt
@ 2018-12-03 17:59   ` Dave Emett
  0 siblings, 0 replies; 16+ messages in thread
From: Dave Emett @ 2018-12-03 17:59 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

I'm surprised this made such a big difference. It should not take a
long time to flush the L2T -- no memory access should be involved, it
should just be a matter of the L2T iterating over its lines clearing
the tags. This should take thousands of V3D cycles, not milliseconds.
So the 3-4ms stall seems worthy of more investigation to me.

A comment describing why no waits are necessary would be good.

On Sat, 1 Dec 2018 at 00:58, Eric Anholt <eric@anholt.net> wrote:
>
> According to Dave, once you've started an L2T flush, all L2T accesses
> will be blocked until the flush completes.  This fixes a consistent
> 3-4ms stall between the ioctl and running the job, and 3DMMES Taiji
> goes from 27fps to 110fps.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

Reviewed-by: Dave Emett <david.emett@broadcom.com>

> Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+")
> ---
>  drivers/gpu/drm/v3d/v3d_gem.c | 4 ----
>  1 file changed, 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index cc4d025b01e0..0bd6892e3044 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -146,10 +146,6 @@ v3d_flush_l2t(struct v3d_dev *v3d, int core)
>         V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
>                        V3D_L2TCACTL_L2TFLS |
>                        V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
> -       if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
> -                      V3D_L2TCACTL_L2TFLS), 100)) {
> -               DRM_ERROR("Timeout waiting for L2T flush\n");
> -       }
>  }
>
>  /* Invalidates the slice caches.  These are read-only caches. */
> --
> 2.20.0.rc1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 6/6] drm/v3d: Add missing fence timeline name for TFU.
  2018-12-01  0:57 ` [PATCH 6/6] drm/v3d: Add missing fence timeline name for TFU Eric Anholt
@ 2018-12-03 18:03   ` Dave Emett
  0 siblings, 0 replies; 16+ messages in thread
From: Dave Emett @ 2018-12-03 18:03 UTC (permalink / raw)
  To: Eric Anholt; +Cc: dri-devel, linux-kernel, Thomas Spurden

On Sat, 1 Dec 2018 at 00:58, Eric Anholt <eric@anholt.net> wrote:
>
> We shouldn't be returning v3d-render in for our new queue.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

Reviewed-by: Dave Emett <david.emett@broadcom.com>

> Fixes: 83d5139982db ("drm/v3d: Add support for submitting jobs to the TFU.")
> ---
>  drivers/gpu/drm/v3d/v3d_fence.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
> index 50bfcf9a8a1a..b0a2a1ae2eb1 100644
> --- a/drivers/gpu/drm/v3d/v3d_fence.c
> +++ b/drivers/gpu/drm/v3d/v3d_fence.c
> @@ -29,10 +29,16 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
>  {
>         struct v3d_fence *f = to_v3d_fence(fence);
>
> -       if (f->queue == V3D_BIN)
> +       switch (f->queue) {
> +       case V3D_BIN:
>                 return "v3d-bin";
> -       else
> +       case V3D_RENDER:
>                 return "v3d-render";
> +       case V3D_TFU:
> +               return "v3d-tfu";
> +       default:
> +               return NULL;
> +       }
>  }
>
>  const struct dma_fence_ops v3d_fence_ops = {
> --
> 2.20.0.rc1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/6] drm/v3d: Document cache flushing ABI.
  2018-12-03 17:28   ` Dave Emett
@ 2018-12-03 20:39     ` Eric Anholt
  0 siblings, 0 replies; 16+ messages in thread
From: Eric Anholt @ 2018-12-03 20:39 UTC (permalink / raw)
  To: Dave Emett; +Cc: dri-devel, linux-kernel, Thomas Spurden

[-- Attachment #1: Type: text/plain, Size: 1377 bytes --]

Dave Emett <david.emett@broadcom.com> writes:

> On Mon, 3 Dec 2018 at 17:22, Dave Emett <david.emett@broadcom.com> wrote:
>>
>> > + * The caches (L1T, slice, and L2T) will be flushed before the job
>> > + * executes.  The TLB writes are guaranteed to have been flushed by
>>
>> I would say before *each* job executes, as the caches are flushed
>> before both bin and render.
>> I wouldn't say "the caches" as not all of the V3D caches are flushed
>> before executing a control list. In particular, the VCD cache is not
>> cleared by the kernel driver (not even sure if there is a register
>> interface to do this); it is expected that the control list will do
>> this itself (using the CLEAR_VCD_CACHE instruction).
>> On 3.3 and earlier there is a separate L2C for instructions/uniforms
>> and a GCA. These do need to be flushed, and it looks like they are, so
>> they should be mentioned here.
>
> Correction: on *3.2* and earlier there is a separate L2C for
> instructions/uniforms. On 3.3 and earlier there is a GCA.
>
> It looks like we're currently unconditionally writing the L2C clear
> registers. We should really only be doing this on 3.2 and earlier.

OK, I've pushed the timeline name and tracing to drm-misc-next, and I'll
resend the rest with more comments and the L2C change and outside-in
invalidation once I'm far enough into this CTS run that I'm confident.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-12-03 20:39 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-01  0:57 [PATCH 1/6] drm/v3d: Document cache flushing ABI Eric Anholt
2018-12-01  0:57 ` [PATCH 2/6] drm/v3d: Drop unused v3d_flush_caches() Eric Anholt
2018-12-01  0:57   ` Eric Anholt
2018-12-03 17:44   ` Dave Emett
2018-12-01  0:57 ` [PATCH 3/6] drm/v3d: Don't bother flushing L1TD at job start Eric Anholt
2018-12-01  0:57   ` Eric Anholt
2018-12-03 17:36   ` Dave Emett
2018-12-01  0:57 ` [PATCH 4/6] drm/v3d: Drop the wait for L2T flush to complete Eric Anholt
2018-12-03 17:59   ` Dave Emett
2018-12-01  0:57 ` [PATCH 5/6] drm/v3d: Add more tracepoints for V3D GPU rendering Eric Anholt
2018-12-03 17:41   ` Dave Emett
2018-12-01  0:57 ` [PATCH 6/6] drm/v3d: Add missing fence timeline name for TFU Eric Anholt
2018-12-03 18:03   ` Dave Emett
2018-12-03 17:22 ` [PATCH 1/6] drm/v3d: Document cache flushing ABI Dave Emett
2018-12-03 17:28   ` Dave Emett
2018-12-03 20:39     ` Eric Anholt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.