dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] Etnaviv GPU scheduler
@ 2017-12-06 17:10 Lucas Stach
  2017-12-06 17:10 ` [PATCH 1/4] drm/etnaviv: hook up DRM " Lucas Stach
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Lucas Stach @ 2017-12-06 17:10 UTC (permalink / raw)
  To: etnaviv; +Cc: patchwork-lst, kernel, dri-devel, Russell King

Hi all,

this hooks up the AMDGPU/DRM GPU scheduler in etnaviv. This depends on both
the etnaviv job lifetime fixes series, as well as the AMDGPU scheduler move.
Please keep this in mind while reviewing.

The scheduler has 4 main benefits to etnaviv:
1. Cross GPU/device synchronization is handled in the kernel without blocking
   the submitting userspace process.
2. Fairness in GPU time is improved, as all processes get to submit to the
   HW in a round robin fashion. Before this a single application could
   pretty much DoS the GPU by submitting jobs with minimal dependencies.
3. GPU resets loose a lot less state. The scheduler keeps track of the
   HW submitted jobs and replays innocent jobs after a GPU reset. Thus
   isolation between different processes using the GPU is improved as
   a process hanging the GPU will not impact other jobs anymore.
4. It provides useful tracepoints to keep track of the GPU execution.

Regards,
Lucas

Lucas Stach (4):
  drm/etnaviv: hook up DRM GPU scheduler
  drm/etnaviv: move dependency handling to scheduler
  drm/etnaviv: lock BOs after all other submit work is done
  drm/etnaviv: replace hangcheck with scheduler timeout

 drivers/gpu/drm/etnaviv/Kconfig              |   1 +
 drivers/gpu/drm/etnaviv/Makefile             |   3 +-
 drivers/gpu/drm/etnaviv/etnaviv_drv.c        |  16 ++
 drivers/gpu/drm/etnaviv/etnaviv_drv.h        |   7 +-
 drivers/gpu/drm/etnaviv/etnaviv_dump.c       |  21 ++-
 drivers/gpu/drm/etnaviv/etnaviv_gem.h        |   4 +
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  65 ++++---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 250 +++++----------------------
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |  28 +--
 drivers/gpu/drm/etnaviv/etnaviv_sched.c      | 169 ++++++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_sched.h      |  34 ++++
 11 files changed, 339 insertions(+), 259 deletions(-)
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_sched.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_sched.h

-- 
2.11.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] drm/etnaviv: hook up DRM GPU scheduler
  2017-12-06 17:10 [PATCH 0/4] Etnaviv GPU scheduler Lucas Stach
@ 2017-12-06 17:10 ` Lucas Stach
  2017-12-06 17:10 ` [PATCH 2/4] drm/etnaviv: move dependency handling to scheduler Lucas Stach
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Lucas Stach @ 2017-12-06 17:10 UTC (permalink / raw)
  To: etnaviv; +Cc: patchwork-lst, kernel, dri-devel, Russell King

This hooks in the DRM GPU scheduler. No improvement yet, as all the
dependency handling is still done in etnaviv_gem_submit. This just
replaces the actual GPU submit by passing through the scheduler.

Allows to get rid of the retire worker, as this is now driven by the
scheduler.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/Kconfig              |   1 +
 drivers/gpu/drm/etnaviv/Makefile             |   3 +-
 drivers/gpu/drm/etnaviv/etnaviv_drv.c        |  16 ++++
 drivers/gpu/drm/etnaviv/etnaviv_drv.h        |   7 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.h        |   1 +
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  11 ++-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 115 +++++++++---------------
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |  14 +--
 drivers/gpu/drm/etnaviv/etnaviv_sched.c      | 125 +++++++++++++++++++++++++++
 drivers/gpu/drm/etnaviv/etnaviv_sched.h      |  22 +++++
 10 files changed, 222 insertions(+), 93 deletions(-)
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_sched.c
 create mode 100644 drivers/gpu/drm/etnaviv/etnaviv_sched.h

diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
index 3f58b4077767..e5bfeca361bd 100644
--- a/drivers/gpu/drm/etnaviv/Kconfig
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -11,6 +11,7 @@ config DRM_ETNAVIV
 	select WANT_DEV_COREDUMP
 	select CMA if HAVE_DMA_CONTIGUOUS
 	select DMA_CMA if HAVE_DMA_CONTIGUOUS
+	select DRM_SCHED
 	help
 	  DRM driver for Vivante GPUs.
 
diff --git a/drivers/gpu/drm/etnaviv/Makefile b/drivers/gpu/drm/etnaviv/Makefile
index 1281c8d4fae5..9bb780c22501 100644
--- a/drivers/gpu/drm/etnaviv/Makefile
+++ b/drivers/gpu/drm/etnaviv/Makefile
@@ -12,6 +12,7 @@ etnaviv-y := \
 	etnaviv_iommu_v2.o \
 	etnaviv_iommu.o \
 	etnaviv_mmu.o \
-	etnaviv_perfmon.o
+	etnaviv_perfmon.o \
+	etnaviv_sched.o
 
 obj-$(CONFIG_DRM_ETNAVIV)	+= etnaviv.o
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 6faf4042db23..8a73414682b2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -101,12 +101,25 @@ static void load_gpu(struct drm_device *dev)
 
 static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 {
+	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct etnaviv_file_private *ctx;
+	int i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
+	for (i = 0; i < ETNA_MAX_PIPES; i++) {
+		struct etnaviv_gpu *gpu = priv->gpu[i];
+
+		if (gpu) {
+			drm_sched_entity_init(&gpu->sched,
+				&ctx->sched_entity[i],
+				&gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
+				32, NULL);
+			}
+	}
+
 	file->driver_priv = ctx;
 
 	return 0;
@@ -126,6 +139,9 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 			if (gpu->lastctx == ctx)
 				gpu->lastctx = NULL;
 			mutex_unlock(&gpu->lock);
+
+			drm_sched_entity_fini(&gpu->sched,
+					      &ctx->sched_entity[i]);
 		}
 	}
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index a54f0b758a5c..1f055d931c6c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -34,6 +34,7 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/etnaviv_drm.h>
+#include <drm/gpu_scheduler.h>
 
 struct etnaviv_cmdbuf;
 struct etnaviv_gpu;
@@ -42,11 +43,11 @@ struct etnaviv_gem_object;
 struct etnaviv_gem_submit;
 
 struct etnaviv_file_private {
-	/* currently we don't do anything useful with this.. but when
-	 * per-context address spaces are supported we'd keep track of
+	/*
+	 * When per-context address spaces are supported we'd keep track of
 	 * the context's page-tables here.
 	 */
-	int dummy;
+	struct drm_sched_entity		sched_entity[ETNA_MAX_PIPES];
 };
 
 struct etnaviv_drm_private {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index c30964152381..ae352f2a77f9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -101,6 +101,7 @@ struct etnaviv_gem_submit_bo {
  * make it easier to unwind when things go wrong, etc).
  */
 struct etnaviv_gem_submit {
+	struct drm_sched_job sched_job;
 	struct kref refcount;
 	struct etnaviv_gpu *gpu;
 	struct dma_fence *out_fence, *in_fence;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 919c8dc39f32..0bc89e4daade 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -22,6 +22,7 @@
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_perfmon.h"
+#include "etnaviv_sched.h"
 
 /*
  * Cmdstream submission:
@@ -381,8 +382,13 @@ static void submit_cleanup(struct kref *kref)
 
 	if (submit->in_fence)
 		dma_fence_put(submit->in_fence);
-	if (submit->out_fence)
+	if (submit->out_fence) {
+		/* first remove from IDR, so fence can not be found anymore */
+		mutex_lock(&submit->gpu->fence_idr_lock);
+		idr_remove(&submit->gpu->fence_idr, submit->out_fence_id);
+		mutex_unlock(&submit->gpu->fence_idr_lock);
 		dma_fence_put(submit->out_fence);
+	}
 	kfree(submit->pmrs);
 	kfree(submit);
 }
@@ -395,6 +401,7 @@ void etnaviv_submit_put(struct etnaviv_gem_submit *submit)
 int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
+	struct etnaviv_file_private *ctx = file->driver_priv;
 	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct drm_etnaviv_gem_submit *args = data;
 	struct drm_etnaviv_gem_submit_reloc *relocs;
@@ -541,7 +548,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	memcpy(submit->cmdbuf.vaddr, stream, args->stream_size);
 	submit->cmdbuf.user_size = ALIGN(args->stream_size, 8);
 
-	ret = etnaviv_gpu_submit(gpu, submit);
+	ret = etnaviv_sched_push_job(&ctx->sched_entity[args->pipe], submit);
 	if (ret)
 		goto err_submit_objects;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 935d99be748e..bfa54abbbdd1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -26,6 +26,7 @@
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_perfmon.h"
+#include "etnaviv_sched.h"
 #include "common.xml.h"
 #include "state.xml.h"
 #include "state_hi.xml.h"
@@ -955,9 +956,6 @@ static void recover_worker(struct work_struct *work)
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
-
-	/* Retire the buffer objects in a work */
-	queue_work(gpu->wq, &gpu->retire_work);
 }
 
 static void hangcheck_timer_reset(struct etnaviv_gpu *gpu)
@@ -1010,7 +1008,6 @@ static void hangcheck_disable(struct etnaviv_gpu *gpu)
 /* fence object management */
 struct etnaviv_fence {
 	struct etnaviv_gpu *gpu;
-	int id;
 	struct dma_fence base;
 };
 
@@ -1047,11 +1044,6 @@ static void etnaviv_fence_release(struct dma_fence *fence)
 {
 	struct etnaviv_fence *f = to_etnaviv_fence(fence);
 
-	/* first remove from IDR, so fence can not be looked up anymore */
-	mutex_lock(&f->gpu->lock);
-	idr_remove(&f->gpu->fence_idr, f->id);
-	mutex_unlock(&f->gpu->lock);
-
 	kfree_rcu(f, base.rcu);
 }
 
@@ -1078,11 +1070,6 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 	if (!f)
 		return NULL;
 
-	f->id = idr_alloc_cyclic(&gpu->fence_idr, &f->base, 0, INT_MAX, GFP_KERNEL);
-	if (f->id < 0) {
-		kfree(f);
-		return NULL;
-	}
 	f->gpu = gpu;
 
 	dma_fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock,
@@ -1205,31 +1192,6 @@ static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
 /*
  * Cmdstream submission/retirement:
  */
-
-static void retire_worker(struct work_struct *work)
-{
-	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
-					       retire_work);
-	u32 fence = gpu->completed_fence;
-	struct etnaviv_gem_submit *submit, *tmp;
-	LIST_HEAD(retire_list);
-
-	mutex_lock(&gpu->lock);
-	list_for_each_entry_safe(submit, tmp, &gpu->active_submit_list, node) {
-		if (!dma_fence_is_signaled(submit->out_fence))
-			break;
-
-		list_move(&submit->node, &retire_list);
-	}
-
-	gpu->retired_fence = fence;
-
-	mutex_unlock(&gpu->lock);
-
-	list_for_each_entry_safe(submit, tmp, &retire_list, node)
-		etnaviv_submit_put(submit);
-}
-
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	u32 id, struct timespec *timeout)
 {
@@ -1237,18 +1199,15 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	int ret;
 
 	/*
-	 * Look up the fence and take a reference. The mutex only synchronizes
-	 * the IDR lookup with the fence release. We might still find a fence
+	 * Look up the fence and take a reference. We might still find a fence
 	 * whose refcount has already dropped to zero. dma_fence_get_rcu
 	 * pretends we didn't find a fence in that case.
 	 */
-	ret = mutex_lock_interruptible(&gpu->lock);
-	if (ret)
-		return ret;
+	rcu_read_lock();
 	fence = idr_find(&gpu->fence_idr, id);
 	if (fence)
 		fence = dma_fence_get_rcu(fence);
-	mutex_unlock(&gpu->lock);
+	rcu_read_unlock();
 
 	if (!fence)
 		return 0;
@@ -1273,7 +1232,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 
 /*
  * Wait for an object to become inactive.  This, on it's own, is not race
- * free: the object is moved by the retire worker off the active list, and
+ * free: the object is moved by the scheduler off the active list, and
  * then the iova is put.  Moreover, the object could be re-submitted just
  * after we notice that it's become inactive.
  *
@@ -1362,15 +1321,16 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
 
 
 /* add bo's to gpu's ring, and kick gpu: */
-int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
-	struct etnaviv_gem_submit *submit)
+struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 {
+	struct etnaviv_gpu *gpu = submit->gpu;
+	struct dma_fence *gpu_fence;
 	unsigned int i, nr_events = 1, event[3];
 	int ret;
 
 	ret = pm_runtime_get_sync(gpu->dev);
 	if (ret < 0)
-		return ret;
+		return NULL;
 	submit->runtime_resumed = true;
 
 	/*
@@ -1386,22 +1346,20 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	ret = event_alloc(gpu, nr_events, event);
 	if (ret) {
 		DRM_ERROR("no free events\n");
-		return ret;
+		return NULL;
 	}
 
 	mutex_lock(&gpu->lock);
 
-	submit->out_fence = etnaviv_gpu_fence_alloc(gpu);
-	if (!submit->out_fence) {
+	gpu_fence = etnaviv_gpu_fence_alloc(gpu);
+	if (!gpu_fence) {
 		for (i = 0; i < nr_events; i++)
 			event_free(gpu, event[i]);
 
-		ret = -ENOMEM;
 		goto out_unlock;
 	}
-	submit->out_fence_id = to_etnaviv_fence(submit->out_fence)->id;
 
-	gpu->active_fence = submit->out_fence->seqno;
+	gpu->active_fence = gpu_fence->seqno;
 
 	if (submit->nr_pmrs) {
 		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
@@ -1410,8 +1368,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 		etnaviv_sync_point_queue(gpu, event[1]);
 	}
 
-	kref_get(&submit->refcount);
-	gpu->event[event[0]].fence = submit->out_fence;
+	gpu->event[event[0]].fence = gpu_fence;
 	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
 			     &submit->cmdbuf);
 
@@ -1422,15 +1379,12 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 		etnaviv_sync_point_queue(gpu, event[2]);
 	}
 
-	list_add_tail(&submit->node, &gpu->active_submit_list);
-
 	hangcheck_timer_reset(gpu);
-	ret = 0;
 
 out_unlock:
 	mutex_unlock(&gpu->lock);
 
-	return ret;
+	return gpu_fence;
 }
 
 static void sync_point_worker(struct work_struct *work)
@@ -1521,9 +1475,6 @@ static irqreturn_t irq_handler(int irq, void *data)
 			event_free(gpu, event);
 		}
 
-		/* Retire the buffer objects in a work */
-		queue_work(gpu->wq, &gpu->retire_work);
-
 		ret = IRQ_HANDLED;
 	}
 
@@ -1694,23 +1645,21 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	}
 
 	gpu->wq = alloc_ordered_workqueue(dev_name(dev), 0);
-	if (!gpu->wq) {
-		if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
-			thermal_cooling_device_unregister(gpu->cooling);
-		return -ENOMEM;
-	}
+	if (!gpu->wq)
+		goto out_thermal;
+
+	ret = etnaviv_sched_init(gpu);
+	if (ret)
+		goto out_workqueue;
 
 #ifdef CONFIG_PM
 	ret = pm_runtime_get_sync(gpu->dev);
 #else
 	ret = etnaviv_gpu_clk_enable(gpu);
 #endif
-	if (ret < 0) {
-		destroy_workqueue(gpu->wq);
-		if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
-			thermal_cooling_device_unregister(gpu->cooling);
-		return ret;
-	}
+	if (ret < 0)
+		goto out_sched;
+
 
 	gpu->drm = drm;
 	gpu->fence_context = dma_fence_context_alloc(1);
@@ -1718,7 +1667,6 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	spin_lock_init(&gpu->fence_spinlock);
 
 	INIT_LIST_HEAD(&gpu->active_submit_list);
-	INIT_WORK(&gpu->retire_work, retire_worker);
 	INIT_WORK(&gpu->sync_point_work, sync_point_worker);
 	INIT_WORK(&gpu->recover_work, recover_worker);
 	init_waitqueue_head(&gpu->fence_event);
@@ -1731,6 +1679,18 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	pm_runtime_put_autosuspend(gpu->dev);
 
 	return 0;
+
+out_sched:
+	etnaviv_sched_fini(gpu);
+
+out_workqueue:
+	destroy_workqueue(gpu->wq);
+
+out_thermal:
+	if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL))
+		thermal_cooling_device_unregister(gpu->cooling);
+
+	return ret;
 }
 
 static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
@@ -1745,6 +1705,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 	flush_workqueue(gpu->wq);
 	destroy_workqueue(gpu->wq);
 
+	etnaviv_sched_fini(gpu);
+
 #ifdef CONFIG_PM
 	pm_runtime_get_sync(gpu->dev);
 	pm_runtime_put_sync_suspend(gpu->dev);
@@ -1797,6 +1759,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 
 	gpu->dev = &pdev->dev;
 	mutex_init(&gpu->lock);
+	mutex_init(&gpu->fence_idr_lock);
 
 	/* Map registers: */
 	gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev));
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 0170eb0a0923..02f7ffa34f3b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -108,6 +108,7 @@ struct etnaviv_gpu {
 	struct etnaviv_chip_identity identity;
 	struct etnaviv_file_private *lastctx;
 	struct workqueue_struct *wq;
+	struct drm_gpu_scheduler sched;
 
 	/* 'ring'-buffer: */
 	struct etnaviv_cmdbuf buffer;
@@ -128,18 +129,15 @@ struct etnaviv_gpu {
 	u32 idle_mask;
 
 	/* Fencing support */
+	struct mutex fence_idr_lock;
 	struct idr fence_idr;
 	u32 next_fence;
 	u32 active_fence;
 	u32 completed_fence;
-	u32 retired_fence;
 	wait_queue_head_t fence_event;
 	u64 fence_context;
 	spinlock_t fence_spinlock;
 
-	/* worker for handling active-list retiring: */
-	struct work_struct retire_work;
-
 	/* worker for handling 'sync' points: */
 	struct work_struct sync_point_work;
 	int sync_point_event;
@@ -182,11 +180,6 @@ static inline bool fence_completed(struct etnaviv_gpu *gpu, u32 fence)
 	return fence_after_eq(gpu->completed_fence, fence);
 }
 
-static inline bool fence_retired(struct etnaviv_gpu *gpu, u32 fence)
-{
-	return fence_after_eq(gpu->retired_fence, fence);
-}
-
 int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value);
 
 int etnaviv_gpu_init(struct etnaviv_gpu *gpu);
@@ -203,8 +196,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	u32 fence, struct timespec *timeout);
 int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
-int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
-	struct etnaviv_gem_submit *submit);
+struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
new file mode 100644
index 000000000000..143c3eca80b0
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <drm/gpu_scheduler.h>
+#include <linux/kthread.h>
+
+#include "etnaviv_drv.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_gpu.h"
+
+static int etnaviv_job_hang_limit = 0;
+module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444);
+static int etnaviv_hw_jobs_limit = 2;
+module_param_named(hw_job_limit, etnaviv_hw_jobs_limit, int , 0444);
+
+static inline
+struct etnaviv_gem_submit *to_etnaviv_submit(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct etnaviv_gem_submit, sched_job);
+}
+
+struct dma_fence *etnaviv_sched_dependency(struct drm_sched_job *sched_job,
+					   struct drm_sched_entity *entity)
+{
+	return NULL;
+}
+
+struct dma_fence *etnaviv_sched_run_job(struct drm_sched_job *sched_job)
+{
+	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
+	struct dma_fence *fence;
+
+	mutex_lock(&submit->gpu->lock);
+	list_add_tail(&submit->node, &submit->gpu->active_submit_list);
+	mutex_unlock(&submit->gpu->lock);
+
+	fence = etnaviv_gpu_submit(submit);
+	if (!fence) {
+		etnaviv_submit_put(submit);
+		return NULL;
+	}
+
+	return fence;
+}
+
+static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
+{
+	/* this replaces the hangcheck */
+}
+
+static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
+{
+	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
+
+	mutex_lock(&submit->gpu->lock);
+	list_del(&submit->node);
+	mutex_unlock(&submit->gpu->lock);
+
+	etnaviv_submit_put(submit);
+}
+
+static const struct drm_sched_backend_ops etnaviv_sched_ops = {
+	.dependency = etnaviv_sched_dependency,
+	.run_job = etnaviv_sched_run_job,
+	.timedout_job = etnaviv_sched_timedout_job,
+	.free_job = etnaviv_sched_free_job,
+};
+
+int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
+			   struct etnaviv_gem_submit *submit)
+{
+	int ret;
+
+	ret = drm_sched_job_init(&submit->sched_job, &submit->gpu->sched,
+				 sched_entity, submit->cmdbuf.ctx);
+	if (ret)
+		return ret;
+
+	submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished);
+	mutex_lock(&submit->gpu->fence_idr_lock);
+	submit->out_fence_id = idr_alloc_cyclic(&submit->gpu->fence_idr,
+						submit->out_fence, 0,
+						INT_MAX, GFP_KERNEL);
+	mutex_unlock(&submit->gpu->fence_idr_lock);
+	if (submit->out_fence_id < 0)
+		return -ENOMEM;
+
+	/* the scheduler holds on to the job now */
+	kref_get(&submit->refcount);
+
+	drm_sched_entity_push_job(&submit->sched_job, sched_entity);
+
+	return 0;
+}
+
+int etnaviv_sched_init(struct etnaviv_gpu *gpu)
+{
+	int ret;
+
+	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
+			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
+			     msecs_to_jiffies(500), dev_name(gpu->dev));
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void etnaviv_sched_fini(struct etnaviv_gpu *gpu)
+{
+	drm_sched_fini(&gpu->sched);
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.h b/drivers/gpu/drm/etnaviv/etnaviv_sched.h
new file mode 100644
index 000000000000..539556a95b65
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+struct etnaviv_gpu;
+
+int etnaviv_sched_init(struct etnaviv_gpu *gpu);
+void etnaviv_sched_fini(struct etnaviv_gpu *gpu);
+int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
+			   struct etnaviv_gem_submit *submit);
-- 
2.11.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] drm/etnaviv: move dependency handling to scheduler
  2017-12-06 17:10 [PATCH 0/4] Etnaviv GPU scheduler Lucas Stach
  2017-12-06 17:10 ` [PATCH 1/4] drm/etnaviv: hook up DRM " Lucas Stach
@ 2017-12-06 17:10 ` Lucas Stach
  2017-12-06 17:10 ` [PATCH 3/4] drm/etnaviv: lock BOs after all other submit work is done Lucas Stach
  2017-12-06 17:10 ` [PATCH 4/4] drm/etnaviv: replace hangcheck with scheduler timeout Lucas Stach
  3 siblings, 0 replies; 5+ messages in thread
From: Lucas Stach @ 2017-12-06 17:10 UTC (permalink / raw)
  To: etnaviv; +Cc: patchwork-lst, kernel, dri-devel, Russell King

Move the fence dependency handling to the scheduler where it belongs.
Jobs with unsignaled dependencies just get to sit in the scheduler queue
without holding any locks.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.h        |  3 ++
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 37 +++++++++++----------
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 48 ----------------------------
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |  3 --
 drivers/gpu/drm/etnaviv/etnaviv_sched.c      | 45 ++++++++++++++++++++++++++
 5 files changed, 69 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index ae352f2a77f9..93e696fcc14f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -94,6 +94,9 @@ struct etnaviv_gem_submit_bo {
 	u32 flags;
 	struct etnaviv_gem_object *obj;
 	struct etnaviv_vram_mapping *mapping;
+	struct dma_fence *excl;
+	unsigned int nr_shared;
+	struct dma_fence **shared;
 };
 
 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 0bc89e4daade..ae0bf3e94580 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -170,29 +170,34 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit,
 	return ret;
 }
 
-static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
+static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 {
-	unsigned int context = submit->gpu->fence_context;
 	int i, ret = 0;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
+		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
+		struct reservation_object *robj = bo->obj->resv;
 		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
-		bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
 
-		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
-						 explicit);
-		if (ret)
-			break;
-	}
+		if (!write) {
+			ret = reservation_object_reserve_shared(robj);
+			if (ret)
+				return ret;
+		}
+
+		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
+			return 0;
+
+		if (write) {
+			ret = reservation_object_get_fences_rcu(robj, &bo->excl,
+								&bo->nr_shared,
+								&bo->shared);
+			if (ret)
+				return ret;
+		} else {
+			submit->bos[i].excl = reservation_object_get_excl_rcu(robj);
+		}
 
-	if (submit->flags & ETNA_SUBMIT_FENCE_FD_IN) {
-		/*
-		 * Wait if the fence is from a foreign context, or if the fence
-		 * array contains any fence from a foreign context.
-		 */
-		if (!dma_fence_match_context(submit->in_fence, context))
-			ret = dma_fence_wait(submit->in_fence, true);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index bfa54abbbdd1..d5d5cfab8477 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1078,54 +1078,6 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 	return &f->base;
 }
 
-int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
-	unsigned int context, bool exclusive, bool explicit)
-{
-	struct reservation_object *robj = etnaviv_obj->resv;
-	struct reservation_object_list *fobj;
-	struct dma_fence *fence;
-	int i, ret;
-
-	if (!exclusive) {
-		ret = reservation_object_reserve_shared(robj);
-		if (ret)
-			return ret;
-	}
-
-	if (explicit)
-		return 0;
-
-	/*
-	 * If we have any shared fences, then the exclusive fence
-	 * should be ignored as it will already have been signalled.
-	 */
-	fobj = reservation_object_get_list(robj);
-	if (!fobj || fobj->shared_count == 0) {
-		/* Wait on any existing exclusive fence which isn't our own */
-		fence = reservation_object_get_excl(robj);
-		if (fence && fence->context != context) {
-			ret = dma_fence_wait(fence, true);
-			if (ret)
-				return ret;
-		}
-	}
-
-	if (!exclusive || !fobj)
-		return 0;
-
-	for (i = 0; i < fobj->shared_count; i++) {
-		fence = rcu_dereference_protected(fobj->shared[i],
-						reservation_object_held(robj));
-		if (fence->context != context) {
-			ret = dma_fence_wait(fence, true);
-			if (ret)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * event management:
  */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 02f7ffa34f3b..f5c6dbe026d6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -188,9 +188,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 #endif
 
-int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
-	unsigned int context, bool exclusive, bool implicit);
-
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	u32 fence, struct timespec *timeout);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 143c3eca80b0..bb68c57e714f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -35,6 +35,51 @@ struct etnaviv_gem_submit *to_etnaviv_submit(struct drm_sched_job *sched_job)
 struct dma_fence *etnaviv_sched_dependency(struct drm_sched_job *sched_job,
 					   struct drm_sched_entity *entity)
 {
+	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
+	struct dma_fence *fence;
+	int i;
+
+	if (unlikely(submit->in_fence)) {
+		fence = submit->in_fence;
+
+		if (!dma_fence_is_signaled(fence))
+			return fence;
+
+		dma_fence_put(fence);
+		submit->in_fence = NULL;
+	}
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
+		int j;
+
+		if (bo->excl) {
+			fence = bo->excl;
+			bo->excl = NULL;
+
+			if (!dma_fence_is_signaled(fence))
+				return fence;
+
+			dma_fence_put(fence);
+		}
+
+		for (j = 0; j < bo->nr_shared; j++) {
+			if (!bo->shared[j])
+				continue;
+
+			fence = bo->shared[j];
+			bo->shared[j] = NULL;
+
+			if (!dma_fence_is_signaled(fence))
+				return fence;
+
+			dma_fence_put(fence);
+		}
+		kfree(bo->shared);
+		bo->nr_shared = 0;
+		bo->shared = NULL;
+	}
+
 	return NULL;
 }
 
-- 
2.11.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] drm/etnaviv: lock BOs after all other submit work is done
  2017-12-06 17:10 [PATCH 0/4] Etnaviv GPU scheduler Lucas Stach
  2017-12-06 17:10 ` [PATCH 1/4] drm/etnaviv: hook up DRM " Lucas Stach
  2017-12-06 17:10 ` [PATCH 2/4] drm/etnaviv: move dependency handling to scheduler Lucas Stach
@ 2017-12-06 17:10 ` Lucas Stach
  2017-12-06 17:10 ` [PATCH 4/4] drm/etnaviv: replace hangcheck with scheduler timeout Lucas Stach
  3 siblings, 0 replies; 5+ messages in thread
From: Lucas Stach @ 2017-12-06 17:10 UTC (permalink / raw)
  To: etnaviv; +Cc: patchwork-lst, kernel, dri-devel, Russell King

Populating objects, adding them to the GPU VM and patching/validating
the command stream might take a lot of CPU time. There is no reason to
hold all object reservations during that time.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index ae0bf3e94580..f3001476bc06 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -515,10 +515,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto err_submit_objects;
 
-	ret = submit_lock_objects(submit, &ticket);
-	if (ret)
-		goto err_submit_objects;
-
 	if (!etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4,
 				      relocs, args->nr_relocs)) {
 		ret = -EINVAL;
@@ -533,10 +529,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		}
 	}
 
-	ret = submit_fence_sync(submit);
-	if (ret)
-		goto err_submit_objects;
-
 	ret = submit_pin_objects(submit);
 	if (ret)
 		goto err_submit_objects;
@@ -553,6 +545,14 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	memcpy(submit->cmdbuf.vaddr, stream, args->stream_size);
 	submit->cmdbuf.user_size = ALIGN(args->stream_size, 8);
 
+	ret = submit_lock_objects(submit, &ticket);
+	if (ret)
+		goto err_submit_objects;
+
+	ret = submit_fence_sync(submit);
+	if (ret)
+		goto err_submit_objects;
+
 	ret = etnaviv_sched_push_job(&ctx->sched_entity[args->pipe], submit);
 	if (ret)
 		goto err_submit_objects;
-- 
2.11.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] drm/etnaviv: replace hangcheck with scheduler timeout
  2017-12-06 17:10 [PATCH 0/4] Etnaviv GPU scheduler Lucas Stach
                   ` (2 preceding siblings ...)
  2017-12-06 17:10 ` [PATCH 3/4] drm/etnaviv: lock BOs after all other submit work is done Lucas Stach
@ 2017-12-06 17:10 ` Lucas Stach
  3 siblings, 0 replies; 5+ messages in thread
From: Lucas Stach @ 2017-12-06 17:10 UTC (permalink / raw)
  To: etnaviv; +Cc: patchwork-lst, kernel, dri-devel, Russell King

This replaces the etnaviv internal hangcheck logic with the job timeout
handling provided by the DRM scheduler. This simplifies the driver further
and allows to replay jobs after a GPU reset, so only minimal state is lost.

This introduces a user-visible change in that we don't allow jobs to run
indefinitely as long as they make progress anymore, as this introduces
quality of service issues when multiple processes are using the GPU.
Userspace is now responsible to flush jobs in a way that the finish in a
reasonable time, where reasonable is currently defined as less than 500ms.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_dump.c       | 21 ++++++-
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  1 -
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 89 ++++------------------------
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h        | 11 +---
 drivers/gpu/drm/etnaviv/etnaviv_sched.c      | 43 +++++++-------
 drivers/gpu/drm/etnaviv/etnaviv_sched.h      | 12 ++++
 6 files changed, 63 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 6d0909c589d1..48aef6cf6a42 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -20,9 +20,13 @@
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
+#include "etnaviv_sched.h"
 #include "state.xml.h"
 #include "state_hi.xml.h"
 
+static bool etnaviv_dump_core = true;
+module_param_named(dump_core, etnaviv_dump_core, bool, 0600);
+
 struct core_dump_iterator {
 	void *start;
 	struct etnaviv_dump_object_header *hdr;
@@ -121,10 +125,16 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	struct etnaviv_vram_mapping *vram;
 	struct etnaviv_gem_object *obj;
 	struct etnaviv_gem_submit *submit;
+	struct drm_sched_job *s_job;
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
 
+	/* Only catch the first event, or when manually re-armed */
+	if (!etnaviv_dump_core)
+		return;
+	etnaviv_dump_core = false;
+
 	mmu_size = etnaviv_iommu_dump_size(gpu->mmu);
 
 	/* We always dump registers, mmu, ring and end marker */
@@ -135,10 +145,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		    mmu_size + gpu->buffer.size;
 
 	/* Add in the active command buffers */
-	list_for_each_entry(submit, &gpu->active_submit_list, node) {
+	spin_lock(&gpu->sched.job_list_lock);
+	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
+		submit = to_etnaviv_submit(s_job);
 		file_size += submit->cmdbuf.size;
 		n_obj++;
 	}
+	spin_unlock(&gpu->sched.job_list_lock);
 
 	/* Add in the active buffer objects */
 	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
@@ -180,10 +193,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 			      gpu->buffer.size,
 			      etnaviv_cmdbuf_get_va(&gpu->buffer));
 
-	list_for_each_entry(submit, &gpu->active_submit_list, node)
+	spin_lock(&gpu->sched.job_list_lock);
+	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
+		submit = to_etnaviv_submit(s_job);
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
 				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
 				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
+	}
+	spin_unlock(&gpu->sched.job_list_lock);
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index f3001476bc06..bd5bf7910d12 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -543,7 +543,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto err_submit_objects;
 
 	memcpy(submit->cmdbuf.vaddr, stream, args->stream_size);
-	submit->cmdbuf.user_size = ALIGN(args->stream_size, 8);
 
 	ret = submit_lock_objects(submit, &ticket);
 	if (ret)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d5d5cfab8477..6b22726f072d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -37,9 +37,6 @@ static const struct platform_device_id gpu_ids[] = {
 	{ },
 };
 
-static bool etnaviv_dump_core = true;
-module_param_named(dump_core, etnaviv_dump_core, bool, 0600);
-
 /*
  * Driver functions:
  */
@@ -913,38 +910,24 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 }
 #endif
 
-/*
- * Hangcheck detection for locked gpu:
- */
-static void recover_worker(struct work_struct *work)
+void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 {
-	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
-					       recover_work);
 	unsigned long flags;
 	unsigned int i = 0;
 
-	dev_err(gpu->dev, "hangcheck recover!\n");
+	dev_err(gpu->dev, "recover hung GPU!\n");
 
 	if (pm_runtime_get_sync(gpu->dev) < 0)
 		return;
 
 	mutex_lock(&gpu->lock);
 
-	/* Only catch the first event, or when manually re-armed */
-	if (etnaviv_dump_core) {
-		etnaviv_core_dump(gpu);
-		etnaviv_dump_core = false;
-	}
-
 	etnaviv_hw_reset(gpu);
 
 	/* complete all events, the GPU won't do it after the reset */
 	spin_lock_irqsave(&gpu->event_spinlock, flags);
-	for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS) {
-		dma_fence_signal(gpu->event[i].fence);
-		gpu->event[i].fence = NULL;
+	for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS)
 		complete(&gpu->event_free);
-	}
 	bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS);
 	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
 	gpu->completed_fence = gpu->active_fence;
@@ -958,53 +941,6 @@ static void recover_worker(struct work_struct *work)
 	pm_runtime_put_autosuspend(gpu->dev);
 }
 
-static void hangcheck_timer_reset(struct etnaviv_gpu *gpu)
-{
-	DBG("%s", dev_name(gpu->dev));
-	mod_timer(&gpu->hangcheck_timer,
-		  round_jiffies_up(jiffies + DRM_ETNAVIV_HANGCHECK_JIFFIES));
-}
-
-static void hangcheck_handler(struct timer_list *t)
-{
-	struct etnaviv_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
-	u32 fence = gpu->completed_fence;
-	bool progress = false;
-
-	if (fence != gpu->hangcheck_fence) {
-		gpu->hangcheck_fence = fence;
-		progress = true;
-	}
-
-	if (!progress) {
-		u32 dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
-		int change = dma_addr - gpu->hangcheck_dma_addr;
-
-		if (change < 0 || change > 16) {
-			gpu->hangcheck_dma_addr = dma_addr;
-			progress = true;
-		}
-	}
-
-	if (!progress && fence_after(gpu->active_fence, fence)) {
-		dev_err(gpu->dev, "hangcheck detected gpu lockup!\n");
-		dev_err(gpu->dev, "     completed fence: %u\n", fence);
-		dev_err(gpu->dev, "     active fence: %u\n",
-			gpu->active_fence);
-		queue_work(gpu->wq, &gpu->recover_work);
-	}
-
-	/* if still more pending work, reset the hangcheck timer: */
-	if (fence_after(gpu->active_fence, gpu->hangcheck_fence))
-		hangcheck_timer_reset(gpu);
-}
-
-static void hangcheck_disable(struct etnaviv_gpu *gpu)
-{
-	del_timer_sync(&gpu->hangcheck_timer);
-	cancel_work_sync(&gpu->recover_work);
-}
-
 /* fence object management */
 struct etnaviv_fence {
 	struct etnaviv_gpu *gpu;
@@ -1280,10 +1216,12 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 	unsigned int i, nr_events = 1, event[3];
 	int ret;
 
-	ret = pm_runtime_get_sync(gpu->dev);
-	if (ret < 0)
-		return NULL;
-	submit->runtime_resumed = true;
+	if (!submit->runtime_resumed) {
+		ret = pm_runtime_get_sync(gpu->dev);
+		if (ret < 0)
+			return NULL;
+		submit->runtime_resumed = true;
+	}
 
 	/*
 	 * if there are performance monitor requests we need to have
@@ -1321,6 +1259,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 	}
 
 	gpu->event[event[0]].fence = gpu_fence;
+	submit->cmdbuf.user_size = submit->cmdbuf.size - 8;
 	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
 			     &submit->cmdbuf);
 
@@ -1331,8 +1270,6 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 		etnaviv_sync_point_queue(gpu, event[2]);
 	}
 
-	hangcheck_timer_reset(gpu);
-
 out_unlock:
 	mutex_unlock(&gpu->lock);
 
@@ -1618,13 +1555,9 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	idr_init(&gpu->fence_idr);
 	spin_lock_init(&gpu->fence_spinlock);
 
-	INIT_LIST_HEAD(&gpu->active_submit_list);
 	INIT_WORK(&gpu->sync_point_work, sync_point_worker);
-	INIT_WORK(&gpu->recover_work, recover_worker);
 	init_waitqueue_head(&gpu->fence_event);
 
-	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, TIMER_DEFERRABLE);
-
 	priv->gpu[priv->num_gpus++] = gpu;
 
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -1652,8 +1585,6 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 
 	DBG("%s", dev_name(gpu->dev));
 
-	hangcheck_disable(gpu);
-
 	flush_workqueue(gpu->wq);
 	destroy_workqueue(gpu->wq);
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index f5c6dbe026d6..186f7c7408b5 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -123,9 +123,6 @@ struct etnaviv_gpu {
 	struct completion event_free;
 	spinlock_t event_spinlock;
 
-	/* list of currently in-flight command buffers */
-	struct list_head active_submit_list;
-
 	u32 idle_mask;
 
 	/* Fencing support */
@@ -153,13 +150,6 @@ struct etnaviv_gpu {
 	struct clk *clk_core;
 	struct clk *clk_shader;
 
-	/* Hang Detction: */
-#define DRM_ETNAVIV_HANGCHECK_PERIOD 500 /* in ms */
-#define DRM_ETNAVIV_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_ETNAVIV_HANGCHECK_PERIOD)
-	struct timer_list hangcheck_timer;
-	u32 hangcheck_fence;
-	u32 hangcheck_dma_addr;
-	struct work_struct recover_work;
 	unsigned int freq_scale;
 	unsigned long base_rate_core;
 	unsigned long base_rate_shader;
@@ -188,6 +178,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 #endif
 
+void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	u32 fence, struct timespec *timeout);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index bb68c57e714f..ca6ef750cca7 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -14,24 +14,19 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <drm/gpu_scheduler.h>
 #include <linux/kthread.h>
 
 #include "etnaviv_drv.h"
+#include "etnaviv_dump.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
+#include "etnaviv_sched.h"
 
 static int etnaviv_job_hang_limit = 0;
 module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444);
 static int etnaviv_hw_jobs_limit = 2;
 module_param_named(hw_job_limit, etnaviv_hw_jobs_limit, int , 0444);
 
-static inline
-struct etnaviv_gem_submit *to_etnaviv_submit(struct drm_sched_job *sched_job)
-{
-	return container_of(sched_job, struct etnaviv_gem_submit, sched_job);
-}
-
 struct dma_fence *etnaviv_sched_dependency(struct drm_sched_job *sched_job,
 					   struct drm_sched_entity *entity)
 {
@@ -86,34 +81,38 @@ struct dma_fence *etnaviv_sched_dependency(struct drm_sched_job *sched_job,
 struct dma_fence *etnaviv_sched_run_job(struct drm_sched_job *sched_job)
 {
 	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
-	struct dma_fence *fence;
-
-	mutex_lock(&submit->gpu->lock);
-	list_add_tail(&submit->node, &submit->gpu->active_submit_list);
-	mutex_unlock(&submit->gpu->lock);
+	struct dma_fence *fence = NULL;
 
-	fence = etnaviv_gpu_submit(submit);
-	if (!fence) {
-		etnaviv_submit_put(submit);
-		return NULL;
-	}
+	if (likely(!sched_job->s_fence->finished.error))
+		fence = etnaviv_gpu_submit(submit);
+	else
+		dev_dbg(submit->gpu->dev, "skipping bad job\n");
 
 	return fence;
 }
 
 static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 {
-	/* this replaces the hangcheck */
+	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
+	struct etnaviv_gpu *gpu = submit->gpu;
+
+	/* block scheduler */
+	kthread_park(gpu->sched.thread);
+	drm_sched_hw_job_reset(&gpu->sched, sched_job);
+
+	/* get the GPU back into the init state */
+	etnaviv_core_dump(gpu);
+	etnaviv_gpu_recover_hang(gpu);
+
+	/* restart scheduler after GPU is usable again */
+	drm_sched_job_recovery(&gpu->sched);
+	kthread_unpark(gpu->sched.thread);
 }
 
 static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
 {
 	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
 
-	mutex_lock(&submit->gpu->lock);
-	list_del(&submit->node);
-	mutex_unlock(&submit->gpu->lock);
-
 	etnaviv_submit_put(submit);
 }
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.h b/drivers/gpu/drm/etnaviv/etnaviv_sched.h
index 539556a95b65..dd336c4e4ca9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.h
@@ -14,9 +14,21 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#ifndef __ETNAVIV_SCHED_H__
+#define __ETNAVIV_SCHED_H__
+
+#include <drm/gpu_scheduler.h>
+
 struct etnaviv_gpu;
 
+static inline
+struct etnaviv_gem_submit *to_etnaviv_submit(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct etnaviv_gem_submit, sched_job);
+}
 int etnaviv_sched_init(struct etnaviv_gpu *gpu);
 void etnaviv_sched_fini(struct etnaviv_gpu *gpu);
 int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
 			   struct etnaviv_gem_submit *submit);
+
+#endif /* __ETNAVIV_SCHED_H__ */
-- 
2.11.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-12-06 17:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-06 17:10 [PATCH 0/4] Etnaviv GPU scheduler Lucas Stach
2017-12-06 17:10 ` [PATCH 1/4] drm/etnaviv: hook up DRM " Lucas Stach
2017-12-06 17:10 ` [PATCH 2/4] drm/etnaviv: move dependency handling to scheduler Lucas Stach
2017-12-06 17:10 ` [PATCH 3/4] drm/etnaviv: lock BOs after all other submit work is done Lucas Stach
2017-12-06 17:10 ` [PATCH 4/4] drm/etnaviv: replace hangcheck with scheduler timeout Lucas Stach

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).