All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] drm/radeon: take a fence reference in the sync code
@ 2014-12-08 16:11 Christian König
  2014-12-08 16:11 ` [PATCH 2/6] drm/radeon: add fence owners Christian König
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Christian König @ 2014-12-08 16:11 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

Just to be sure that fences we sync to won't be released while accessed.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon_sync.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index 02ac8a1..6fccaaf 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -69,11 +69,15 @@ void radeon_sync_fence(struct radeon_sync *sync,
 		return;
 
 	other = sync->sync_to[fence->ring];
-	sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
+	sync->sync_to[fence->ring] = radeon_fence_ref(
+		radeon_fence_later(fence, other));
+	radeon_fence_unref(&other);
 
 	if (fence->is_vm_update) {
 		other = sync->last_vm_update;
-		sync->last_vm_update = radeon_fence_later(fence, other);
+		sync->last_vm_update = radeon_fence_ref(
+			radeon_fence_later(fence, other));
+		radeon_fence_unref(&other);
 	}
 }
 
@@ -217,4 +221,9 @@ void radeon_sync_free(struct radeon_device *rdev,
 
 	for (i = 0; i < RADEON_NUM_SYNCS; ++i)
 		radeon_semaphore_free(rdev, &sync->semaphores[i], fence);
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i)
+		radeon_fence_unref(&sync->sync_to[i]);
+
+	radeon_fence_unref(&sync->last_vm_update);
 }
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/6] drm/radeon: add fence owners
  2014-12-08 16:11 [PATCH 1/6] drm/radeon: take a fence reference in the sync code Christian König
@ 2014-12-08 16:11 ` Christian König
  2014-12-08 16:11 ` [PATCH 3/6] drm/radeon: add command submission IDs Christian König
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2014-12-08 16:11 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

This way we can track who created the fence and then only wait
on fences that userspace doesn't know about.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/cik.c           |  8 +++++---
 drivers/gpu/drm/radeon/cik_sdma.c      |  8 +++++---
 drivers/gpu/drm/radeon/evergreen_dma.c |  5 +++--
 drivers/gpu/drm/radeon/r100.c          |  6 ++++--
 drivers/gpu/drm/radeon/r200.c          |  3 ++-
 drivers/gpu/drm/radeon/r600.c          |  8 +++++---
 drivers/gpu/drm/radeon/r600_dma.c      |  8 +++++---
 drivers/gpu/drm/radeon/radeon.h        | 15 +++++++++++----
 drivers/gpu/drm/radeon/radeon_cs.c     | 14 +++++++++-----
 drivers/gpu/drm/radeon/radeon_fence.c  |  4 ++--
 drivers/gpu/drm/radeon/radeon_ib.c     |  5 +++--
 drivers/gpu/drm/radeon/radeon_sync.c   | 19 +++++++++++--------
 drivers/gpu/drm/radeon/radeon_test.c   |  3 ++-
 drivers/gpu/drm/radeon/radeon_uvd.c    |  3 ++-
 drivers/gpu/drm/radeon/radeon_vce.c    |  6 ++++--
 drivers/gpu/drm/radeon/radeon_vm.c     | 18 ++++++++++--------
 drivers/gpu/drm/radeon/rv770_dma.c     |  5 +++--
 drivers/gpu/drm/radeon/si_dma.c        |  5 +++--
 18 files changed, 89 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 6dcde37..7f15ec5 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4013,7 +4013,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -4035,7 +4035,8 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_bytes;
 	}
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
@@ -4141,7 +4142,8 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index dde5c7e..2261a88 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -560,7 +560,7 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -579,7 +579,8 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_bytes;
 	}
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
@@ -691,7 +692,8 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 96535aa..094df95 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -129,7 +129,7 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -146,7 +146,8 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_dw * 4;
 	}
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 74f06d5..81388d9 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -937,7 +937,8 @@ struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
 			  RADEON_WAIT_2D_IDLECLEAN |
 			  RADEON_WAIT_HOST_IDLECLEAN |
 			  RADEON_WAIT_DMA_GUI_IDLE);
-	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX,
+			      RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		return ERR_PTR(r);
@@ -3706,7 +3707,8 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[6] = PACKET2(0);
 	ib.ptr[7] = PACKET2(0);
 	ib.length_dw = 8;
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index c70e6d5..d09fb3f 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -119,7 +119,8 @@ struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
 	}
 	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 	radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE);
-	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX,
+			      RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		return ERR_PTR(r);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index ef5d606..462cc36 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2908,7 +2908,7 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
@@ -2935,7 +2935,8 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
 	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 	radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
@@ -3302,7 +3303,8 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index d2dd29a..013f939 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -362,7 +362,8 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[3] = 0xDEADBEEF;
 	ib.length_dw = 4;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
@@ -460,7 +461,7 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -477,7 +478,8 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_dw * 4;
 	}
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 54529b8..3968f91 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -359,14 +359,20 @@ struct radeon_fence_driver {
 	struct delayed_work		lockup_work;
 };
 
+/* some special values for the owner field */
+#define RADEON_FENCE_OWNER_UNDEFINED	(0ul)
+#define RADEON_FENCE_OWNER_VM		(1ul)
+#define RADEON_FENCE_OWNER_MOVE		(2ul)
+
 struct radeon_fence {
 	struct fence		base;
 
 	struct radeon_device	*rdev;
 	uint64_t		seq;
+	/* filp or special value for fence creator */
+	long			owner;
 	/* RB, DMA, etc. */
 	unsigned		ring;
-	bool			is_vm_update;
 
 	wait_queue_t		fence_wake;
 };
@@ -375,7 +381,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
 void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
-int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
+int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence,
+		      int ring, long owner);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
@@ -614,7 +621,7 @@ void radeon_sync_fence(struct radeon_sync *sync,
 int radeon_sync_resv(struct radeon_device *rdev,
 		     struct radeon_sync *sync,
 		     struct reservation_object *resv,
-		     bool shared);
+		     long owner);
 int radeon_sync_rings(struct radeon_device *rdev,
 		      struct radeon_sync *sync,
 		      int waiting_ring);
@@ -1015,7 +1022,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
-		       struct radeon_ib *const_ib, bool hdp_flush);
+		       struct radeon_ib *const_ib, bool hdp_flush, long owner);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
 int radeon_ib_ring_tests(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 9648e28..3c3b7d9 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -237,10 +237,11 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 
 	list_for_each_entry(reloc, &p->validated, tv.head) {
 		struct reservation_object *resv;
+		long owner = reloc->tv.shared ? (long)p->filp :
+			RADEON_FENCE_OWNER_UNDEFINED;
 
 		resv = reloc->robj->tbo.resv;
-		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
-				     reloc->tv.shared);
+		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, owner);
 
 		if (r)
 			return r;
@@ -467,7 +468,8 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
 		radeon_vce_note_usage(rdev);
 
-	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
+	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true,
+			       (long)parser->filp);
 	if (r) {
 		DRM_ERROR("Failed to schedule IB !\n");
 	}
@@ -561,9 +563,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib != NULL)) {
-		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
+		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib,
+				       true, (long)parser->filp);
 	} else {
-		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
+		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true,
+				       (long)parser->filp);
 	}
 
 out:
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index d13d1b5..89e8c5f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -128,7 +128,7 @@ static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
  */
 int radeon_fence_emit(struct radeon_device *rdev,
 		      struct radeon_fence **fence,
-		      int ring)
+		      int ring, long owner)
 {
 	u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
 
@@ -138,9 +138,9 @@ int radeon_fence_emit(struct radeon_device *rdev,
 		return -ENOMEM;
 	}
 	(*fence)->rdev = rdev;
+	(*fence)->owner = owner;
 	(*fence)->seq = seq;
 	(*fence)->ring = ring;
-	(*fence)->is_vm_update = false;
 	fence_init(&(*fence)->base, &radeon_fence_ops,
 		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
 	radeon_fence_ring_emit(rdev, ring, *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index c39ce1f..525416a 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -105,6 +105,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
  * @ib: IB object to schedule
  * @const_ib: Const IB to schedule (SI only)
  * @hdp_flush: Whether or not to perform an HDP cache flush
+ * @owner: owner for creating the fence
  *
  * Schedule an IB on the associated ring (all asics).
  * Returns 0 on success, error on failure.
@@ -120,7 +121,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
  * to SI there was just a DE IB.
  */
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
-		       struct radeon_ib *const_ib, bool hdp_flush)
+		       struct radeon_ib *const_ib, bool hdp_flush, long owner)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	int r = 0;
@@ -162,7 +163,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 		radeon_sync_free(rdev, &const_ib->sync, NULL);
 	}
 	radeon_ring_ib_execute(rdev, ib->ring, ib);
-	r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
+	r = radeon_fence_emit(rdev, &ib->fence, ib->ring, owner);
 	if (r) {
 		dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r);
 		radeon_ring_unlock_undo(rdev, ring);
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index 6fccaaf..ca98d4b 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -73,7 +73,7 @@ void radeon_sync_fence(struct radeon_sync *sync,
 		radeon_fence_later(fence, other));
 	radeon_fence_unref(&other);
 
-	if (fence->is_vm_update) {
+	if (fence->owner == RADEON_FENCE_OWNER_VM) {
 		other = sync->last_vm_update;
 		sync->last_vm_update = radeon_fence_ref(
 			radeon_fence_later(fence, other));
@@ -93,7 +93,7 @@ void radeon_sync_fence(struct radeon_sync *sync,
 int radeon_sync_resv(struct radeon_device *rdev,
 		     struct radeon_sync *sync,
 		     struct reservation_object *resv,
-		     bool shared)
+		     long owner)
 {
 	struct reservation_object_list *flist;
 	struct fence *f;
@@ -110,20 +110,23 @@ int radeon_sync_resv(struct radeon_device *rdev,
 		r = fence_wait(f, true);
 
 	flist = reservation_object_get_list(resv);
-	if (shared || !flist || r)
+	if (!flist || r)
 		return r;
 
 	for (i = 0; i < flist->shared_count; ++i) {
 		f = rcu_dereference_protected(flist->shared[i],
 					      reservation_object_held(resv));
 		fence = to_radeon_fence(f);
-		if (fence && fence->rdev == rdev)
-			radeon_sync_fence(sync, fence);
-		else
+		if (fence && fence->rdev == rdev) {
+			if (fence->owner != owner ||
+			    fence->owner == RADEON_FENCE_OWNER_UNDEFINED)
+				radeon_sync_fence(sync, fence);
+		} else {
 			r = fence_wait(f, true);
 
-		if (r)
-			break;
+			if (r)
+				break;
+		}
 	}
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 07b506b..5e38b95 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -298,7 +298,8 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
 			DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
 			return r;
 		}
-		radeon_fence_emit(rdev, fence, ring->idx);
+		radeon_fence_emit(rdev, fence, ring->idx,
+				  RADEON_FENCE_OWNER_UNDEFINED);
 		radeon_ring_unlock_commit(rdev, ring, false);
 	}
 	return 0;
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index c10b2ae..1ee9ac3 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -671,7 +671,8 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev,
 		ib.ptr[i] = PACKET2(0);
 	ib.length_dw = 16;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 
 	if (fence)
 		*fence = radeon_fence_ref(ib.fence);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 976fe43..e64bbcb 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -369,7 +369,8 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
 	for (i = ib.length_dw; i < ib_size_dw; ++i)
 		ib.ptr[i] = 0x0;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
@@ -426,7 +427,8 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
 	for (i = ib.length_dw; i < ib_size_dw; ++i)
 		ib.ptr[i] = 0x0;
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_UNDEFINED);
 	if (r) {
 	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index cde48c4..d9074bb 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -414,11 +414,11 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
 	radeon_asic_vm_pad_ib(rdev, &ib);
 	WARN_ON(ib.length_dw > 64);
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_VM);
 	if (r)
 		goto error_free;
 
-	ib.fence->is_vm_update = true;
 	radeon_bo_fence(bo, ib.fence, false);
 
 error_free:
@@ -693,14 +693,15 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 	if (ib.length_dw != 0) {
 		radeon_asic_vm_pad_ib(rdev, &ib);
 
-		radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
+		radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv,
+				 RADEON_FENCE_OWNER_VM);
 		WARN_ON(ib.length_dw > ndw);
-		r = radeon_ib_schedule(rdev, &ib, NULL, false);
+		r = radeon_ib_schedule(rdev, &ib, NULL, false,
+				       RADEON_FENCE_OWNER_VM);
 		if (r) {
 			radeon_ib_free(rdev, &ib);
 			return r;
 		}
-		ib.fence->is_vm_update = true;
 		radeon_bo_fence(pd, ib.fence, false);
 	}
 	radeon_ib_free(rdev, &ib);
@@ -819,7 +820,8 @@ static int radeon_vm_update_ptes(struct radeon_device *rdev,
 		uint64_t pte;
 		int r;
 
-		radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
+		radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv,
+				 RADEON_FENCE_OWNER_VM);
 		r = reservation_object_reserve_shared(pt->tbo.resv);
 		if (r)
 			return r;
@@ -1004,12 +1006,12 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	radeon_asic_vm_pad_ib(rdev, &ib);
 	WARN_ON(ib.length_dw > ndw);
 
-	r = radeon_ib_schedule(rdev, &ib, NULL, false);
+	r = radeon_ib_schedule(rdev, &ib, NULL, false,
+			       RADEON_FENCE_OWNER_VM);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
 		return r;
 	}
-	ib.fence->is_vm_update = true;
 	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
 	radeon_fence_unref(&bo_va->last_pt_update);
 	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index acff6e0..fd274d1 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -63,7 +63,7 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -80,7 +80,8 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_dw * 4;
 	}
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index f5cc777..6420a19 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -245,7 +245,7 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
 		return ERR_PTR(r);
 	}
 
-	radeon_sync_resv(rdev, &sync, resv, false);
+	radeon_sync_resv(rdev, &sync, resv, RADEON_FENCE_OWNER_UNDEFINED);
 	radeon_sync_rings(rdev, &sync, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -262,7 +262,8 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_bytes;
 	}
 
-	r = radeon_fence_emit(rdev, &fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx,
+			      RADEON_FENCE_OWNER_MOVE);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_sync_free(rdev, &sync, NULL);
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/6] drm/radeon: add command submission IDs
  2014-12-08 16:11 [PATCH 1/6] drm/radeon: take a fence reference in the sync code Christian König
  2014-12-08 16:11 ` [PATCH 2/6] drm/radeon: add fence owners Christian König
@ 2014-12-08 16:11 ` Christian König
  2014-12-09  9:01   ` Michel Dänzer
  2014-12-08 16:11 ` [PATCH 4/6] drm/radeon: add explicit command submission sync Christian König
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2014-12-08 16:11 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

This patch adds a new 64bit ID as a result to each command submission.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/Makefile     |   2 +-
 drivers/gpu/drm/radeon/radeon.h     |  13 +-
 drivers/gpu/drm/radeon/radeon_cs.c  |  13 ++
 drivers/gpu/drm/radeon/radeon_kms.c |  41 +++----
 drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++
 include/uapi/drm/radeon_drm.h       |   1 +
 6 files changed, 277 insertions(+), 22 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index 12bc212..7145f15 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
 	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \
-	radeon_sync.o
+	radeon_sync.o radeon_seq.o
 
 # add async DMA block
 radeon-y += \
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 3968f91..b9fde1d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -433,6 +433,15 @@ static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
 }
 
 /*
+ * Userspace command submission identifier generation
+ */
+struct radeon_seq;
+
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence);
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id);
+void radeon_seq_destroy(struct radeon_seq **seq);
+
+/*
  * Tiling registers
  */
 struct radeon_surface_reg {
@@ -975,7 +984,9 @@ struct radeon_vm_manager {
  * file private structure
  */
 struct radeon_fpriv {
-	struct radeon_vm		vm;
+	struct radeon_vm	vm;
+	struct mutex		seq_lock;
+	struct radeon_seq	*seq;
 };
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 3c3b7d9..c0fc8d8 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -398,6 +398,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	unsigned i;
 
 	if (!error) {
+		if (parser->chunk_flags &&
+		    parser->chunk_flags->length_dw > 4) {
+			struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+			uint32_t __user *to = parser->chunk_flags->user_ptr;
+			uint64_t id;
+
+			mutex_lock(&fpriv->seq_lock);
+			id = radeon_seq_push(&fpriv->seq, parser->ib.fence);
+			mutex_unlock(&fpriv->seq_lock);
+
+			copy_to_user(&to[3], &id, sizeof(uint64_t));
+		}
+
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
 		 * This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index f4dd26a..db5c986 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -585,39 +585,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
  */
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 {
+	struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
 	struct radeon_device *rdev = dev->dev_private;
 	int r;
 
-	file_priv->driver_priv = NULL;
+	if (unlikely(!fpriv))
+		return -ENOMEM;
+
+	file_priv->driver_priv = fpriv;
 
 	r = pm_runtime_get_sync(dev->dev);
 	if (r < 0)
-		return r;
+		goto error;
 
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN) {
-		struct radeon_fpriv *fpriv;
 		struct radeon_vm *vm;
 		int r;
 
-		fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
-		if (unlikely(!fpriv)) {
-			return -ENOMEM;
-		}
-
 		vm = &fpriv->vm;
 		r = radeon_vm_init(rdev, vm);
-		if (r) {
-			kfree(fpriv);
-			return r;
-		}
+		if (r)
+			goto error;
 
 		if (rdev->accel_working) {
 			r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
-				kfree(fpriv);
-				return r;
+				goto error;
 			}
 
 			/* map the ib pool buffer read only into
@@ -630,16 +625,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 						  RADEON_VM_PAGE_SNOOPED);
 			if (r) {
 				radeon_vm_fini(rdev, vm);
-				kfree(fpriv);
-				return r;
+				goto error;
 			}
 		}
-		file_priv->driver_priv = fpriv;
 	}
 
+	mutex_init(&fpriv->seq_lock);
+
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 	return 0;
+
+error:
+	kfree(fpriv);
+	return r;
 }
 
 /**
@@ -653,11 +652,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 void radeon_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv)
 {
+	struct radeon_fpriv *fpriv = file_priv->driver_priv;
 	struct radeon_device *rdev = dev->dev_private;
 
+	radeon_seq_destroy(&fpriv->seq);
+
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
-		struct radeon_fpriv *fpriv = file_priv->driver_priv;
 		struct radeon_vm *vm = &fpriv->vm;
 		int r;
 
@@ -671,9 +672,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev,
 		}
 
 		radeon_vm_fini(rdev, vm);
-		kfree(fpriv);
-		file_priv->driver_priv = NULL;
 	}
+	kfree(fpriv);
+	file_priv->driver_priv = NULL;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_seq.c b/drivers/gpu/drm/radeon/radeon_seq.c
new file mode 100644
index 0000000..d8857f1
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_seq.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+
+/*
+ * ID sequences
+ * This code generates a 64bit identifier for a command submission.
+ * It works by adding the fence of the command submission to an automatically
+ * resizing ring buffer.
+ */
+
+struct radeon_seq {
+	uint64_t		start;
+	uint64_t		end;
+	uint64_t		mask;
+	struct radeon_seq	*replacement;
+};
+
+/**
+ * radeon_seq_create - create a new sequence object
+ *
+ * @start: start value for this sequence
+ * @size: size of the ring buffer, must be power of two
+ *
+ * Allocate and initialize a new ring buffer and header.
+ * Returns NULL if allocation fails, new object otherwise.
+ */
+static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size)
+{
+	unsigned bytes = sizeof(struct radeon_seq) +
+		size * sizeof(struct radeon_fence *);
+
+	struct radeon_seq *seq;
+
+	seq = kmalloc(bytes, GFP_KERNEL);
+	if (!seq)
+		return NULL;
+
+	seq->start = start;
+	seq->end = start;
+	seq->mask = size - 1;
+	seq->replacement = NULL;
+
+	return seq;
+}
+
+/**
+ * radeon_seq_ring - get pointer to ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Calculate the address of the ring buffer.
+ */
+static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq)
+{
+	return (struct radeon_fence **)&seq[1];
+}
+
+/**
+ * radeon_seq_try_free - try to free fences from the ring buffer
+ *
+ * @seq: sequence object
+ *
+ * Try to free fences from the start of the ring buffer.
+ */
+static void radeon_seq_try_free(struct radeon_seq *seq)
+{
+	struct radeon_fence **ring = radeon_seq_ring(seq);
+
+	while (seq->start != seq->end) {
+		unsigned idx = seq->start & seq->mask;
+		struct radeon_fence *fence = ring[idx];
+
+		if (!radeon_fence_signaled(fence))
+			break;
+
+		radeon_fence_unref(&fence);
+		++seq->start;
+	}
+}
+
+/**
+ * radeon_seq_add - add new fence to the end of the ring buffer
+ *
+ * @seq: sequence object
+ * @f: the fence object
+ *
+ * Add the fence and return the generated ID.
+ */
+static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f)
+{
+	struct radeon_fence **ring = radeon_seq_ring(seq);
+
+	ring[seq->end & seq->mask] = radeon_fence_ref(f);
+	return seq->end++;
+}
+
+/**
+ * radeon_seq_push - check for room and add the fence
+ *
+ * @seq: sequence object
+ * @fence: the fence object
+ *
+ * Check for room on the ring buffer, if there isn't enough
+ * reallocate the sequence object and add the fence.
+ * Returns the generated ID.
+ */
+uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence)
+{
+	unsigned size_for_new_seq = 4;
+	uint64_t start_for_new_seq = 1;
+
+	if (*seq) {
+		/* try to release old replacements */
+		while ((*seq)->replacement) {
+			radeon_seq_try_free(*seq);
+			if ((*seq)->start == (*seq)->end) {
+				struct radeon_seq *repl = (*seq)->replacement;
+
+				kfree(*seq);
+				*seq = repl;
+			} else {
+				/* move on to the current container */
+				seq = &(*seq)->replacement;
+			}
+		}
+
+		/* check if we have enough room for one more fence */
+		radeon_seq_try_free(*seq);
+		if (((*seq)->end - (*seq)->start) <= (*seq)->mask)
+			return radeon_seq_add(*seq, fence);
+
+		/* not enough room, let's allocate a replacement */
+		size_for_new_seq = ((*seq)->mask + 1) * 2;
+		start_for_new_seq = (*seq)->end + 1;
+		seq = &(*seq)->replacement;
+	}
+
+	*seq = radeon_seq_create(start_for_new_seq, size_for_new_seq);
+	if (!*seq) {
+		/* not enough memory for a new sequence object, but failing
+		   here isn't a good idea either because the commands are already
+		   submitted to the hardware. So just block on the fence. */
+		int r = radeon_fence_wait(fence, false);
+		if (r)
+			DRM_ERROR("Error waiting for fence (%d)\n", r);
+		return 0;
+	}
+	return radeon_seq_add(*seq, fence);
+}
+
+/**
+ * radeon_seq_query - look up fence by its ID
+ *
+ * @seq: sequence object
+ * @id: the generated ID
+ *
+ * Look up the associated fence by its ID.
+ * Returns fence object or NULL if it couldn't be found.
+ */
+struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id)
+{
+	struct radeon_fence **ring;
+
+	while (seq && id > seq->end)
+		seq = seq->replacement;
+
+	if (!seq || id < seq->start)
+		return NULL;
+
+	ring = radeon_seq_ring(seq);
+	return ring[id & seq->mask];
+}
+
+/**
+ * radeon_seq_destroy - destroy the sequence object
+ *
+ * @seq_ptr: pointer to sequence object
+ *
+ * Destroy the sequence objects and release all fence references taken.
+ */
+void radeon_seq_destroy(struct radeon_seq **seq_ptr)
+{
+	struct radeon_seq *seq = *seq_ptr;
+	while (seq) {
+		struct radeon_seq *repl = seq->replacement;
+		unsigned start = seq->start & seq->mask;
+		unsigned end = seq->end & seq->mask;
+		struct radeon_fence **ring;
+		unsigned i;
+
+		ring = radeon_seq_ring(seq);
+		for (i = start; i < end; ++i)
+			radeon_fence_unref(&ring[i]);
+
+		kfree(seq);
+		seq = repl;
+	}
+	*seq_ptr = NULL;
+}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 50d0fb4..6b2b2e7 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -959,6 +959,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_VCE          4
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth and fifth dwords are a 64bit fence ID generated for this CS */
 
 struct drm_radeon_cs_chunk {
 	uint32_t		chunk_id;
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/6] drm/radeon: add explicit command submission sync
  2014-12-08 16:11 [PATCH 1/6] drm/radeon: take a fence reference in the sync code Christian König
  2014-12-08 16:11 ` [PATCH 2/6] drm/radeon: add fence owners Christian König
  2014-12-08 16:11 ` [PATCH 3/6] drm/radeon: add command submission IDs Christian König
@ 2014-12-08 16:11 ` Christian König
  2014-12-09  9:00   ` Michel Dänzer
  2014-12-08 16:11 ` [PATCH 5/6] drm/radeon: optionally return an ID for the last PT update Christian König
  2014-12-08 16:11 ` [PATCH 6/6] drm/radeon: add IOCTL to wait for a specific CS Christian König
  4 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2014-12-08 16:11 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

The driver falls back to implicit synchronization as soon as
buffers move between clients or are moved by TTM.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h    |  1 +
 drivers/gpu/drm/radeon/radeon_cs.c | 24 +++++++++++++++++++++++-
 include/uapi/drm/radeon_drm.h      |  7 ++++---
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b9fde1d..1529afb 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1101,6 +1101,7 @@ struct radeon_cs_parser {
 	struct radeon_cs_chunk  *chunk_relocs;
 	struct radeon_cs_chunk  *chunk_flags;
 	struct radeon_cs_chunk  *chunk_const_ib;
+	struct radeon_cs_chunk	*chunk_wait_for;
 	struct radeon_ib	ib;
 	struct radeon_ib	const_ib;
 	void			*track;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index c0fc8d8..a73f9da 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -165,7 +165,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		}
 
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
-		p->relocs[i].tv.shared = !r->write_domain;
+		p->relocs[i].tv.shared = !r->write_domain ||
+					 !!p->chunk_wait_for;
 
 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
 				      priority);
@@ -235,6 +236,23 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 	struct radeon_bo_list *reloc;
 	int r;
 
+	if (p->chunk_wait_for) {
+		struct radeon_fpriv *fpriv = p->filp->driver_priv;
+		unsigned i;
+
+		for (i = 0; i < p->chunk_wait_for->length_dw; i += 2) {
+			struct radeon_fence *fence;
+			uint64_t *id;
+
+			id = (uint64_t *)&p->chunk_wait_for->kdata[i];
+
+			mutex_lock(&fpriv->seq_lock);
+			fence = radeon_seq_query(fpriv->seq, *id);
+			radeon_sync_fence(&p->ib.sync, fence);
+			mutex_unlock(&fpriv->seq_lock);
+		}
+	}
+
 	list_for_each_entry(reloc, &p->validated, tv.head) {
 		struct reservation_object *resv;
 		long owner = reloc->tv.shared ? (long)p->filp :
@@ -317,6 +335,10 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_WAIT_FOR) {
+			p->chunk_wait_for = &p->chunks[i];
+			/* zero length wait for list is actually useful */
+		}
 
 		size = p->chunks[i].length_dw;
 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 6b2b2e7..a34e3db 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -942,10 +942,11 @@ struct drm_radeon_gem_va {
 	uint64_t		offset;
 };
 
-#define RADEON_CHUNK_ID_RELOCS	0x01
-#define RADEON_CHUNK_ID_IB	0x02
-#define RADEON_CHUNK_ID_FLAGS	0x03
+#define RADEON_CHUNK_ID_RELOCS		0x01
+#define RADEON_CHUNK_ID_IB		0x02
+#define RADEON_CHUNK_ID_FLAGS		0x03
 #define RADEON_CHUNK_ID_CONST_IB	0x04
+#define RADEON_CHUNK_ID_WAIT_FOR	0x05
 
 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/6] drm/radeon: optionally return an ID for the last PT update
  2014-12-08 16:11 [PATCH 1/6] drm/radeon: take a fence reference in the sync code Christian König
                   ` (2 preceding siblings ...)
  2014-12-08 16:11 ` [PATCH 4/6] drm/radeon: add explicit command submission sync Christian König
@ 2014-12-08 16:11 ` Christian König
  2014-12-08 16:11 ` [PATCH 6/6] drm/radeon: add IOCTL to wait for a specific CS Christian König
  4 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2014-12-08 16:11 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

PT updates can be seen as command submissions as well,
and we don't necessary need to wait on all of them.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon_gem.c | 12 +++++++++++-
 include/uapi/drm/radeon_drm.h       |  1 +
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index fe48f22..dd45611 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -691,8 +691,18 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 	default:
 		break;
 	}
-	if (!r)
+	args->id = 0;
+	if (!r) {
+		struct radeon_fence *fence;
+
 		radeon_gem_va_update_vm(rdev, bo_va);
+		fence = bo_va->last_pt_update;
+		if (fence) {
+			mutex_lock(&fpriv->seq_lock);
+			args->id = radeon_seq_push(&fpriv->seq, fence);
+			mutex_unlock(&fpriv->seq_lock);
+		}
+	}
 	args->operation = RADEON_VA_RESULT_OK;
 	if (r) {
 		args->operation = RADEON_VA_RESULT_ERROR;
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index a34e3db..2c50838 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -940,6 +940,7 @@ struct drm_radeon_gem_va {
 	uint32_t		vm_id;
 	uint32_t		flags;
 	uint64_t		offset;
+	uint64_t		id;
 };
 
 #define RADEON_CHUNK_ID_RELOCS		0x01
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/6] drm/radeon: add IOCTL to wait for a specific CS
  2014-12-08 16:11 [PATCH 1/6] drm/radeon: take a fence reference in the sync code Christian König
                   ` (3 preceding siblings ...)
  2014-12-08 16:11 ` [PATCH 5/6] drm/radeon: optionally return an ID for the last PT update Christian König
@ 2014-12-08 16:11 ` Christian König
  4 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2014-12-08 16:11 UTC (permalink / raw)
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

At least inside the same client we should stop waiting for a buffer to be
idle, but rather wait for a specific command submission to complete.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h     |  2 ++
 drivers/gpu/drm/radeon/radeon_gem.c | 26 ++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c |  1 +
 include/uapi/drm/radeon_drm.h       |  7 +++++++
 4 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1529afb..d8bf3a7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2230,6 +2230,8 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
 int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp);
+int radeon_gem_wait_cs_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *filp);
 int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
 int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index dd45611..297f327 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -494,6 +494,32 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	return r;
 }
 
+int radeon_gem_wait_cs_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *filp)
+{
+	struct radeon_fpriv *fpriv = filp->driver_priv;
+	struct drm_radeon_gem_wait_cs *args = data;
+	struct radeon_fence *fence;
+	unsigned long timeout;
+	long r;
+
+	mutex_lock(&fpriv->seq_lock);
+	fence = radeon_fence_ref(radeon_seq_query(fpriv->seq, args->id));
+	mutex_unlock(&fpriv->seq_lock);
+
+	timeout = nsecs_to_jiffies(args->timeout);
+	r = fence_wait_timeout(&fence->base, true, timeout);
+	radeon_fence_unref(&fence);
+
+	if (r == 0)
+		return -EBUSY;
+
+	if (r < 0)
+		return r;
+
+	return 0;
+}
+
 int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index db5c986..69b74a8 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -892,5 +892,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_CS, radeon_gem_wait_cs_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 2c50838..d700c06 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -512,6 +512,7 @@ typedef struct {
 #define DRM_RADEON_GEM_VA		0x2b
 #define DRM_RADEON_GEM_OP		0x2c
 #define DRM_RADEON_GEM_USERPTR		0x2d
+#define DRM_RADEON_GEM_WAIT_CS		0x2e
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -556,6 +557,7 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
 #define DRM_IOCTL_RADEON_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
 #define DRM_IOCTL_RADEON_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr)
+#define DRM_IOCTL_RADEON_GEM_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_CS, struct drm_radeon_gem_wait_cs)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -880,6 +882,11 @@ struct drm_radeon_gem_wait_idle {
 	uint32_t	pad;
 };
 
+struct drm_radeon_gem_wait_cs {
+	uint64_t	id;
+	uint64_t	timeout;
+};
+
 struct drm_radeon_gem_busy {
 	uint32_t	handle;
 	uint32_t        domain;
-- 
1.9.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 4/6] drm/radeon: add explicit command submission sync
  2014-12-08 16:11 ` [PATCH 4/6] drm/radeon: add explicit command submission sync Christian König
@ 2014-12-09  9:00   ` Michel Dänzer
  0 siblings, 0 replies; 8+ messages in thread
From: Michel Dänzer @ 2014-12-09  9:00 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On 09.12.2014 01:11, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
> 
> The driver falls back to explicit synchronization as soon as
> buffers move between clients or are moved by TTM.

I assume this should say 'falls back to implicit synchronization'.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/6] drm/radeon: add command submission IDs
  2014-12-08 16:11 ` [PATCH 3/6] drm/radeon: add command submission IDs Christian König
@ 2014-12-09  9:01   ` Michel Dänzer
  0 siblings, 0 replies; 8+ messages in thread
From: Michel Dänzer @ 2014-12-09  9:01 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On 09.12.2014 01:11, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
> 
> This patch adds a new 64bit ID as a result to each command submission.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

I noticed a few spelling mistakes, see below.

Other than these minor nits, I haven't noticed any problems in this
series, though I haven't looked at it in too much detail.


> +/*
> + * ID sequences
> + * This code generates a 64bit identifier for a command submission.
> + * It works by adding the fence of the command submission to a automatically

'to an automatically'


> +/**
> + * radeon_seq_query - lockup fence by it's ID

'look up fence by its ID'

> + * @seq: sequence object
> + * @id: the generated ID
> + *
> + * Lockup the associated fence by it's ID.

'Look up the associated fence by its ID.'


> +/* The fourth and fives dword are a 64bit fence ID generated for this CS */

'fourth and fifth'


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-12-09  9:01 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-08 16:11 [PATCH 1/6] drm/radeon: take a fence reference in the sync code Christian König
2014-12-08 16:11 ` [PATCH 2/6] drm/radeon: add fence owners Christian König
2014-12-08 16:11 ` [PATCH 3/6] drm/radeon: add command submission IDs Christian König
2014-12-09  9:01   ` Michel Dänzer
2014-12-08 16:11 ` [PATCH 4/6] drm/radeon: add explicit command submission sync Christian König
2014-12-09  9:00   ` Michel Dänzer
2014-12-08 16:11 ` [PATCH 5/6] drm/radeon: optionally return an ID for the last PT update Christian König
2014-12-08 16:11 ` [PATCH 6/6] drm/radeon: add IOCTL to wait for a specific CS Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.