* [RFC] Convert fence to use 64bits sequence
@ 2012-05-02 20:20 j.glisse
  2012-05-02 20:20 ` [PATCH 1/4] drm/radeon: allow to allocate adjacent scratch reg j.glisse
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: j.glisse @ 2012-05-02 20:20 UTC (permalink / raw)
  To: dri-devel

So this patchset converts the fence code to use a 64-bit sequence
and simplifies it (dropping the fence lock). I am still convinced
that the best solution is to have the various helpers deal with
fence cleanup/processing. The last patch shows an example of what
can be done to improve the sa allocator by taking advantage of the
u64 fence.
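
As a quick illustration of why this helps (a sketch, not code from the
series): with a monotonically increasing u64 the signaled test is a
plain comparison, while a 32-bit sequence needs explicit wrap-around
handling.

    /* u64: at one fence per microsecond the counter only wraps after
     * ~584000 years, so a plain comparison is always safe */
    static inline bool seq_signaled_u64(uint64_t last_seq, uint64_t seq)
    {
            return last_seq >= seq;
    }

    /* u32: the sequence space must be treated as a window, e.g. via a
     * signed difference, and long stalls can still confuse it */
    static inline bool seq_signaled_u32(uint32_t last_seq, uint32_t seq)
    {
            return (int32_t)(last_seq - seq) >= 0;
    }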

Cheers,
Jerome


* [PATCH 1/4] drm/radeon: allow to allocate adjacent scratch reg
  2012-05-02 20:20 [RFC] Convert fence to use 64bits sequence j.glisse
@ 2012-05-02 20:20 ` j.glisse
  2012-05-02 20:20 ` [PATCH 2/4] drm/radeon: convert fence to uint64_t j.glisse
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 17+ messages in thread
From: j.glisse @ 2012-05-02 20:20 UTC (permalink / raw)
  To: dri-devel; +Cc: Jerome Glisse

From: Jerome Glisse <jglisse@redhat.com>

This adds the number of adjacent scratch registers you want to
allocate or free as an argument to the scratch alloc/free functions.
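
For illustration (a sketch of a caller, not code from this patch), a
64-bit fence value needs two adjacent 32-bit scratch registers:

    uint32_t scratch;
    int r;

    r = radeon_scratch_get(rdev, &scratch, 2);  /* two adjacent regs */
    if (r == 0) {
            /* scratch and scratch + 4 now back one 64-bit value */
            radeon_scratch_free(rdev, scratch, 2);
    }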

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/r100.c          |   12 ++++++------
 drivers/gpu/drm/radeon/r420.c          |    4 ++--
 drivers/gpu/drm/radeon/r600.c          |   12 ++++++------
 drivers/gpu/drm/radeon/radeon.h        |    4 ++--
 drivers/gpu/drm/radeon/radeon_device.c |   31 +++++++++++++++++++++++--------
 drivers/gpu/drm/radeon/radeon_fence.c  |    6 +++---
 6 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index d47ffd5..80b57c5 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3636,7 +3636,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	unsigned i;
 	int r;
 
-	r = radeon_scratch_get(rdev, &scratch);
+	r = radeon_scratch_get(rdev, &scratch, 1);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
 		return r;
@@ -3645,7 +3645,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	r = radeon_ring_lock(rdev, ring, 2);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
-		radeon_scratch_free(rdev, scratch);
+		radeon_scratch_free(rdev, scratch, 1);
 		return r;
 	}
 	radeon_ring_write(ring, PACKET0(scratch, 0));
@@ -3665,7 +3665,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 			  scratch, tmp);
 		r = -EINVAL;
 	}
-	radeon_scratch_free(rdev, scratch);
+	radeon_scratch_free(rdev, scratch, 1);
 	return r;
 }
 
@@ -3686,7 +3686,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	unsigned i;
 	int r;
 
-	r = radeon_scratch_get(rdev, &scratch);
+	r = radeon_scratch_get(rdev, &scratch, 1);
 	if (r) {
 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
 		return r;
@@ -3707,7 +3707,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib->length_dw = 8;
 	r = radeon_ib_schedule(rdev, ib);
 	if (r) {
-		radeon_scratch_free(rdev, scratch);
+		radeon_scratch_free(rdev, scratch, 1);
 		radeon_ib_free(rdev, &ib);
 		return r;
 	}
@@ -3729,7 +3729,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 			  scratch, tmp);
 		r = -EINVAL;
 	}
-	radeon_scratch_free(rdev, scratch);
+	radeon_scratch_free(rdev, scratch, 1);
 	radeon_ib_free(rdev, &ib);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 99137be..5ba459b 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -207,7 +207,7 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 	 * The proper workaround is to queue a RESYNC at the beginning
 	 * of the CP init, apparently.
 	 */
-	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
+	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch, 1);
 	radeon_ring_lock(rdev, ring, 8);
 	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
 	radeon_ring_write(ring, rdev->config.r300.resync_scratch);
@@ -226,7 +226,7 @@ static void r420_cp_errata_fini(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 	radeon_ring_write(ring, R300_RB3D_DC_FINISH);
 	radeon_ring_unlock_commit(rdev, ring);
-	radeon_scratch_free(rdev, rdev->config.r300.resync_scratch);
+	radeon_scratch_free(rdev, rdev->config.r300.resync_scratch, 1);
 }
 
 static int r420_startup(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0cbcd3a..02abf32 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2261,7 +2261,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	unsigned i, ridx = radeon_ring_index(rdev, ring);
 	int r;
 
-	r = radeon_scratch_get(rdev, &scratch);
+	r = radeon_scratch_get(rdev, &scratch, 1);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
 		return r;
@@ -2270,7 +2270,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	r = radeon_ring_lock(rdev, ring, 3);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r);
-		radeon_scratch_free(rdev, scratch);
+		radeon_scratch_free(rdev, scratch, 1);
 		return r;
 	}
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
@@ -2290,7 +2290,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 			  ridx, scratch, tmp);
 		r = -EINVAL;
 	}
-	radeon_scratch_free(rdev, scratch);
+	radeon_scratch_free(rdev, scratch, 1);
 	return r;
 }
 
@@ -2693,7 +2693,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	int r;
 	int ring_index = radeon_ring_index(rdev, ring);
 
-	r = radeon_scratch_get(rdev, &scratch);
+	r = radeon_scratch_get(rdev, &scratch, 1);
 	if (r) {
 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
 		return r;
@@ -2710,7 +2710,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib->length_dw = 3;
 	r = radeon_ib_schedule(rdev, ib);
 	if (r) {
-		radeon_scratch_free(rdev, scratch);
+		radeon_scratch_free(rdev, scratch, 1);
 		radeon_ib_free(rdev, &ib);
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
@@ -2733,7 +2733,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 			  scratch, tmp);
 		r = -EINVAL;
 	}
-	radeon_scratch_free(rdev, scratch);
+	radeon_scratch_free(rdev, scratch, 1);
 	radeon_ib_free(rdev, &ib);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b899cec..4f21b68 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -533,8 +533,8 @@ struct radeon_scratch {
 	uint32_t		reg[32];
 };
 
-int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg);
-void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg);
+int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg, unsigned n);
+void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg, unsigned n);
 
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 1dac27d..3880aad 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -136,13 +136,26 @@ void radeon_scratch_init(struct radeon_device *rdev)
 	}
 }
 
-int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg)
+int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg, unsigned n)
 {
-	int i;
+	unsigned i, j, c;
 
-	for (i = 0; i < rdev->scratch.num_reg; i++) {
-		if (rdev->scratch.free[i]) {
-			rdev->scratch.free[i] = false;
+	if (n >= rdev->scratch.num_reg) {
+		dump_stack();
+		dev_err(rdev->dev, "trying to allocate %d scratch reg out of %d\n",
+			n, rdev->scratch.num_reg);
+		return -EINVAL;
+	}
+	for (i = 0; i < rdev->scratch.num_reg - n; i++) {
+		for (j = 0, c = 0; j < n; j++) {
+			if (rdev->scratch.free[i+j]) {
+				c++;
+			}
+		}
+		if (c == n) {
+			for (j = 0; j < n; j++) {
+				rdev->scratch.free[i+j] = false;
+			}
 			*reg = rdev->scratch.reg[i];
 			return 0;
 		}
@@ -150,13 +163,15 @@ int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg)
 	return -EINVAL;
 }
 
-void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg)
+void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg, unsigned n)
 {
-	int i;
+	unsigned i, j;
 
 	for (i = 0; i < rdev->scratch.num_reg; i++) {
 		if (rdev->scratch.reg[i] == reg) {
-			rdev->scratch.free[i] = true;
+			for (j = 0; j < n; j++) {
+				rdev->scratch.free[i+j] = true;
+			}
 			return;
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7d0c331..7733429 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -371,12 +371,12 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	int r;
 
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
+	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 1);
 	if (rdev->wb.use_event) {
 		rdev->fence_drv[ring].scratch_reg = 0;
 		index = R600_WB_EVENT_OFFSET + ring * 4;
 	} else {
-		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
+		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg, 1);
 		if (r) {
 			dev_err(rdev->dev, "fence failed to get scratch register\n");
 			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
@@ -435,7 +435,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 		radeon_fence_wait_empty(rdev, ring);
 		wake_up_all(&rdev->fence_drv[ring].queue);
 		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
+		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 1);
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		rdev->fence_drv[ring].initialized = false;
 	}
-- 
1.7.7.6


* [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-02 20:20 [RFC] Convert fence to use 64bits sequence j.glisse
  2012-05-02 20:20 ` [PATCH 1/4] drm/radeon: allow to allocate adjacent scratch reg j.glisse
@ 2012-05-02 20:20 ` j.glisse
  2012-05-03  7:21   ` Michel Dänzer
  2012-05-02 20:20 ` [PATCH 3/4] drm/radeon: rework fence handling, drop fence list j.glisse
  2012-05-02 20:20 ` [PATCH 4/4] drm/radeon: improve sa allocator to aggressively free idle bo j.glisse
  3 siblings, 1 reply; 17+ messages in thread
From: j.glisse @ 2012-05-02 20:20 UTC (permalink / raw)
  To: dri-devel; +Cc: Jerome Glisse

From: Jerome Glisse <jglisse@redhat.com>

This converts the fence code to use a uint64_t sequence number. The
intention is to use the fact that uint64_t is big enough that we
don't need to care about wrap around.

Tested with and without writeback using 0xFFFFF000 as the initial
fence sequence, thus allowing the wrap around from 32 bits to
64 bits to be tested.
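
(A sketch of how such a test can be wired up; this is my assumption
about the method, not part of the patch: temporarily seed the counter
just below the 32-bit boundary in radeon_fence_driver_init_ring().)

    /* test hack: start ~4096 fences before the 32-bit boundary */
    atomic64_set(&rdev->fence_drv[ring].seq, 0xFFFFF000ULL);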

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/ni.c           |    6 ++--
 drivers/gpu/drm/radeon/r100.c         |    5 ++-
 drivers/gpu/drm/radeon/r300.c         |    5 ++-
 drivers/gpu/drm/radeon/r600.c         |   11 ++++----
 drivers/gpu/drm/radeon/radeon.h       |   20 +++++++-------
 drivers/gpu/drm/radeon/radeon_fence.c |   43 +++++++++++++++++----------------
 drivers/gpu/drm/radeon/si.c           |    6 ++--
 7 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3160a74..416d7c2 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1120,9 +1120,9 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
 	radeon_ring_write(ring, addr & 0xffffffff);
-	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
-	radeon_ring_write(ring, fence->seq);
-	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(2) | INT_SEL(2));
+	radeon_ring_write(ring, lower_32_bits(fence->seq));
+	radeon_ring_write(ring, upper_32_bits(fence->seq));
 }
 
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 80b57c5..d6bd9ea 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -864,8 +864,9 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
-	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
-	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 1));
+	radeon_ring_write(ring, lower_32_bits(fence->seq));
+	radeon_ring_write(ring, upper_32_bits(fence->seq));
 	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
 	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 }
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 6419a59..7c9e30d 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -200,8 +200,9 @@ void r300_fence_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
 	radeon_ring_write(ring, rdev->config.r300.hdp_cntl);
 	/* Emit fence sequence & fire IRQ */
-	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
-	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 1));
+	radeon_ring_write(ring, lower_32_bits(fence->seq));
+	radeon_ring_write(ring, upper_32_bits(fence->seq));
 	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
 	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
 }
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 02abf32..4fbc590 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2313,9 +2313,9 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 		radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 		radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
 		radeon_ring_write(ring, addr & 0xffffffff);
-		radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
-		radeon_ring_write(ring, fence->seq);
-		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(2) | INT_SEL(2));
+		radeon_ring_write(ring, lower_32_bits(fence->seq));
+		radeon_ring_write(ring, upper_32_bits(fence->seq));
 	} else {
 		/* flush read cache over gart */
 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
@@ -2332,9 +2332,10 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 		radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 		radeon_ring_write(ring, WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
 		/* Emit fence sequence & fire IRQ */
-		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 2));
 		radeon_ring_write(ring, ((rdev->fence_drv[fence->ring].scratch_reg - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
-		radeon_ring_write(ring, fence->seq);
+		radeon_ring_write(ring, lower_32_bits(fence->seq));
+		radeon_ring_write(ring, upper_32_bits(fence->seq));
 		/* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
 		radeon_ring_write(ring, PACKET0(CP_INT_STATUS, 0));
 		radeon_ring_write(ring, RB_INT_STAT);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4f21b68..4c5a667 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -281,9 +281,9 @@ void radeon_semaphore_free(struct radeon_device *rdev,
 struct radeon_fence_driver {
 	uint32_t			scratch_reg;
 	uint64_t			gpu_addr;
-	volatile uint32_t		*cpu_addr;
-	atomic_t			seq;
-	uint32_t			last_seq;
+	volatile uint64_t		*cpu_addr;
+	atomic64_t			seq;
+	uint64_t			last_seq;
 	unsigned long			last_activity;
 	wait_queue_head_t		queue;
 	struct list_head		emitted;
@@ -296,7 +296,7 @@ struct radeon_fence {
 	struct kref			kref;
 	struct list_head		list;
 	/* protected by radeon_fence.lock */
-	uint32_t			seq;
+	uint64_t			seq;
 	bool				emitted;
 	bool				signaled;
 	/* RB, DMA, etc. */
@@ -884,12 +884,12 @@ struct radeon_wb {
 	bool                    use_event;
 };
 
-#define RADEON_WB_SCRATCH_OFFSET 0
-#define RADEON_WB_CP_RPTR_OFFSET 1024
-#define RADEON_WB_CP1_RPTR_OFFSET 1280
-#define RADEON_WB_CP2_RPTR_OFFSET 1536
-#define R600_WB_IH_WPTR_OFFSET   2048
-#define R600_WB_EVENT_OFFSET     3072
+#define RADEON_WB_SCRATCH_OFFSET	0
+#define RADEON_WB_CP_RPTR_OFFSET	1024
+#define RADEON_WB_CP1_RPTR_OFFSET	1280
+#define RADEON_WB_CP2_RPTR_OFFSET	1536
+#define R600_WB_IH_WPTR_OFFSET		2048
+#define R600_WB_EVENT_OFFSET		3072
 
 /**
  * struct radeon_pm - power management datas
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7733429..6da1535 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -40,23 +40,25 @@
 #include "radeon.h"
 #include "radeon_trace.h"
 
-static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
+static void radeon_fence_write(struct radeon_device *rdev, u64 seq, int ring)
 {
 	if (rdev->wb.enabled) {
-		*rdev->fence_drv[ring].cpu_addr = cpu_to_le32(seq);
+		*rdev->fence_drv[ring].cpu_addr = cpu_to_le64(seq);
 	} else {
-		WREG32(rdev->fence_drv[ring].scratch_reg, seq);
+		WREG32(rdev->fence_drv[ring].scratch_reg, lower_32_bits(seq));
+		WREG32(rdev->fence_drv[ring].scratch_reg + 4, upper_32_bits(seq));
 	}
 }
 
-static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
+static u64 radeon_fence_read(struct radeon_device *rdev, int ring)
 {
-	u32 seq = 0;
+	u64 seq = 0;
 
 	if (rdev->wb.enabled) {
-		seq = le32_to_cpu(*rdev->fence_drv[ring].cpu_addr);
+		seq = le64_to_cpu(*rdev->fence_drv[ring].cpu_addr);
 	} else {
 		seq = RREG32(rdev->fence_drv[ring].scratch_reg);
+		seq |= ((u64)RREG32(rdev->fence_drv[ring].scratch_reg + 4)) << 32ULL;
 	}
 	return seq;
 }
@@ -70,7 +72,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
-	fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
+	fence->seq = atomic64_inc_return(&rdev->fence_drv[fence->ring].seq);
 	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
 	fence->emitted = true;
@@ -87,7 +89,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 {
 	struct radeon_fence *fence;
 	struct list_head *i, *n;
-	uint32_t seq;
+	uint64_t seq;
 	bool wake = false;
 
 	seq = radeon_fence_read(rdev, ring);
@@ -185,7 +187,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	struct radeon_device *rdev;
 	unsigned long irq_flags, timeout;
-	u32 seq;
+	u64 seq;
 	int i, r;
 	bool signaled;
 
@@ -252,8 +254,8 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 			if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
 
 				/* good news we believe it's a lockup */
-				printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-				     fence->seq, seq);
+				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
+					 fence->seq, seq);
 
 				/* mark the ring as not ready any more */
 				rdev->ring[fence->ring].ready = false;
@@ -371,12 +373,12 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	int r;
 
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 1);
+	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
 	if (rdev->wb.use_event) {
 		rdev->fence_drv[ring].scratch_reg = 0;
-		index = R600_WB_EVENT_OFFSET + ring * 4;
+		index = R600_WB_EVENT_OFFSET + ring * 8;
 	} else {
-		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg, 1);
+		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg, 2);
 		if (r) {
 			dev_err(rdev->dev, "fence failed to get scratch register\n");
 			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
@@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 			rdev->fence_drv[ring].scratch_reg -
 			rdev->scratch.reg_base;
 	}
-	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+	rdev->fence_drv[ring].cpu_addr = (u64*)&rdev->wb.wb[index/4];
 	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
-	radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring);
+	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].seq), ring);
 	rdev->fence_drv[ring].initialized = true;
 	DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
@@ -401,7 +403,7 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].scratch_reg = -1;
 	rdev->fence_drv[ring].cpu_addr = NULL;
 	rdev->fence_drv[ring].gpu_addr = 0;
-	atomic_set(&rdev->fence_drv[ring].seq, 0);
+	atomic64_set(&rdev->fence_drv[ring].seq, 0);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
 	init_waitqueue_head(&rdev->fence_drv[ring].queue);
@@ -435,7 +437,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 		radeon_fence_wait_empty(rdev, ring);
 		wake_up_all(&rdev->fence_drv[ring].queue);
 		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 1);
+		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		rdev->fence_drv[ring].initialized = false;
 	}
@@ -459,12 +461,11 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "--- ring %d ---\n", i);
-		seq_printf(m, "Last signaled fence 0x%08X\n",
-			   radeon_fence_read(rdev, i));
+		seq_printf(m, "Last signaled fence 0x%016llx\n", radeon_fence_read(rdev, i));
 		if (!list_empty(&rdev->fence_drv[i].emitted)) {
 			fence = list_entry(rdev->fence_drv[i].emitted.prev,
 					   struct radeon_fence, list);
-			seq_printf(m, "Last emitted fence %p with 0x%08X\n",
+			seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
 				   fence,  fence->seq);
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index eb49483..6de9610 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1905,9 +1905,9 @@ void si_fence_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 	radeon_ring_write(ring, addr & 0xffffffff);
-	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
-	radeon_ring_write(ring, fence->seq);
-	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(2) | INT_SEL(2));
+	radeon_ring_write(ring, lower_32_bits(fence->seq));
+	radeon_ring_write(ring, upper_32_bits(fence->seq));
 }
 
 /*
-- 
1.7.7.6


* [PATCH 3/4] drm/radeon: rework fence handling, drop fence list
  2012-05-02 20:20 [RFC] Convert fence to use 64bits sequence j.glisse
  2012-05-02 20:20 ` [PATCH 1/4] drm/radeon: allow to allocate adjacent scratch reg j.glisse
  2012-05-02 20:20 ` [PATCH 2/4] drm/radeon: convert fence to uint64_t j.glisse
@ 2012-05-02 20:20 ` j.glisse
  2012-05-02 20:43   ` Adam Jackson
  2012-05-02 20:20 ` [PATCH 4/4] drm/radeon: improve sa allocator to aggressively free idle bo j.glisse
  3 siblings, 1 reply; 17+ messages in thread
From: j.glisse @ 2012-05-02 20:20 UTC (permalink / raw)
  To: dri-devel; +Cc: Jerome Glisse

From: Jerome Glisse <jglisse@redhat.com>

Using a 64-bit fence sequence we can directly compare sequence
numbers to know whether a fence is signaled or not. Thus the fence
list becomes useless, as does the fence lock that mainly protected
the fence list.

Things like ring.ready are no longer behind a lock; this should be
ok as ring.ready is initialized once and will only change when
facing a lockup. Worst case is that we return -EBUSY just after a
successful GPU reset, or we go into the wait state instead of
returning -EBUSY (thus delaying reporting -EBUSY to the fence wait
caller).
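
The heart of the change, in miniature (a sketch; the full logic with
polling and shutdown handling is in the diff below): the signaled test
no longer walks a list, it is a single comparison against the last
sequence value seen on the ring.

    static bool seq_signaled(struct radeon_device *rdev,
                             uint64_t seq, int ring)
    {
            return rdev->fence_drv[ring].last_seq >= seq;
    }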

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h        |    6 +-
 drivers/gpu/drm/radeon/radeon_device.c |    1 -
 drivers/gpu/drm/radeon/radeon_fence.c  |  278 ++++++++++++--------------------
 3 files changed, 102 insertions(+), 183 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4c5a667..141aee2 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -286,15 +286,12 @@ struct radeon_fence_driver {
 	uint64_t			last_seq;
 	unsigned long			last_activity;
 	wait_queue_head_t		queue;
-	struct list_head		emitted;
-	struct list_head		signaled;
 	bool				initialized;
 };
 
 struct radeon_fence {
 	struct radeon_device		*rdev;
 	struct kref			kref;
-	struct list_head		list;
 	/* protected by radeon_fence.lock */
 	uint64_t			seq;
 	bool				emitted;
@@ -316,7 +313,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
 
 /*
  * Tiling registers
@@ -1488,7 +1485,6 @@ struct radeon_device {
 	struct radeon_mode_info		mode_info;
 	struct radeon_scratch		scratch;
 	struct radeon_mman		mman;
-	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	bool				ib_pool_ready;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 3880aad..290b8d0 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -746,7 +746,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->gem.mutex);
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
-	rwlock_init(&rdev->fence_lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
 	init_waitqueue_head(&rdev->irq.idle_queue);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 6da1535..3f34f7b 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -65,32 +65,22 @@ static u64 radeon_fence_read(struct radeon_device *rdev, int ring)
 
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 {
-	unsigned long irq_flags;
-
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	if (fence->emitted) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
 	}
 	fence->seq = atomic64_inc_return(&rdev->fence_drv[fence->ring].seq);
+	/* there is small chance that we overwritte a bigger last_emited
+	 * value, but in normal usage this
+	 */
 	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
 	fence->emitted = true;
-	/* are we the first fence on a previusly idle ring? */
-	if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
-		rdev->fence_drv[fence->ring].last_activity = jiffies;
-	}
-	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+static bool radeon_fence_poll(struct radeon_device *rdev, int ring)
 {
-	struct radeon_fence *fence;
-	struct list_head *i, *n;
 	uint64_t seq;
-	bool wake = false;
 
 	seq = radeon_fence_read(rdev, ring);
 	if (seq == rdev->fence_drv[ring].last_seq)
@@ -98,40 +88,15 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 
 	rdev->fence_drv[ring].last_seq = seq;
 	rdev->fence_drv[ring].last_activity = jiffies;
-
-	n = NULL;
-	list_for_each(i, &rdev->fence_drv[ring].emitted) {
-		fence = list_entry(i, struct radeon_fence, list);
-		if (fence->seq == seq) {
-			n = i;
-			break;
-		}
-	}
-	/* all fence previous to this one are considered as signaled */
-	if (n) {
-		i = n;
-		do {
-			n = i->prev;
-			list_move_tail(i, &rdev->fence_drv[ring].signaled);
-			fence = list_entry(i, struct radeon_fence, list);
-			fence->signaled = true;
-			i = n;
-		} while (i != &rdev->fence_drv[ring].emitted);
-		wake = true;
-	}
-	return wake;
+	return true;
 }
 
 static void radeon_fence_destroy(struct kref *kref)
 {
-	unsigned long irq_flags;
-        struct radeon_fence *fence;
+	struct radeon_fence *fence;
 
 	fence = container_of(kref, struct radeon_fence, kref);
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	list_del(&fence->list);
 	fence->emitted = false;
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
 	if (fence->semaphore.sa_bo) {
 		radeon_semaphore_free(fence->rdev, fence);
 	}
@@ -153,78 +118,95 @@ int radeon_fence_create(struct radeon_device *rdev,
 	(*fence)->seq = 0;
 	(*fence)->ring = ring;
 	(*fence)->semaphore.sa_bo = NULL;
-	INIT_LIST_HEAD(&(*fence)->list);
 	return 0;
 }
 
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
-	unsigned long irq_flags;
-	bool signaled = false;
+	struct radeon_device *rdev;
 
-	if (!fence)
+	if (!fence) {
 		return true;
+	}
 
-	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-	signaled = fence->signaled;
+	rdev = fence->rdev;
+	if (!fence->emitted) {
+		dev_warn(rdev->dev, "Querying an unemitted fence : %p !\n", fence);
+		fence->signaled = true;
+		return true;
+	}
+	if (fence->signaled) {
+		return true;
+	}
+	if (rdev->fence_drv[fence->ring].last_seq >= fence->seq) {
+		fence->signaled = true;
+		return true;
+	}
 	/* if we are shuting down report all fence as signaled */
-	if (fence->rdev->shutdown) {
-		signaled = true;
+	if (rdev->shutdown) {
+		fence->signaled = true;
+		return true;
 	}
-	if (!fence->emitted) {
-		WARN(1, "Querying an unemitted fence : %p !\n", fence);
-		signaled = true;
+	if (!fence->signaled) {
+		radeon_fence_poll(fence->rdev, fence->ring);
+		if (rdev->fence_drv[fence->ring].last_seq >= fence->seq) {
+			fence->signaled = true;
+			return true;
+		}
 	}
-	if (!signaled) {
-		radeon_fence_poll_locked(fence->rdev, fence->ring);
-		signaled = fence->signaled;
+	return fence->signaled;
+}
+
+bool radeon_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring)
+{
+	if (rdev->fence_drv[ring].last_seq >= seq) {
+		return true;
+	}
+	radeon_fence_poll(rdev, ring);
+	if (rdev->fence_drv[ring].last_seq >= seq) {
+		return true;
 	}
-	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
-	return signaled;
+	return false;
 }
 
-int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+static int radeon_wait_seq(struct radeon_device *rdev, u64 target_seq,
+			   unsigned ring, bool intr)
 {
-	struct radeon_device *rdev;
-	unsigned long irq_flags, timeout;
-	u64 seq;
-	int i, r;
+	unsigned long timeout;
+	uint64_t seq;
 	bool signaled;
+	int r;
 
-	if (fence == NULL) {
-		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return -EINVAL;
-	}
+	while (target_seq > rdev->fence_drv[ring].last_seq) {
+		if (!rdev->ring[ring].ready) {
+			return -EBUSY;
+		}
 
-	rdev = fence->rdev;
-	signaled = radeon_fence_signaled(fence);
-	while (!signaled) {
-		read_lock_irqsave(&rdev->fence_lock, irq_flags);
 		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
-		if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) {
+		if (time_after(rdev->fence_drv[ring].last_activity, timeout)) {
 			/* the normal case, timeout is somewhere before last_activity */
-			timeout = rdev->fence_drv[fence->ring].last_activity - timeout;
+			timeout = rdev->fence_drv[ring].last_activity - timeout;
 		} else {
 			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
-			 * anyway we will just wait for the minimum amount and then check for a lockup */
+			 * anyway we will just wait for the minimum amount and then check for a lockup
+			 */
 			timeout = 1;
 		}
 		/* save current sequence value used to check for GPU lockups */
-		seq = rdev->fence_drv[fence->ring].last_seq;
-		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		seq = rdev->fence_drv[ring].last_seq;
 
 		trace_radeon_fence_wait_begin(rdev->ddev, seq);
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+		radeon_irq_kms_sw_irq_get(rdev, ring);
 		if (intr) {
-			r = wait_event_interruptible_timeout(
-				rdev->fence_drv[fence->ring].queue,
-				(signaled = radeon_fence_signaled(fence)), timeout);
+			r = wait_event_interruptible_timeout(rdev->fence_drv[ring].queue,
+							     (signaled = radeon_seq_signaled(rdev, target_seq, ring)),
+							     timeout);
 		} else {
-			r = wait_event_timeout(
-				rdev->fence_drv[fence->ring].queue,
-				(signaled = radeon_fence_signaled(fence)), timeout);
+			r = wait_event_timeout(rdev->fence_drv[ring].queue,
+					       (signaled = radeon_seq_signaled(rdev, target_seq, ring)),
+					       timeout);
 		}
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+		radeon_irq_kms_sw_irq_put(rdev, ring);
 		if (unlikely(r < 0)) {
 			return r;
 		}
@@ -237,28 +219,18 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 				continue;
 			}
 
-			write_lock_irqsave(&rdev->fence_lock, irq_flags);
 			/* check if sequence value has changed since last_activity */
-			if (seq != rdev->fence_drv[fence->ring].last_seq) {
-				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+			if (seq != rdev->fence_drv[ring].last_seq) {
 				continue;
 			}
 
-			/* change sequence value on all rings, so nobody else things there is a lockup */
-			for (i = 0; i < RADEON_NUM_RINGS; ++i)
-				rdev->fence_drv[i].last_seq -= 0x10000;
-
-			rdev->fence_drv[fence->ring].last_activity = jiffies;
-			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-
-			if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-
+			if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
 				/* good news we believe it's a lockup */
 				dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
-					 fence->seq, seq);
+					 target_seq, seq);
 
 				/* mark the ring as not ready any more */
-				rdev->ring[fence->ring].ready = false;
+				rdev->ring[ring].ready = false;
 				return -EDEADLK;
 			}
 		}
@@ -266,52 +238,31 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->ring[ring].ready) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return -EBUSY;
+	if (fence == NULL) {
+		WARN(1, "Querying an invalid fence : %p !\n", fence);
+		return -EINVAL;
 	}
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return -ENOENT;
+
+	r = radeon_wait_seq(fence->rdev, fence->seq, fence->ring, intr);
+	if (r) {
+		return r;
 	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.next,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+	fence->signaled = true;
+	return 0;
 }
 
-int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	struct radeon_fence *fence;
-	int r;
+	return radeon_wait_seq(rdev, rdev->fence_drv[ring].last_seq + 1ULL, ring, false);
+}
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->ring[ring].ready) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return -EBUSY;
-	}
-	if (list_empty(&rdev->fence_drv[ring].emitted)) {
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
-	}
-	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
-			   struct radeon_fence, list);
-	radeon_fence_ref(fence);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	r = radeon_fence_wait(fence, false);
-	radeon_fence_unref(&fence);
-	return r;
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+{
+	return radeon_wait_seq(rdev, atomic64_read(&rdev->fence_drv[ring].seq), ring, false);
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
@@ -332,47 +283,32 @@ void radeon_fence_unref(struct radeon_fence **fence)
 
 void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
 	bool wake;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	wake = radeon_fence_poll_locked(rdev, ring);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	wake = radeon_fence_poll(rdev, ring);
 	if (wake) {
 		wake_up_all(&rdev->fence_drv[ring].queue);
 	}
 }
 
-int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
+unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
-	int not_processed = 0;
-
-	read_lock_irqsave(&rdev->fence_lock, irq_flags);
-	if (!rdev->fence_drv[ring].initialized) {
-		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
-	}
+	uint64_t emitted;
 
-	if (!list_empty(&rdev->fence_drv[ring].emitted)) {
-		struct list_head *ptr;
-		list_for_each(ptr, &rdev->fence_drv[ring].emitted) {
-			/* count up to 3, that's enought info */
-			if (++not_processed >= 3)
-				break;
-		}
+	radeon_fence_poll(rdev, ring);
+	emitted = atomic64_read(&rdev->fence_drv[ring].seq) - rdev->fence_drv[ring].last_seq;
+	/* to avoid 32bits wrap around */
+	if (emitted > 0x10000000) {
+		emitted = 0x10000000;
 	}
-	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-	return not_processed;
+	return (unsigned)emitted;
 }
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
-	unsigned long irq_flags;
 	uint64_t index;
 	int r;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
 	if (rdev->wb.use_event) {
 		rdev->fence_drv[ring].scratch_reg = 0;
@@ -381,7 +317,6 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg, 2);
 		if (r) {
 			dev_err(rdev->dev, "fence failed to get scratch register\n");
-			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 			return r;
 		}
 		index = RADEON_WB_SCRATCH_OFFSET +
@@ -392,9 +327,8 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].seq), ring);
 	rdev->fence_drv[ring].initialized = true;
-	DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
+	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
 		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
 }
 
@@ -403,23 +337,20 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	rdev->fence_drv[ring].scratch_reg = -1;
 	rdev->fence_drv[ring].cpu_addr = NULL;
 	rdev->fence_drv[ring].gpu_addr = 0;
+	rdev->fence_drv[ring].last_activity = jiffies;
 	atomic64_set(&rdev->fence_drv[ring].seq, 0);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
+	rdev->fence_drv[ring].last_seq = 0;
 	init_waitqueue_head(&rdev->fence_drv[ring].queue);
 	rdev->fence_drv[ring].initialized = false;
 }
 
 int radeon_fence_driver_init(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
-	write_lock_irqsave(&rdev->fence_lock, irq_flags);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		radeon_fence_driver_init_ring(rdev, ring);
 	}
-	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	if (radeon_debugfs_fence_init(rdev)) {
 		dev_err(rdev->dev, "fence debugfs file creation failed\n");
 	}
@@ -428,7 +359,6 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
 
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-	unsigned long irq_flags;
 	int ring;
 
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
@@ -436,9 +366,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 			continue;
 		radeon_fence_wait_empty(rdev, ring);
 		wake_up_all(&rdev->fence_drv[ring].queue);
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg, 2);
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		rdev->fence_drv[ring].initialized = false;
 	}
 }
@@ -453,7 +381,6 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_fence *fence;
 	int i;
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -461,13 +388,10 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "--- ring %d ---\n", i);
-		seq_printf(m, "Last signaled fence 0x%016llx\n", radeon_fence_read(rdev, i));
-		if (!list_empty(&rdev->fence_drv[i].emitted)) {
-			fence = list_entry(rdev->fence_drv[i].emitted.prev,
-					   struct radeon_fence, list);
-			seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
-				   fence,  fence->seq);
-		}
+		seq_printf(m, "Last signaled 0x%016llx\n",
+			   radeon_fence_read(rdev, i));
+		seq_printf(m, "Last emitted  0x%016lx\n",
+			   atomic64_read(&rdev->fence_drv[i].seq));
 	}
 	return 0;
 }
-- 
1.7.7.6


* [PATCH 4/4] drm/radeon: improve sa allocator to aggressively free idle bo
  2012-05-02 20:20 [RFC] Convert fence to use 64bits sequence j.glisse
                   ` (2 preceding siblings ...)
  2012-05-02 20:20 ` [PATCH 3/4] drm/radeon: rework fence handling, drop fence list j.glisse
@ 2012-05-02 20:20 ` j.glisse
  3 siblings, 0 replies; 17+ messages in thread
From: j.glisse @ 2012-05-02 20:20 UTC (permalink / raw)
  To: dri-devel; +Cc: Jerome Glisse

From: Jerome Glisse <jglisse@redhat.com>

With the fence rework it's now easier to aggressively free idle bo
when there is no hole to satisfy the current allocation request.
The hit of some cs ioctls having to go through the sa bo list and
free entries is minimal; it happens once in a while and avoids some
fence waiting.
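
The strategy in miniature (a sketch, not the exact code in the diff
below): the sa_bo list is ordered oldest first, so we can keep freeing
entries until we hit the first fence that has not signaled yet.

    list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
            if (!sa_bo->fence || !sa_bo->fence->emitted)
                    break;          /* not submitted yet, stop */
            if (rdev->fence_drv[sa_bo->fence->ring].last_seq <
                sa_bo->fence->seq)
                    break;          /* first still-busy bo, stop */
            radeon_sa_bo_free_locked(rdev, sa_bo);
    }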

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/radeon/radeon.h       |    1 +
 drivers/gpu/drm/radeon/radeon_fence.c |    2 +-
 drivers/gpu/drm/radeon/radeon_sa.c    |   51 +++++++++++++++++++++++++++++---
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 141aee2..5459722 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -314,6 +314,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+bool radeon_fence_poll(struct radeon_device *rdev, int ring);
 
 /*
  * Tiling registers
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 3f34f7b..043f431 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -78,7 +78,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 	return 0;
 }
 
-static bool radeon_fence_poll(struct radeon_device *rdev, int ring)
+bool radeon_fence_poll(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq;
 
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index e758aaa..2cbf5ba 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -48,6 +48,10 @@
 #include "drm.h"
 #include "radeon.h"
 
+static bool radeon_sa_manager_try_free(struct radeon_device *rdev,
+				       struct radeon_sa_manager *sa_manager,
+				       struct radeon_sa_bo *oldest);
+
 int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 			      struct radeon_sa_manager *sa_manager,
 			      unsigned size, u32 domain)
@@ -77,7 +81,16 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
 	struct radeon_sa_bo *sa_bo, *tmp;
 
 	if (!list_empty(&sa_manager->sa_bo)) {
-		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		struct radeon_sa_bo *oldest;
+
+		/* try to free them */
+		oldest =  list_entry(sa_manager->sa_bo.next, struct radeon_sa_bo, list);
+		radeon_sa_manager_try_free(rdev, sa_manager, oldest);
+
+		if (!list_empty(&sa_manager->sa_bo)) {
+			/* something went wrong */
+			dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+		}
 	}
 	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
 		list_del_init(&sa_bo->list);
@@ -171,15 +184,43 @@ static void radeon_sa_bo_free_locked(struct radeon_device *rdev, struct radeon_s
 }
 
 static bool radeon_sa_manager_try_free(struct radeon_device *rdev,
+				       struct radeon_sa_manager *sa_manager,
 				       struct radeon_sa_bo *oldest)
 {
-	if (oldest->fence && oldest->fence->emitted) {
-		if (radeon_fence_signaled(oldest->fence)) {
+	struct radeon_sa_bo *tmp, *sa_bo;
+	unsigned ring, free_count = 0;
+
+	if (oldest->fence == NULL || !oldest->fence->emitted) {
+		return false;
+	}
+	ring = oldest->fence->ring;
+	radeon_fence_poll(rdev, ring);
+	if (rdev->fence_drv[ring].last_seq < oldest->fence->seq) {
+		return false;
+	}
+	free_count++;
+	/* go over the remaining of the list and try to free as much
+	 * as possible
+	 */
+	sa_bo = oldest;
+	list_for_each_entry_safe_continue(sa_bo, tmp, &sa_manager->sa_bo, list) {
+		if (sa_bo->fence == NULL || !sa_bo->fence->emitted) {
+			radeon_sa_bo_free_locked(rdev, oldest);
+			return true;
+		}
+		if (ring != sa_bo->fence->ring) {
+			ring = sa_bo->fence->ring;
+			radeon_fence_poll(rdev, ring);
+		}
+		if (rdev->fence_drv[ring].last_seq < sa_bo->fence->seq) {
 			radeon_sa_bo_free_locked(rdev, oldest);
 			return true;
 		}
+		radeon_sa_bo_free_locked(rdev, sa_bo);
+		free_count++;
 	}
-	return false;
+	radeon_sa_bo_free_locked(rdev, oldest);
+	return true;
 }
 
 /*
@@ -260,7 +301,7 @@ retry:
 		}
 	}
 	/* try to be optimist and free the oldest one */
-	if (radeon_sa_manager_try_free(rdev, oldest)) {
+	if (radeon_sa_manager_try_free(rdev, sa_manager, oldest)) {
 		goto retry;
 	}
 
-- 
1.7.7.6


* Re: [PATCH 3/4] drm/radeon: rework fence handling, drop fence list
  2012-05-02 20:20 ` [PATCH 3/4] drm/radeon: rework fence handling, drop fence list j.glisse
@ 2012-05-02 20:43   ` Adam Jackson
  0 siblings, 0 replies; 17+ messages in thread
From: Adam Jackson @ 2012-05-02 20:43 UTC (permalink / raw)
  To: j.glisse; +Cc: Jerome Glisse, dri-devel

On 5/2/12 4:20 PM, j.glisse@gmail.com wrote:
> +	/* there is small chance that we overwritte a bigger last_emited
> +	 * value, but in normal usage this
> +	 */

Seems unfinished.  Also "overwrite".

- ajax


* Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-02 20:20 ` [PATCH 2/4] drm/radeon: convert fence to uint64_t j.glisse
@ 2012-05-03  7:21   ` Michel Dänzer
  2012-05-03 11:39     ` Christian König
  0 siblings, 1 reply; 17+ messages in thread
From: Michel Dänzer @ 2012-05-03  7:21 UTC (permalink / raw)
  To: j.glisse; +Cc: dri-devel

On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote: 
> From: Jerome Glisse <jglisse@redhat.com>
> 
> This converts the fence code to use a uint64_t sequence number. The
> intention is to use the fact that uint64_t is big enough that we
> don't need to care about wrap around.
> 
> Tested with and without writeback using 0xFFFFF000 as the initial
> fence sequence, thus allowing the wrap around from 32 bits to
> 64 bits to be tested.
> 
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>

[...]

> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> index 7733429..6da1535 100644
> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
>  			rdev->fence_drv[ring].scratch_reg -
>  			rdev->scratch.reg_base;
>  	}
> -	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
> +	rdev->fence_drv[ring].cpu_addr = (u64*)&rdev->wb.wb[index/4];

Might want to ensure cpu_addr is 64 bit aligned, or there might be
trouble on some architectures.
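
(A sketch of the kind of fix meant here, as my illustration: round the
writeback index up to an 8-byte boundary before taking the address.)

    index = ALIGN(index, 8);    /* keep the u64 fence slot aligned */
    rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];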


With this change, Cayman cards will already use six scratch registers
for the rings. It won't be possible to extend this scheme for even one
additional ring, will it?


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer


* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03  7:21   ` Michel Dänzer
@ 2012-05-03 11:39     ` Christian König
  2012-05-03 15:56       ` Jerome Glisse
  0 siblings, 1 reply; 17+ messages in thread
From: Christian König @ 2012-05-03 11:39 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On 03.05.2012 09:21, Michel Dänzer wrote:
> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>> From: Jerome Glisse<jglisse@redhat.com>
>>
>> This converts the fence code to use a uint64_t sequence number. The
>> intention is to use the fact that uint64_t is big enough that we
>> don't need to care about wrap around.
>>
>> Tested with and without writeback using 0xFFFFF000 as the initial
>> fence sequence, thus allowing the wrap around from 32 bits to
>> 64 bits to be tested.
>>
>> Signed-off-by: Jerome Glisse<jglisse@redhat.com>
> [...]
>
>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
>> index 7733429..6da1535 100644
>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
>>   			rdev->fence_drv[ring].scratch_reg -
>>   			rdev->scratch.reg_base;
>>   	}
>> -	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>> +	rdev->fence_drv[ring].cpu_addr = (u64*)&rdev->wb.wb[index/4];
> Might want to ensure cpu_addr is 64 bit aligned, or there might be
> trouble on some architectures.
>
>
> With this change, Cayman cards will already use six scratch registers
> for the rings. It won't be possible to extend this scheme for even one
> additional ring, will it?

That won't work anyway, since not all rings can deal with 64 bit fences, 
so we need to still use 32 bit signaling and extend them to 64 bit while 
processing the fence value.
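
A minimal sketch of that idea (my illustration, not Christian's actual
patch): keep the hardware sequence 32-bit and extend it against the
driver's 64-bit bookkeeping value while processing fences.

    static uint64_t extend_seq(uint64_t last_seq, uint32_t hw_seq)
    {
            /* splice the 32-bit hw value into the 64-bit counter */
            uint64_t seq = (last_seq & 0xffffffff00000000ULL) | hw_seq;

            if (seq < last_seq)             /* hw counter wrapped */
                    seq += 0x100000000ULL;
            return seq;
    }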

Already working on that.

Christian.


* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 11:39     ` Christian König
@ 2012-05-03 15:56       ` Jerome Glisse
  2012-05-03 16:29         ` Alex Deucher
  0 siblings, 1 reply; 17+ messages in thread
From: Jerome Glisse @ 2012-05-03 15:56 UTC (permalink / raw)
  To: Christian König; +Cc: Michel Dänzer, dri-devel

On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
> On 03.05.2012 09:21, Michel Dänzer wrote:
>>
>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>
>>> From: Jerome Glisse<jglisse@redhat.com>
>>>
>>> This converts the fence code to use a uint64_t sequence number. The
>>> intention is to use the fact that uint64_t is big enough that we
>>> don't need to care about wrap around.
>>>
>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>> fence sequence, thus allowing the wrap around from 32 bits to
>>> 64 bits to be tested.
>>>
>>> Signed-off-by: Jerome Glisse<jglisse@redhat.com>
>>
>> [...]
>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>> index 7733429..6da1535 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>> radeon_device *rdev, int ring)
>>>                        rdev->fence_drv[ring].scratch_reg -
>>>                        rdev->scratch.reg_base;
>>>        }
>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>> +       rdev->fence_drv[ring].cpu_addr = (u64*)&rdev->wb.wb[index/4];
>>
>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>> trouble on some architectures.
>>
>>
>> With this change, Cayman cards will already use six scratch registers
>> for the rings. It won't be possible to extend this scheme for even one
>> additional ring, will it?
>
>
> That won't work anyway, since not all rings can deal with 64 bit fences, so
> we need to still use 32 bit signaling and extend them to 64 bit while
> processing the fence value.
>
> Already working on that.
>
> Christian.

This patch is fine with rings that can't directly emit 64 bits: all
you have to do is fix the emit_fence callback to handle it properly,
and then fix radeon_fence_read, which can be moved to a ring-specific
callback. Anyway, the point is that this patchset works and is fine
on the current set of rings we have, and it can work just as easily
for rings without easy 64-bit value emission. So please explain
further why those patches can't work, because as I just explained I
don't see why.
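
(A sketch of that split, with hypothetical names, not code from the
series: the common code keeps the u64 bookkeeping and each ring
supplies its own read.)

    /* hypothetical per-ring hook replacing the common fence read */
    struct radeon_fence_ops {
            uint64_t (*read_seq)(struct radeon_device *rdev, int ring);
    };

    static uint64_t radeon_fence_read(struct radeon_device *rdev, int ring)
    {
            /* native 64-bit rings read the value directly; 32-bit-only
             * rings would extend it against last_seq in their callback */
            return rdev->fence_drv[ring].ops->read_seq(rdev, ring);
    }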

I have uploaded a v2 of this patchset that handles Cayman and newer
possibly running out of scratch regs, and I also fixed the alignment
to be 64-bit:
http://people.freedesktop.org/~glisse/reset3/

Cheers,
Jerome


* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 15:56       ` Jerome Glisse
@ 2012-05-03 16:29         ` Alex Deucher
  2012-05-03 16:34           ` Jerome Glisse
  0 siblings, 1 reply; 17+ messages in thread
From: Alex Deucher @ 2012-05-03 16:29 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: Christian König, Michel Dänzer, dri-devel

On Thu, May 3, 2012 at 11:56 AM, Jerome Glisse <j.glisse@gmail.com> wrote:
> On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
>> On 03.05.2012 09:21, Michel Dänzer wrote:
>>>
>>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>>
>>>> From: Jerome Glisse<jglisse@redhat.com>
>>>>
>>>> This converts the fence code to use a uint64_t sequence number. The
>>>> intention is to use the fact that uint64_t is big enough that we
>>>> don't need to care about wrap around.
>>>>
>>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>>> fence sequence, thus allowing the wrap around from 32 bits to
>>>> 64 bits to be tested.
>>>>
>>>> Signed-off-by: Jerome Glisse<jglisse@redhat.com>
>>>
>>> [...]
>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>> index 7733429..6da1535 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>>> radeon_device *rdev, int ring)
>>>>                        rdev->fence_drv[ring].scratch_reg -
>>>>                        rdev->scratch.reg_base;
>>>>        }
>>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>>> +       rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];
>>>
>>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>>> trouble on some architectures.
>>>
>>>
>>> With this change, Cayman cards will already use six scratch registers
>>> for the rings. It won't be possible to extend this scheme for even one
>>> additional ring, will it?
>>
>>
>> That won't work anyway, since not all rings can deal with 64 bit fences, so
>> we need to still use 32 bit signaling and extend them to 64 bit while
>> processing the fence value.
>>
>> Already working on that.
>>
>> Christian.
>
> This patch is fine with rings that can't directly emit 64 bits: all
> you have to do is fix the emit_fence callback to handle it properly,
> and then fix radeon_fence_read, which can be moved to a ring-specific
> callback. Anyway, the point is that the patchset works and is fine on
> the current set of rings we have, and it can work just as easily for
> rings without easy 64-bit value emission. So please explain further
> why those patches can't work, because as I just explained, I don't
> see why.
>
> I have updated the patchset to a v2 that handles Cayman and newer
> possibly running out of scratch regs, and I also fixed the alignment
> issue so the fence address is 64-bit aligned.

FWIW, we don't actually use scratch regs any more on r6xx+ (non-AGP at
least); it's just memory writes, so we could make the scratch pool
bigger.

Alex

> http://people.freedesktop.org/~glisse/reset3/
>
> Cheers,
> Jerome
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 16:29         ` Alex Deucher
@ 2012-05-03 16:34           ` Jerome Glisse
  2012-05-03 16:45             ` Christian König
  2012-05-03 20:46             ` Jerome Glisse
  0 siblings, 2 replies; 17+ messages in thread
From: Jerome Glisse @ 2012-05-03 16:34 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Christian König, Michel Dänzer, dri-devel

On Thu, May 3, 2012 at 12:29 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
> On Thu, May 3, 2012 at 11:56 AM, Jerome Glisse <j.glisse@gmail.com> wrote:
>> On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
>>> On 03.05.2012 09:21, Michel Dänzer wrote:
>>>>
>>>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>>>
>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>
>>>>> This converts the fence to use a uint64_t sequence number; the intention is
>>>>> to use the fact that uint64_t is big enough that we don't need to
>>>>> care about wrap around.
>>>>>
>>>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>>>> fence sequence, thus allowing the wrap around from 32 bits to
>>>>> 64 bits to be tested.
>>>>>
>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>
>>>> [...]
>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>> index 7733429..6da1535 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>>>> radeon_device *rdev, int ring)
>>>>>                        rdev->fence_drv[ring].scratch_reg -
>>>>>                        rdev->scratch.reg_base;
>>>>>        }
>>>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>>>> +       rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];
>>>>
>>>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>>>> trouble on some architectures.
>>>>
>>>>
>>>> With this change, Cayman cards will already use six scratch registers
>>>> for the rings. It won't be possible to extend this scheme for even one
>>>> additional ring, will it?
>>>
>>>
>>> That won't work anyway, since not all rings can deal with 64 bit fences, so
>>> we need to still use 32 bit signaling and extend them to 64 bit while
>>> processing the fence value.
>>>
>>> Already working on that.
>>>
>>> Christian.
>>
>> This patch is fine with rings that can't directly emit 64 bits: all
>> you have to do is fix the emit_fence callback to handle it properly,
>> and then fix radeon_fence_read, which can be moved to a ring-specific
>> callback. Anyway, the point is that the patchset works and is fine on
>> the current set of rings we have, and it can work just as easily for
>> rings without easy 64-bit value emission. So please explain further
>> why those patches can't work, because as I just explained, I don't
>> see why.
>>
>> I have updated the patchset to a v2 that handles Cayman and newer
>> possibly running out of scratch regs, and I also fixed the alignment
>> issue so the fence address is 64-bit aligned.
>
> FWIW, we don't actually use scratch regs any more on r6xx+ (non-AGP at
> least); it's just memory writes, so we could make the scratch pool
> bigger.
>
> Alex
>

That's what my v2 does: it just drops the scratch regs for Cayman and newer.

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 16:34           ` Jerome Glisse
@ 2012-05-03 16:45             ` Christian König
  2012-05-03 20:46             ` Jerome Glisse
  1 sibling, 0 replies; 17+ messages in thread
From: Christian König @ 2012-05-03 16:45 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: Alex Deucher, dri-devel, Michel Dänzer

On 03.05.2012 18:34, Jerome Glisse wrote:
> On Thu, May 3, 2012 at 12:29 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Thu, May 3, 2012 at 11:56 AM, Jerome Glisse <j.glisse@gmail.com> wrote:
>>> On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
>>>> On 03.05.2012 09:21, Michel Dänzer wrote:
>>>>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>>
>>>>>> This converts the fence to use a uint64_t sequence number; the intention is
>>>>>> to use the fact that uint64_t is big enough that we don't need to
>>>>>> care about wrap around.
>>>>>>
>>>>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>>>>> fence sequence, thus allowing the wrap around from 32 bits to
>>>>>> 64 bits to be tested.
>>>>>>
>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>> [...]
>>>>>
>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> index 7733429..6da1535 100644
>>>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>>>>> radeon_device *rdev, int ring)
>>>>>>                         rdev->fence_drv[ring].scratch_reg -
>>>>>>                         rdev->scratch.reg_base;
>>>>>>         }
>>>>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>>>>> +       rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];
>>>>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>>>>> trouble on some architectures.
>>>>>
>>>>>
>>>>> With this change, Cayman cards will already use six scratch registers
>>>>> for the rings. It won't be possible to extend this scheme for even one
>>>>> additional ring, will it?
>>>>
>>>> That won't work anyway, since not all rings can deal with 64 bit fences, so
>>>> we need to still use 32 bit signaling and extend them to 64 bit while
>>>> processing the fence value.
>>>>
>>>> Already working on that.
>>>>
>>>> Christian.
>>> This patch is fine with rings that can't directly emit 64 bits: all
>>> you have to do is fix the emit_fence callback to handle it properly,
>>> and then fix radeon_fence_read, which can be moved to a ring-specific
>>> callback. Anyway, the point is that the patchset works and is fine on
>>> the current set of rings we have, and it can work just as easily for
>>> rings without easy 64-bit value emission. So please explain further
>>> why those patches can't work, because as I just explained, I don't
>>> see why.
>>>
>>> I have updated the patchset to a v2 that handles Cayman and newer
>>> possibly running out of scratch regs, and I also fixed the alignment
>>> issue so the fence address is 64-bit aligned.
>> FWIW, we don't actually use scratch regs any more on r6xx+ (non-AGP at
>> least); it's just memory writes, so we could make the scratch pool
>> bigger.
>>
>> Alex
>>
> That's what my v2 does: it just drops the scratch regs for Cayman and newer.
>
> Cheers,
> Jerome
>
I actually always wanted to change that so that scratch regs are only
allocated if wb is really disabled; I just never had time to do so
(sigh).
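
Something along these lines, I guess (just a sketch, not a tested
patch; it reuses the radeon_scratch_get from patch 1 and the
writeback offsets already used elsewhere in the driver):

	/* only fall back to a scratch reg when writeback is disabled */
	if (rdev->wb.enabled) {
		rdev->fence_drv[ring].scratch_reg = 0;
		index = R600_WB_EVENT_OFFSET + ring * 4;
	} else {
		r = radeon_scratch_get(rdev,
				       &rdev->fence_drv[ring].scratch_reg, 1);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch reg\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
	}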

Cheers,
Christian.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 16:34           ` Jerome Glisse
  2012-05-03 16:45             ` Christian König
@ 2012-05-03 20:46             ` Jerome Glisse
  2012-05-03 21:04               ` Alex Deucher
  1 sibling, 1 reply; 17+ messages in thread
From: Jerome Glisse @ 2012-05-03 20:46 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Christian König, Michel Dänzer, dri-devel

On Thu, May 3, 2012 at 12:34 PM, Jerome Glisse <j.glisse@gmail.com> wrote:
> On Thu, May 3, 2012 at 12:29 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>> On Thu, May 3, 2012 at 11:56 AM, Jerome Glisse <j.glisse@gmail.com> wrote:
>>> On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
>>>> On 03.05.2012 09:21, Michel Dänzer wrote:
>>>>>
>>>>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>>>>
>>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>>
>>>>>> This converts the fence to use a uint64_t sequence number; the intention is
>>>>>> to use the fact that uint64_t is big enough that we don't need to
>>>>>> care about wrap around.
>>>>>>
>>>>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>>>>> fence sequence, thus allowing the wrap around from 32 bits to
>>>>>> 64 bits to be tested.
>>>>>>
>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>
>>>>> [...]
>>>>>
>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> index 7733429..6da1535 100644
>>>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>>>>> radeon_device *rdev, int ring)
>>>>>>                        rdev->fence_drv[ring].scratch_reg -
>>>>>>                        rdev->scratch.reg_base;
>>>>>>        }
>>>>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>>>>> +       rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];
>>>>>
>>>>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>>>>> trouble on some architectures.
>>>>>
>>>>>
>>>>> With this change, Cayman cards will already use six scratch registers
>>>>> for the rings. It won't be possible to extend this scheme for even one
>>>>> additional ring, will it?
>>>>
>>>>
>>>> That won't work anyway, since not all rings can deal with 64 bit fences, so
>>>> we need to still use 32 bit signaling and extend them to 64 bit while
>>>> processing the fence value.
>>>>
>>>> Already working on that.
>>>>
>>>> Christian.
>>>
>>> This patch is fine with rings that can't directly emit 64 bits: all
>>> you have to do is fix the emit_fence callback to handle it properly,
>>> and then fix radeon_fence_read, which can be moved to a ring-specific
>>> callback. Anyway, the point is that the patchset works and is fine on
>>> the current set of rings we have, and it can work just as easily for
>>> rings without easy 64-bit value emission. So please explain further
>>> why those patches can't work, because as I just explained, I don't
>>> see why.
>>>
>>> I have updated the patchset to a v2 that handles Cayman and newer
>>> possibly running out of scratch regs, and I also fixed the alignment
>>> issue so the fence address is 64-bit aligned.
>>
>> FWIW, we don't actually use scratch regs any more on r6xx+ (non-AGP at
>> least); it's just memory writes, so we could make the scratch pool
>> bigger.
>>
>> Alex
>>
>
> That's what my v2 does: it just drops the scratch regs for Cayman and newer.
>
> Cheers,
> Jerome

Btw, I uploaded a v3 with some more things, like a WARN_ON_ONCE for
extra safety, better comments, and an attempt to mitigate the race
between the CPU reading and the GPU writing the fence.
http://people.freedesktop.org/~glisse/reset3/
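
The idea behind the race mitigation is roughly this (a sketch, not
the exact v3 code; last_seq is assumed here to be a per-ring field
holding the last sequence the CPU has seen, and fence_drv[ring].seq
the last one emitted):

	u64 last, seq;

	/* the GPU may only have written 32 bits, so rebuild the full
	 * sequence from the last value the CPU saw; if the lower half
	 * went backward, the hardware counter wrapped around */
	last = atomic64_read(&rdev->fence_drv[ring].last_seq);
	seq = (last & 0xFFFFFFFF00000000ULL) | radeon_fence_read(rdev, ring);
	if (seq < last)
		seq += 0x100000000ULL;
	/* extra safety: a fence can never be newer than the last emitted */
	WARN_ON_ONCE(seq > rdev->fence_drv[ring].seq);
	atomic64_set(&rdev->fence_drv[ring].last_seq, seq);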

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 20:46             ` Jerome Glisse
@ 2012-05-03 21:04               ` Alex Deucher
  2012-05-03 21:06                 ` [PATCH] drm/radeon: clarify and extend wb setup on APUs and NI+ asics alexdeucher
  2012-05-03 21:36                 ` Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t Jerome Glisse
  0 siblings, 2 replies; 17+ messages in thread
From: Alex Deucher @ 2012-05-03 21:04 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: Christian König, Michel Dänzer, dri-devel

On Thu, May 3, 2012 at 4:46 PM, Jerome Glisse <j.glisse@gmail.com> wrote:
> On Thu, May 3, 2012 at 12:34 PM, Jerome Glisse <j.glisse@gmail.com> wrote:
>> On Thu, May 3, 2012 at 12:29 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>> On Thu, May 3, 2012 at 11:56 AM, Jerome Glisse <j.glisse@gmail.com> wrote:
>>>> On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
>>>>> On 03.05.2012 09:21, Michel Dänzer wrote:
>>>>>>
>>>>>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>>>>>
>>>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>>>
>>>>>>> This converts the fence to use a uint64_t sequence number; the intention is
>>>>>>> to use the fact that uint64_t is big enough that we don't need to
>>>>>>> care about wrap around.
>>>>>>>
>>>>>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>>>>>> fence sequence, thus allowing the wrap around from 32 bits to
>>>>>>> 64 bits to be tested.
>>>>>>>
>>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>> index 7733429..6da1535 100644
>>>>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>>>>>> radeon_device *rdev, int ring)
>>>>>>>                        rdev->fence_drv[ring].scratch_reg -
>>>>>>>                        rdev->scratch.reg_base;
>>>>>>>        }
>>>>>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>>>>>> +       rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];
>>>>>>
>>>>>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>>>>>> trouble on some architectures.
>>>>>>
>>>>>>
>>>>>> With this change, Cayman cards will already use six scratch registers
>>>>>> for the rings. It won't be possible to extend this scheme for even one
>>>>>> additional ring, will it?
>>>>>
>>>>>
>>>>> That won't work anyway, since not all rings can deal with 64 bit fences, so
>>>>> we need to still use 32 bit signaling and extend them to 64 bit while
>>>>> processing the fence value.
>>>>>
>>>>> Already working on that.
>>>>>
>>>>> Christian.
>>>>
>>>> This patch is fine with rings that can't directly emit 64 bits: all
>>>> you have to do is fix the emit_fence callback to handle it properly,
>>>> and then fix radeon_fence_read, which can be moved to a ring-specific
>>>> callback. Anyway, the point is that the patchset works and is fine on
>>>> the current set of rings we have, and it can work just as easily for
>>>> rings without easy 64-bit value emission. So please explain further
>>>> why those patches can't work, because as I just explained, I don't
>>>> see why.
>>>>
>>>> I have updated the patchset to a v2 that handles Cayman and newer
>>>> possibly running out of scratch regs, and I also fixed the alignment
>>>> issue so the fence address is 64-bit aligned.
>>>
>>> FWIW, we don't actually use scratch regs any more on r6xx+ (non-AGP at
>>> least); it's just memory writes, so we could make the scratch pool
>>> bigger.
>>>
>>> Alex
>>>
>>
>> That's what my v2 does: it just drops the scratch regs for Cayman and newer.
>>
>> Cheers,
>> Jerome
>
> Btw uploaded a v3 with some more thing like warnononce for extra
> safety, better comment and trying to mitigate race btw cpu reading and
> gpu writing fence.
> http://people.freedesktop.org/~glisse/reset3/

In patch:
http://people.freedesktop.org/~glisse/reset3/0003-drm-radeon-rework-fence-handling-drop-fence-list-v3.patch

You can drop this hunk:
+	if (rdev->family >= CHIP_CAYMAN) {
+		/* because there are so many rings on newer GPUs we can't
+		 * use scratch regs, thus we need to use events; on those
+		 * GPUs there is no AGP version and writing to system ram
+		 * has always been reliable so far
+		 */
+		rdev->wb.enabled = true;
+		rdev->wb.use_event = true;
+	}

The code right below it does the exact same thing.  It could
probably be extended to APUs as well, since they will never have AGP
support.  I'll send out a patch.

Alex

>
> Cheers,
> Jerome

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH] drm/radeon: clarify and extend wb setup on APUs and NI+ asics
  2012-05-03 21:04               ` Alex Deucher
@ 2012-05-03 21:06                 ` alexdeucher
  2012-05-04  5:47                   ` Michel Dänzer
  2012-05-03 21:36                 ` Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t Jerome Glisse
  1 sibling, 1 reply; 17+ messages in thread
From: alexdeucher @ 2012-05-03 21:06 UTC (permalink / raw)
  To: airlied, dri-devel; +Cc: Alex Deucher

From: Alex Deucher <alexander.deucher@amd.com>

Use a family check rather than a DCE check for clarity; also always
use wb on APUs, as there will never be AGP variants.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_device.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 94f8561..8479617 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -241,8 +241,8 @@ int radeon_wb_init(struct radeon_device *rdev)
 				rdev->wb.use_event = true;
 		}
 	}
-	/* always use writeback/events on NI */
-	if (ASIC_IS_DCE5(rdev)) {
+	/* always use writeback/events on NI, APUs */
+	if (rdev->family >= CHIP_PALM) {
 		rdev->wb.enabled = true;
 		rdev->wb.use_event = true;
 	}
-- 
1.7.7.5

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t
  2012-05-03 21:04               ` Alex Deucher
  2012-05-03 21:06                 ` [PATCH] drm/radeon: clarify and extend wb setup on APUs and NI+ asics alexdeucher
@ 2012-05-03 21:36                 ` Jerome Glisse
  1 sibling, 0 replies; 17+ messages in thread
From: Jerome Glisse @ 2012-05-03 21:36 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Christian König, Michel Dänzer, dri-devel

On Thu, May 3, 2012 at 5:04 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
> On Thu, May 3, 2012 at 4:46 PM, Jerome Glisse <j.glisse@gmail.com> wrote:
>> On Thu, May 3, 2012 at 12:34 PM, Jerome Glisse <j.glisse@gmail.com> wrote:
>>> On Thu, May 3, 2012 at 12:29 PM, Alex Deucher <alexdeucher@gmail.com> wrote:
>>>> On Thu, May 3, 2012 at 11:56 AM, Jerome Glisse <j.glisse@gmail.com> wrote:
>>>>> On Thu, May 3, 2012 at 7:39 AM, Christian König <deathsimple@vodafone.de> wrote:
>>>>>> On 03.05.2012 09:21, Michel Dänzer wrote:
>>>>>>>
>>>>>>> On Mit, 2012-05-02 at 16:20 -0400, j.glisse@gmail.com wrote:
>>>>>>>>
>>>>>>>> From: Jerome Glisse <jglisse@redhat.com>
>>>>>>>>
>>>>>>>> This converts the fence to use a uint64_t sequence number; the intention is
>>>>>>>> to use the fact that uint64_t is big enough that we don't need to
>>>>>>>> care about wrap around.
>>>>>>>>
>>>>>>>> Tested with and without writeback using 0xFFFFF000 as the initial
>>>>>>>> fence sequence, thus allowing the wrap around from 32 bits to
>>>>>>>> 64 bits to be tested.
>>>>>>>>
>>>>>>>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>>>>>>>
>>>>>>> [...]
>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>>> b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>>> index 7733429..6da1535 100644
>>>>>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>>>>>> @@ -386,9 +388,9 @@ int radeon_fence_driver_start_ring(struct
>>>>>>>> radeon_device *rdev, int ring)
>>>>>>>>                        rdev->fence_drv[ring].scratch_reg -
>>>>>>>>                        rdev->scratch.reg_base;
>>>>>>>>        }
>>>>>>>> -       rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
>>>>>>>> +       rdev->fence_drv[ring].cpu_addr = (u64 *)&rdev->wb.wb[index/4];
>>>>>>>
>>>>>>> Might want to ensure cpu_addr is 64 bit aligned, or there might be
>>>>>>> trouble on some architectures.
>>>>>>>
>>>>>>>
>>>>>>> With this change, Cayman cards will already use six scratch registers
>>>>>>> for the rings. It won't be possible to extend this scheme for even one
>>>>>>> additional ring, will it?
>>>>>>
>>>>>>
>>>>>> That won't work anyway, since not all rings can deal with 64 bit fences, so
>>>>>> we need to still use 32 bit signaling and extend them to 64 bit while
>>>>>> processing the fence value.
>>>>>>
>>>>>> Already working on that.
>>>>>>
>>>>>> Christian.
>>>>>
>>>>> This patch is fine with rings that can't directly emit 64 bits: all
>>>>> you have to do is fix the emit_fence callback to handle it properly,
>>>>> and then fix radeon_fence_read, which can be moved to a ring-specific
>>>>> callback. Anyway, the point is that the patchset works and is fine on
>>>>> the current set of rings we have, and it can work just as easily for
>>>>> rings without easy 64-bit value emission. So please explain further
>>>>> why those patches can't work, because as I just explained, I don't
>>>>> see why.
>>>>>
>>>>> I have updated the patchset to a v2 that handles Cayman and newer
>>>>> possibly running out of scratch regs, and I also fixed the alignment
>>>>> issue so the fence address is 64-bit aligned.
>>>>
>>>> FWIW, we don't actually use scratch regs any more on r6xx+ (non-AGP at
>>>> least); it's just memory writes, so we could make the scratch pool
>>>> bigger.
>>>>
>>>> Alex
>>>>
>>>
>>> That's what my v2 does: it just drops the scratch regs for Cayman and newer.
>>>
>>> Cheers,
>>> Jerome
>>
>> Btw, I uploaded a v3 with some more things, like a WARN_ON_ONCE for
>> extra safety, better comments, and an attempt to mitigate the race
>> between the CPU reading and the GPU writing the fence.
>> http://people.freedesktop.org/~glisse/reset3/
>
> In patch:
> http://people.freedesktop.org/~glisse/reset3/0003-drm-radeon-rework-fence-handling-drop-fence-list-v3.patch
>
> You can drop this hunk:
> +       if (rdev->family >= CHIP_CAYMAN) {
> +               /* because there are so many rings on newer GPUs we can't
> +                * use scratch regs, thus we need to use events; on those
> +                * GPUs there is no AGP version and writing to system ram
> +                * has always been reliable so far
> +                */
> +               rdev->wb.enabled = true;
> +               rdev->wb.use_event = true;
> +       }
>
> The code right below it does the exact same thing.  It could
> probably be extended to APUs as well, since they will never have AGP
> support.  I'll send out a patch.
>
> Alex
>

Good catch, updated the patch:
http://people.freedesktop.org/~glisse/reset3/

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] drm/radeon: clarify and extend wb setup on APUs and NI+ asics
  2012-05-03 21:06                 ` [PATCH] drm/radeon: clarify and extend wb setup on APUs and NI+ asics alexdeucher
@ 2012-05-04  5:47                   ` Michel Dänzer
  0 siblings, 0 replies; 17+ messages in thread
From: Michel Dänzer @ 2012-05-04  5:47 UTC (permalink / raw)
  To: alexdeucher; +Cc: dri-devel

On Don, 2012-05-03 at 17:06 -0400, alexdeucher@gmail.com wrote: 
> From: Alex Deucher <alexander.deucher@amd.com>
> 
> Use a family check rather than a DCE check for clarity; also always
> use wb on APUs, as there will never be AGP variants.
> 
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2012-05-04  5:47 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-02 20:20 [RFC] Convert fence to use 64bits sequence j.glisse
2012-05-02 20:20 ` [PATCH 1/4] drm/radeon: allow to allocate adjacent scratch reg j.glisse
2012-05-02 20:20 ` [PATCH 2/4] drm/radeon: convert fence to uint64_t j.glisse
2012-05-03  7:21   ` Michel Dänzer
2012-05-03 11:39     ` Christian König
2012-05-03 15:56       ` Jerome Glisse
2012-05-03 16:29         ` Alex Deucher
2012-05-03 16:34           ` Jerome Glisse
2012-05-03 16:45             ` Christian König
2012-05-03 20:46             ` Jerome Glisse
2012-05-03 21:04               ` Alex Deucher
2012-05-03 21:06                 ` [PATCH] drm/radeon: clarify and extend wb setup on APUs and NI+ asics alexdeucher
2012-05-04  5:47                   ` Michel Dänzer
2012-05-03 21:36                 ` Re: [PATCH 2/4] drm/radeon: convert fence to uint64_t Jerome Glisse
2012-05-02 20:20 ` [PATCH 3/4] drm/radeon: rework fence handling, drop fence list j.glisse
2012-05-02 20:43   ` Adam Jackson
2012-05-02 20:20 ` [PATCH 4/4] drm/radeon: improve sa allocator to agressivly free idle bo j.glisse
