* [PATCH] drm/i915: Reserve space improvements
@ 2015-06-24 17:03 John.C.Harrison
  2015-06-25 18:38 ` Tomas Elf
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: John.C.Harrison @ 2015-06-24 17:03 UTC (permalink / raw)
  To: Intel-GFX

From: John Harrison <John.C.Harrison@Intel.com>

An earlier patch was added to reserve space in the ring buffer for the
commands issued during 'add_request()'. The initial version was
pessimistic in the way it handled buffer wrapping and would cause
premature wraps and thus waste ring space.

This patch updates the code to better handle the wrap case. It no
longer enforces that the space being asked for and the reserved space
are a single contiguous block. Instead, it allows the reserve to be on
the far end of a wrap operation. It still guarantees that the space is
available so when the wrap occurs, no wait will happen. Thus the wrap
cannot fail which is the whole point of the exercise.
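
In rough pseudo-C (a simplified, standalone sketch with made-up names,
not the code in the diff below), the wait-size calculation now works
along these lines:

	struct ring_sketch {
		int size;		/* total ring size */
		int effective_size;	/* size minus the wrap padding */
		int tail;
		int space;		/* free space currently known */
		int reserved_size;	/* kept back for add_request() */
		int free_space;		/* stand-in for I915_RING_FREE_SPACE */
	};

	static int wait_bytes_sketch(const struct ring_sketch *rb, int bytes)
	{
		int max_bytes = bytes + rb->reserved_size;

		if (rb->space >= max_bytes)
			return 0;	/* enough space, no wait at all */

		/*
		 * The reserve would fall off the end, so wait for the whole
		 * remaining tail plus the reserve. The later wrap then never
		 * has to wait and so cannot fail.
		 */
		if (rb->tail + max_bytes + rb->free_space > rb->effective_size)
			max_bytes = rb->reserved_size + rb->free_space +
				    rb->size - rb->tail;

		return max_bytes;	/* amount to wait for */
	}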

Also fixed a merge failure with some comments from the original patch.

For: VIZ-5115
CC: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 54 +++++++++++++-----------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 73 +++++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
 3 files changed, 79 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b36e064..e998a54 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
 	unsigned space;
 	int ret;
 
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (intel_ring_space(ringbuf) >= bytes)
 		return 0;
 
+	/* The whole point of reserving space is to not wait! */
+	WARN_ON(ringbuf->reserved_in_use);
+
 	list_for_each_entry(target, &ring->request_list, list) {
 		/*
 		 * The request queue is per-engine, so can contain requests
@@ -724,12 +724,13 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
 	uint32_t __iomem *virt;
 	int rem = ringbuf->size - ringbuf->tail;
 
-	/* Can't wrap if space has already been reserved! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (ringbuf->space < rem) {
-		int ret = logical_ring_wait_for_space(req, rem);
+		int ret;
+
+		/* Can't wait if space has already been reserved! */
+		WARN_ON(ringbuf->reserved_in_use);
 
+		ret = logical_ring_wait_for_space(req, rem);
 		if (ret)
 			return ret;
 	}
@@ -748,31 +749,36 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
 static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
 {
 	struct intel_ringbuffer *ringbuf = req->ringbuf;
-	int ret;
-
-	/*
-	 * Add on the reserved size to the request to make sure that after
-	 * the intended commands have been emitted, there is guaranteed to
-	 * still be enough free space to send them to the hardware.
-	 */
-	if (!ringbuf->reserved_in_use)
-		bytes += ringbuf->reserved_size;
+	int ret, max_bytes;
 
 	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
 		ret = logical_ring_wrap_buffer(req);
 		if (unlikely(ret))
 			return ret;
+	}
 
-		if(ringbuf->reserved_size) {
-			uint32_t size = ringbuf->reserved_size;
+	/*
+	 * Add on the reserved size to the request to make sure that after
+	 * the intended commands have been emitted, there is guaranteed to
+	 * still be enough free space to send them to the hardware.
+	 */
+	max_bytes = bytes + ringbuf->reserved_size;
 
-			intel_ring_reserved_space_cancel(ringbuf);
-			intel_ring_reserved_space_reserve(ringbuf, size);
-		}
-	}
+	if (unlikely(ringbuf->space < max_bytes)) {
+		/*
+		 * Bytes is guaranteed to fit within the tail of the buffer,
+		 * but the reserved space may push it off the end. If so then
+		 * need to wait for the whole of the tail plus the reserved
+		 * size. That should guarantee that the actual request
+		 * (bytes) will fit between here and the end and the reserved
+		 * usage will fit either in the same or at the start. Either
+		 * way, if a wrap occurs it will not involve a wait and thus
+		 * cannot fail.
+		 */
+		if (unlikely(ringbuf->tail + max_bytes + I915_RING_FREE_SPACE > ringbuf->effective_size))
+			max_bytes = ringbuf->reserved_size + I915_RING_FREE_SPACE + ringbuf->size - ringbuf->tail;
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = logical_ring_wait_for_space(req, bytes);
+		ret = logical_ring_wait_for_space(req, max_bytes);
 		if (unlikely(ret))
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index af7c12e..7c5b4c2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	unsigned space;
 	int ret;
 
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
+	/* The whole point of reserving space is to not wait! */
+	WARN_ON(ringbuf->reserved_in_use);
+
 	list_for_each_entry(request, &ring->request_list, list) {
 		space = __intel_ring_space(request->postfix, ringbuf->tail,
 					   ringbuf->size);
@@ -2151,11 +2151,13 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	int rem = ringbuf->size - ringbuf->tail;
 
-	/* Can't wrap if space has already been reserved! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (ringbuf->space < rem) {
-		int ret = ring_wait_for_space(ring, rem);
+		int ret;
+
+		/* Can't wait if space has already been reserved! */
+		WARN_ON(ringbuf->reserved_in_use);
+
+		ret = ring_wait_for_space(ring, rem);
 		if (ret)
 			return ret;
 	}
@@ -2238,9 +2240,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 {
 	WARN_ON(!ringbuf->reserved_in_use);
-	WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
-	     "request reserved size too small: %d vs %d!\n",
-	     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+	if (ringbuf->tail > ringbuf->reserved_tail) {
+		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
+		     "request reserved size too small: %d vs %d!\n",
+		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+	} else {
+		/*
+		 * The ring was wrapped while the reserved space was in use.
+		 * That means that some unknown amount of the ring tail was
+		 * no-op filled and skipped. Thus simply adding the ring size
+		 * to the tail and doing the above space check will not work.
+		 * Rather than attempt to track how much tail was skipped,
+		 * it is much simpler to say that also skipping the sanity
+		 * check every once in a while is not a big issue.
+		 */
+	}
 
 	ringbuf->reserved_size   = 0;
 	ringbuf->reserved_in_use = false;
@@ -2249,31 +2263,36 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	int ret;
-
-	/*
-	 * Add on the reserved size to the request to make sure that after
-	 * the intended commands have been emitted, there is guaranteed to
-	 * still be enough free space to send them to the hardware.
-	 */
-	if (!ringbuf->reserved_in_use)
-		bytes += ringbuf->reserved_size;
+	int ret, max_bytes;
 
 	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
 		ret = intel_wrap_ring_buffer(ring);
 		if (unlikely(ret))
 			return ret;
+	}
 
-		if(ringbuf->reserved_size) {
-			uint32_t size = ringbuf->reserved_size;
+	/*
+	 * Add on the reserved size to the request to make sure that after
+	 * the intended commands have been emitted, there is guaranteed to
+	 * still be enough free space to send them to the hardware.
+	 */
+	max_bytes = bytes + ringbuf->reserved_size;
 
-			intel_ring_reserved_space_cancel(ringbuf);
-			intel_ring_reserved_space_reserve(ringbuf, size);
-		}
-	}
+	if (unlikely(ringbuf->space < max_bytes)) {
+		/*
+		 * Bytes is guaranteed to fit within the tail of the buffer,
+		 * but the reserved space may push it off the end. If so then
+		 * need to wait for the whole of the tail plus the reserved
+		 * size. That should guarantee that the actual request
+		 * (bytes) will fit between here and the end and the reserved
+		 * usage will fit either in the same or at the start. Either
+		 * way, if a wrap occurs it will not involve a wait and thus
+		 * cannot fail.
+		 */
+		if (unlikely(ringbuf->tail + max_bytes > ringbuf->effective_size))
+			max_bytes = ringbuf->reserved_size + I915_RING_FREE_SPACE + ringbuf->size - ringbuf->tail;
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = ring_wait_for_space(ring, bytes);
+		ret = ring_wait_for_space(ring, max_bytes);
 		if (unlikely(ret))
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0e2bbc6..304cac4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
  * will always have sufficient room to do its stuff. The request creation
  * code calls this automatically.
  */
-int intel_ring_reserve_space(struct drm_i915_gem_request *request);
 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
 /* Cancel the reservation, e.g. because the request is being discarded. */
 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
@@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
 /* Finish with the reserved space - for use by i915_add_request() only. */
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
 
+/* Legacy ringbuffer specific portion of reservation code: */
+int intel_ring_reserve_space(struct drm_i915_gem_request *request);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
1.9.1


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-24 17:03 [PATCH] drm/i915: Reserve space improvements John.C.Harrison
@ 2015-06-25 18:38 ` Tomas Elf
  2015-06-26 18:04   ` Dave Gordon
  2015-06-28 21:11 ` shuang.he
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Tomas Elf @ 2015-06-25 18:38 UTC (permalink / raw)
  To: John.C.Harrison, Intel-GFX

On 24/06/2015 18:03, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> An earlier patch was added to reserve space in the ring buffer for the
> commands issued during 'add_request()'. The initial version was
> pessimistic in the way it handled buffer wrapping and would cause
> premature wraps and thus waste ring space.
>
> This patch updates the code to better handle the wrap case. It no
> longer enforces that the space being asked for and the reserved space
> are a single contiguous block. Instead, it allows the reserve to be on
> the far end of a wrap operation. It still guarantees that the space is
> available so when the wrap occurs, no wait will happen. Thus the wrap
> cannot fail which is the whole point of the exercise.
>
> Also fixed a merge failure with some comments from the original patch.
>
> For: VIZ-5115
> CC: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 54 +++++++++++++-----------
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 73 +++++++++++++++++++++------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>   3 files changed, 79 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b36e064..e998a54 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>   	unsigned space;
>   	int ret;
>
> -	/* The whole point of reserving space is to not wait! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (intel_ring_space(ringbuf) >= bytes)
>   		return 0;
>
> +	/* The whole point of reserving space is to not wait! */
> +	WARN_ON(ringbuf->reserved_in_use);
> +
>   	list_for_each_entry(target, &ring->request_list, list) {
>   		/*
>   		 * The request queue is per-engine, so can contain requests
> @@ -724,12 +724,13 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>   	uint32_t __iomem *virt;
>   	int rem = ringbuf->size - ringbuf->tail;
>
> -	/* Can't wrap if space has already been reserved! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (ringbuf->space < rem) {
> -		int ret = logical_ring_wait_for_space(req, rem);
> +		int ret;
> +
> +		/* Can't wait if space has already been reserved! */
> +		WARN_ON(ringbuf->reserved_in_use);

You already do this WARN_ON in logical_ring_wait_for_space().

>
> +		ret = logical_ring_wait_for_space(req, rem);
>   		if (ret)
>   			return ret;
>   	}
> @@ -748,31 +749,36 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>   static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
>   {
>   	struct intel_ringbuffer *ringbuf = req->ringbuf;
> -	int ret;
> -
> -	/*
> -	 * Add on the reserved size to the request to make sure that after
> -	 * the intended commands have been emitted, there is guaranteed to
> -	 * still be enough free space to send them to the hardware.
> -	 */
> -	if (!ringbuf->reserved_in_use)
> -		bytes += ringbuf->reserved_size;
> +	int ret, max_bytes;
>

It would be helpful if we could flesh out the flow through the 
ring_prepare functions and be more explicit about what is actually going 
on. Largely this is because there is a distinct lack of documentation 
for the entire ring buffer management code on top of a quite 
counter-intuitive legacy design, so this is not due to your changes. 
However, your changes make things even more complex and hard to 
understand. So I've suggested a few comments below. Feel free to reword 
or do whatever with them. It would be nice if we could be slightly more 
clear about what is going on here, though. The same comments apply to 
both legacy and execlist function implementations obviously.
	
	/*
	 * If the request minus the reserved request size will not fit
	 * before the buffer wrap point we need to wrap the buffer here.
	 * Past this block we know for sure that the request minus
	 * the reserved request size will fit in the buffer without
	 * further wrapping.
	 */

>   	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>   		ret = logical_ring_wrap_buffer(req);
>   		if (unlikely(ret))
>   			return ret;
> +	}
>
> -		if(ringbuf->reserved_size) {
> -			uint32_t size = ringbuf->reserved_size;
> +	/*
> +	 * Add on the reserved size to the request to make sure that after
> +	 * the intended commands have been emitted, there is guaranteed to
> +	 * still be enough free space to send them to the hardware.
> +	 */
> +	max_bytes = bytes + ringbuf->reserved_size;
>
> -			intel_ring_reserved_space_cancel(ringbuf);
> -			intel_ring_reserved_space_reserve(ringbuf, size);
> -		}
> -	}
> +	if (unlikely(ringbuf->space < max_bytes)) {
> +		/*
> +		 * Bytes is guaranteed to fit within the tail of the buffer,
> +		 * but the reserved space may push it off the end. If so then
> +		 * need to wait for the whole of the tail plus the reserved
> +		 * size. That should guarantee that the actual request
> +		 * (bytes) will fit between here and the end and the reserved
> +		 * usage will fit either in the same or at the start. Either
> +		 * way, if a wrap occurs it will not involve a wait and thus
> +		 * cannot fail.
> +		 */

How about replacing the above comment with:

		/*
		 * At this point we know that the request minus the
		 * reserved size (= bytes) is guaranteed to fit within
		 * the space left before the buffer wrap point.
		 * However, adding the reserved request space on top of
		 * it will not fit within the remaining space in the
		 * buffer - we need to wait for space to free up.
		 */

> +		if (unlikely(ringbuf->tail + max_bytes + I915_RING_FREE_SPACE > ringbuf->effective_size))
> +			max_bytes = ringbuf->reserved_size + I915_RING_FREE_SPACE + ringbuf->size - ringbuf->tail;

You could augment the two lines above like the following:

		if (unlikely(ringbuf->tail + max_bytes + I915_RING_FREE_SPACE > ringbuf->effective_size)) {
			/*
			 * We may have to wrap the buffer at some point in
			 * order to fit the reserved request space. This
			 * requires us not only to wait for the reserved
			 * request space but on top of that also wait
			 * for enough space to cover the buffer space that is
			 * discarded during a potential buffer wrap. The lost
			 * buffer space is unknown at this point so we do a
			 * pessimistic estimation. Since we know that the
			 * remaining buffer space before the wrap point
			 * is large enough to contain the request minus the
			 * reserved buffer size we don't have to think about
			 * that (= bytes). Instead we just have to add the
			 * remaining reserved request size to the estimated
			 * buffer space lost during wrapping.
			 */
			int lost_wrap_space = I915_RING_FREE_SPACE + ringbuf->size - ringbuf->tail;

			max_bytes = ringbuf->reserved_size + lost_wrap_space;
		}
>
> -	if (unlikely(ringbuf->space < bytes)) {
> -		ret = logical_ring_wait_for_space(req, bytes);

How about adding something like this at this point? :

		/*
		 * Since we are waiting up front at this point there is
		 * no need to wait again in the case of a wrap. Thus,
		 * removing the only point of failure.
		 */
> +		ret = logical_ring_wait_for_space(req, max_bytes);
>   		if (unlikely(ret))
>   			return ret;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index af7c12e..7c5b4c2 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>   	unsigned space;
>   	int ret;
>
> -	/* The whole point of reserving space is to not wait! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (intel_ring_space(ringbuf) >= n)
>   		return 0;
>
> +	/* The whole point of reserving space is to not wait! */
> +	WARN_ON(ringbuf->reserved_in_use);
> +
>   	list_for_each_entry(request, &ring->request_list, list) {
>   		space = __intel_ring_space(request->postfix, ringbuf->tail,
>   					   ringbuf->size);
> @@ -2151,11 +2151,13 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>   	struct intel_ringbuffer *ringbuf = ring->buffer;
>   	int rem = ringbuf->size - ringbuf->tail;
>
> -	/* Can't wrap if space has already been reserved! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (ringbuf->space < rem) {
> -		int ret = ring_wait_for_space(ring, rem);
> +		int ret;
> +
> +		/* Can't wait if space has already been reserved! */
> +		WARN_ON(ringbuf->reserved_in_use);
> +
> +		ret = ring_wait_for_space(ring, rem);
>   		if (ret)
>   			return ret;
>   	}
> @@ -2238,9 +2240,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>   {
>   	WARN_ON(!ringbuf->reserved_in_use);
> -	WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
> -	     "request reserved size too small: %d vs %d!\n",
> -	     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
> +	if (ringbuf->tail > ringbuf->reserved_tail) {
> +		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
> +		     "request reserved size too small: %d vs %d!\n",
> +		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
> +	} else {
> +		/*
> +		 * The ring was wrapped while the reserved space was in use.
> +		 * That means that some unknown amount of the ring tail was
> +		 * no-op filled and skipped. Thus simply adding the ring size
> +		 * to the tail and doing the above space check will not work.
> +		 * Rather than attempt to track how much tail was skipped,
> +		 * it is much simpler to say that also skipping the sanity
> +		 * check every once in a while is not a big issue.
> +		 */
> +	}
>
>   	ringbuf->reserved_size   = 0;
>   	ringbuf->reserved_in_use = false;
> @@ -2249,31 +2263,36 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>   static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
>   {
>   	struct intel_ringbuffer *ringbuf = ring->buffer;
> -	int ret;
> -
> -	/*
> -	 * Add on the reserved size to the request to make sure that after
> -	 * the intended commands have been emitted, there is guaranteed to
> -	 * still be enough free space to send them to the hardware.
> -	 */
> -	if (!ringbuf->reserved_in_use)
> -		bytes += ringbuf->reserved_size;
> +	int ret, max_bytes;
>
>   	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>   		ret = intel_wrap_ring_buffer(ring);
>   		if (unlikely(ret))
>   			return ret;
> +	}
>
> -		if(ringbuf->reserved_size) {
> -			uint32_t size = ringbuf->reserved_size;
> +	/*
> +	 * Add on the reserved size to the request to make sure that after
> +	 * the intended commands have been emitted, there is guaranteed to
> +	 * still be enough free space to send them to the hardware.
> +	 */
> +	max_bytes = bytes + ringbuf->reserved_size;
>
> -			intel_ring_reserved_space_cancel(ringbuf);
> -			intel_ring_reserved_space_reserve(ringbuf, size);
> -		}
> -	}
> +	if (unlikely(ringbuf->space < max_bytes)) {
> +		/*
> +		 * Bytes is guaranteed to fit within the tail of the buffer,
> +		 * but the reserved space may push it off the end. If so then
> +		 * need to wait for the whole of the tail plus the reserved
> +		 * size. That should guarantee that the actual request
> +		 * (bytes) will fit between here and the end and the reserved
> +		 * usage will fit either in the same or at the start. Either
> +		 * way, if a wrap occurs it will not involve a wait and thus
> +		 * cannot fail.
> +		 */
> +		if (unlikely(ringbuf->tail + max_bytes > ringbuf->effective_size))
> +			max_bytes = ringbuf->reserved_size + I915_RING_FREE_SPACE + ringbuf->size - ringbuf->tail;
>
> -	if (unlikely(ringbuf->space < bytes)) {
> -		ret = ring_wait_for_space(ring, bytes);
> +		ret = ring_wait_for_space(ring, max_bytes);
>   		if (unlikely(ret))
>   			return ret;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0e2bbc6..304cac4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>    * will always have sufficient room to do its stuff. The request creation
>    * code calls this automatically.
>    */
> -int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>   void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
>   /* Cancel the reservation, e.g. because the request is being discarded. */
>   void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
> @@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
>   /* Finish with the reserved space - for use by i915_add_request() only. */
>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
>
> +/* Legacy ringbuffer specific portion of reservation code: */
> +int intel_ring_reserve_space(struct drm_i915_gem_request *request);
> +
>   #endif /* _INTEL_RINGBUFFER_H_ */
>

Thanks,
Tomas


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-25 18:38 ` Tomas Elf
@ 2015-06-26 18:04   ` Dave Gordon
  0 siblings, 0 replies; 14+ messages in thread
From: Dave Gordon @ 2015-06-26 18:04 UTC (permalink / raw)
  To: intel-gfx, Harrison, John C, Elf, Tomas

On 25/06/15 19:38, Tomas Elf wrote:
> On 24/06/2015 18:03, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> An earlier patch was added to reserve space in the ring buffer for the
>> commands issued during 'add_request()'. The initial version was
>> pessimistic in the way it handled buffer wrapping and would cause
>> premature wraps and thus waste ring space.
>>
>> This patch updates the code to better handle the wrap case. It no
>> longer enforces that the space being asked for and the reserved space
>> are a single contiguous block. Instead, it allows the reserve to be on
>> the far end of a wrap operation. It still guarantees that the space is
>> available so when the wrap occurs, no wait will happen. Thus the wrap
>> cannot fail which is the whole point of the exercise.
>>
>> Also fixed a merge failure with some comments from the original patch.
>>
>> For: VIZ-5115
>> CC: Daniel Vetter <daniel@ffwll.ch>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_lrc.c        | 54 +++++++++++++-----------
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 73
>> +++++++++++++++++++++------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>>   3 files changed, 79 insertions(+), 52 deletions(-)

[snip]

>> @@ -748,31 +749,36 @@ static int logical_ring_wrap_buffer(struct
>> drm_i915_gem_request *req)
>>   static int logical_ring_prepare(struct drm_i915_gem_request *req,
>> int bytes)
>>   {
>>       struct intel_ringbuffer *ringbuf = req->ringbuf;
>> -    int ret;
>> -
>> -    /*
>> -     * Add on the reserved size to the request to make sure that after
>> -     * the intended commands have been emitted, there is guaranteed to
>> -     * still be enough free space to send them to the hardware.
>> -     */
>> -    if (!ringbuf->reserved_in_use)
>> -        bytes += ringbuf->reserved_size;
>> +    int ret, max_bytes;
>>
> 
> It would be helpful if we could flesh out the flow through the
> ring_prepare functions and be more explicit about what is actually going
> on. Largely this is because there is a distinct lack of documentation
> for the entire ring buffer management code on top of a quite
> counter-intuitive legacy design, so this is not due to your changes.
> However, your changes make things even more complex and hard to
> understand. So I've suggested a few comments below. Feel free to reword
> or do whatever with them. It would be nice if we could be slightly more
> clear about what is going on here, though. The same comments apply to
> both legacy and execlist function implementations obviously.

It would be simpler to understand if the unnecessary complication of
unlikely(wait-for-space-and-wrap) followed by unlikely(wait-for-space)
were simplified into a single process of "precalculate space required,
allowing for wrapping where necessary; wait for that much space; finally
fill the tail of the ring if we determined that wrapping was necessary".

This change can be applied before any of the complications of "reserving
space". See
http://lists.freedesktop.org/archives/intel-gfx/2015-June/068545.html
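
In outline it would look something like this (a simplified, standalone
sketch of the idea with placeholder names, not actual driver code):

	#include <stdbool.h>

	struct ring_sketch {
		int effective_size;	/* ring size minus the wrap padding */
		int tail;
		int space;		/* free space currently known */
		int reserved_size;	/* kept back for add_request() */
	};

	/* Precalculate how much space is required, allowing for a wrap. */
	static int space_needed(const struct ring_sketch *rb, int bytes,
				bool *need_wrap)
	{
		int remain = rb->effective_size - rb->tail;
		int total = bytes + rb->reserved_size;

		/* A wrap is needed if the request cannot fit before the end. */
		*need_wrap = bytes > remain;

		/* Wrapping no-op fills the tail, so count that space too. */
		return *need_wrap ? remain + total : total;
	}

	/*
	 * The caller then waits once for space_needed() bytes if rb->space
	 * is short, and only afterwards no-op fills the tail when need_wrap
	 * was set.
	 */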

Secondly, a simpler way to implement "reserved space" is just to have
the "calculate remaining space" function intel_ring_space() deduct the
amount-reserved-for-add-request from its result UNLESS the
"use-reserved-space" flag has been set. It already deducts
I915_RING_FREE_SPACE, which represents the h/w limitation of the minimum
space required between tail and head, so conditionally deducting the
extra amount would be easy; then all other code could continue to use
the value it returns when checking for enough space. Nothing else would
have to know about "reserved space" at all.
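
Something along these lines (again only a sketch with simplified names
and a stand-in constant, not the real intel_ring_space()):

	#include <stdbool.h>

	#define RING_FREE_SPACE	64	/* stand-in for I915_RING_FREE_SPACE */

	struct ring_sketch {
		int head;
		int tail;
		int size;
		int reserved_size;
		bool reserved_in_use;	/* set once add_request() takes it */
	};

	static int ring_space_sketch(const struct ring_sketch *rb)
	{
		int space = rb->head - rb->tail;

		if (space <= 0)
			space += rb->size;

		/* Hardware minimum gap between tail and head. */
		space -= RING_FREE_SPACE;

		/* Hide the add_request() reservation from everyone else. */
		if (!rb->reserved_in_use)
			space -= rb->reserved_size;

		return space > 0 ? space : 0;
	}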

.Dave.


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-24 17:03 [PATCH] drm/i915: Reserve space improvements John.C.Harrison
  2015-06-25 18:38 ` Tomas Elf
@ 2015-06-28 21:11 ` shuang.he
  2015-06-29 16:36 ` John.C.Harrison
  2015-06-30 11:40 ` John.C.Harrison
  3 siblings, 0 replies; 14+ messages in thread
From: shuang.he @ 2015-06-28 21:11 UTC (permalink / raw)
  To: shuang.he, lei.a.liu, intel-gfx, John.C.Harrison

Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6575
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
ILK                 -4              303/303              299/303
SNB                 -5              312/312              307/312
IVB                 -9              343/343              334/343
BYT                 -5              284/284              279/284
HSW                 -8              380/380              372/380
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*ILK  igt@gem_ringfill@blitter      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*ILK  igt@gem_ringfill@blitter-interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*ILK  igt@gem_userptr_blits@forked-sync-multifd-normal      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*ILK  igt@gem_userptr_blits@forked-unsync-normal      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*SNB  igt@gem_ring_sync_copy@sync-render-blitter-read-write      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*SNB  igt@gem_ring_sync_copy@sync-render-blitter-write-read      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*SNB  igt@gem_ring_sync_copy@sync-render-blitter-write-write      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*SNB  igt@gem_userptr_blits@coherency-sync      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*SNB  igt@pm_rps@min-max-config-loaded      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
WARNING:at_drivers/gpu/drm/i915/i915_gem.c:#__i915_add_request[i915]()@WARNING:.* at .* __i915_add_request+0x
*IVB  igt@gem_linear_blits@interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_linear_blits@normal      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_ringfill@blitter      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_ringfill@blitter-interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_tiled_blits@interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_tiled_blits@normal      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_userptr_blits@coherency-sync      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@gem_userptr_blits@coherency-unsync      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*IVB  igt@pm_rps@min-max-config-loaded      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
WARNING:at_drivers/gpu/drm/i915/i915_gem.c:#__i915_add_request[i915]()@WARNING:.* at .* __i915_add_request+0x
*BYT  igt@gem_gtt_hog      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*BYT  igt@gem_partial_pwrite_pread@reads-display      PASS(1)      FAIL(1)
*BYT  igt@gem_ringfill@blitter      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*BYT  igt@gem_ringfill@blitter-interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*BYT  igt@gem_ringfill@render-interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_linear_blits@interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_linear_blits@normal      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_ringfill@blitter      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_ringfill@blitter-interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_tiled_blits@interruptible      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_tiled_blits@normal      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_userptr_blits@coherency-sync      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
*HSW  igt@gem_userptr_blits@coherency-unsync      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)WARNING:at_drivers/gpu/drm/i915/intel_ringbuffer.c:#ring_wait_for_space[i915]()@WARNING:.* at .* ring_wait_for_space+0x
Note: You need to pay more attention to line start with '*'

* [PATCH] drm/i915: Reserve space improvements
  2015-06-24 17:03 [PATCH] drm/i915: Reserve space improvements John.C.Harrison
  2015-06-25 18:38 ` Tomas Elf
  2015-06-28 21:11 ` shuang.he
@ 2015-06-29 16:36 ` John.C.Harrison
  2015-06-30  7:26   ` shuang.he
  2015-06-30 11:33   ` Tomas Elf
  2015-06-30 11:40 ` John.C.Harrison
  3 siblings, 2 replies; 14+ messages in thread
From: John.C.Harrison @ 2015-06-29 16:36 UTC (permalink / raw)
  To: Intel-GFX

From: John Harrison <John.C.Harrison@Intel.com>

An earlier patch was added to reserve space in the ring buffer for the
commands issued during 'add_request()'. The initial version was
pessimistic in the way it handled buffer wrapping and would cause
premature wraps and thus waste ring space.

This patch updates the code to better handle the wrap case. It no
longer enforces that the space being asked for and the reserved space
are a single contiguous block. Instead, it allows the reserve to be on
the far end of a wrap operation. It still guarantees that the space is
available so when the wrap occurs, no wait will happen. Thus the wrap
cannot fail which is the whole point of the exercise.

Also fixed a merge failure with some comments from the original patch.

v2: Incorporated suggestion by David Gordon to move the wrap code
inside the prepare function and thus allow a single combined
wait_for_space() call rather than doing one before the wrap and
another after. This also makes the prepare code much simpler and
easier to follow.

For: VIZ-5115
CC: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 72 +++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 89 ++++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
 3 files changed, 88 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b36e064..a41936b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
 	unsigned space;
 	int ret;
 
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (intel_ring_space(ringbuf) >= bytes)
 		return 0;
 
+	/* The whole point of reserving space is to not wait! */
+	WARN_ON(ringbuf->reserved_in_use);
+
 	list_for_each_entry(target, &ring->request_list, list) {
 		/*
 		 * The request queue is per-engine, so can contain requests
@@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	execlists_context_queue(request);
 }
 
-static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
-	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	uint32_t __iomem *virt;
 	int rem = ringbuf->size - ringbuf->tail;
 
-	/* Can't wrap if space has already been reserved! */
-	WARN_ON(ringbuf->reserved_in_use);
-
-	if (ringbuf->space < rem) {
-		int ret = logical_ring_wait_for_space(req, rem);
-
-		if (ret)
-			return ret;
-	}
-
 	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
@@ -741,40 +730,49 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
 
 	ringbuf->tail = 0;
 	intel_ring_update_space(ringbuf);
-
-	return 0;
 }
 
 static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
 {
 	struct intel_ringbuffer *ringbuf = req->ringbuf;
-	int ret;
-
-	/*
-	 * Add on the reserved size to the request to make sure that after
-	 * the intended commands have been emitted, there is guaranteed to
-	 * still be enough free space to send them to the hardware.
-	 */
-	if (!ringbuf->reserved_in_use)
-		bytes += ringbuf->reserved_size;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = logical_ring_wrap_buffer(req);
-		if (unlikely(ret))
-			return ret;
+	int remain = ringbuf->effective_size - ringbuf->tail;
+	int ret, total_bytes, wait_bytes = 0;
+	bool need_wrap = false;
 
-		if(ringbuf->reserved_size) {
-			uint32_t size = ringbuf->reserved_size;
+	if (ringbuf->reserved_in_use)
+		total_bytes = bytes;
+	else
+		total_bytes = bytes + ringbuf->reserved_size;
 
-			intel_ring_reserved_space_cancel(ringbuf);
-			intel_ring_reserved_space_reserve(ringbuf, size);
+	if (unlikely(bytes > remain)) {
+		/*
+		 * Not enough space for the basic request. So need to flush
+		 * out the remainder and then wait for base + reserved.
+		 */
+		wait_bytes = remain + total_bytes;
+		need_wrap = true;
+	} else {
+		if (unlikely(total_bytes > remain)) {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So only need to wait for the
+			 * reserved size after flushing out the remainder.
+			 */
+			wait_bytes = remain + ringbuf->reserved_size;
+			need_wrap = true;
+		} else if (total_bytes > ringbuf->space) {
+			/* No wrapping required, just waiting. */
+			wait_bytes = total_bytes;
 		}
 	}
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = logical_ring_wait_for_space(req, bytes);
+	if (wait_bytes) {
+		ret = logical_ring_wait_for_space(req, wait_bytes);
 		if (unlikely(ret))
 			return ret;
+
+		if (need_wrap)
+			__wrap_ring_buffer(ringbuf);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index af7c12e..9b10019 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	unsigned space;
 	int ret;
 
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
+	/* The whole point of reserving space is to not wait! */
+	WARN_ON(ringbuf->reserved_in_use);
+
 	list_for_each_entry(request, &ring->request_list, list) {
 		space = __intel_ring_space(request->postfix, ringbuf->tail,
 					   ringbuf->size);
@@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	return 0;
 }
 
-static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
 	uint32_t __iomem *virt;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
 	int rem = ringbuf->size - ringbuf->tail;
 
-	/* Can't wrap if space has already been reserved! */
-	WARN_ON(ringbuf->reserved_in_use);
-
-	if (ringbuf->space < rem) {
-		int ret = ring_wait_for_space(ring, rem);
-		if (ret)
-			return ret;
-	}
-
 	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
@@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
 	ringbuf->tail = 0;
 	intel_ring_update_space(ringbuf);
-
-	return 0;
 }
 
 int intel_ring_idle(struct intel_engine_cs *ring)
@@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 {
 	WARN_ON(!ringbuf->reserved_in_use);
-	WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
-	     "request reserved size too small: %d vs %d!\n",
-	     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+	if (ringbuf->tail > ringbuf->reserved_tail) {
+		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
+		     "request reserved size too small: %d vs %d!\n",
+		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+	} else {
+		/*
+		 * The ring was wrapped while the reserved space was in use.
+		 * That means that some unknown amount of the ring tail was
+		 * no-op filled and skipped. Thus simply adding the ring size
+		 * to the tail and doing the above space check will not work.
+		 * Rather than attempt to track how much tail was skipped,
+		 * it is much simpler to say that also skipping the sanity
+		 * check every once in a while is not a big issue.
+		 */
+	}
 
 	ringbuf->reserved_size   = 0;
 	ringbuf->reserved_in_use = false;
@@ -2249,33 +2249,44 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	int ret;
-
-	/*
-	 * Add on the reserved size to the request to make sure that after
-	 * the intended commands have been emitted, there is guaranteed to
-	 * still be enough free space to send them to the hardware.
-	 */
-	if (!ringbuf->reserved_in_use)
-		bytes += ringbuf->reserved_size;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = intel_wrap_ring_buffer(ring);
-		if (unlikely(ret))
-			return ret;
+	int remain = ringbuf->effective_size - ringbuf->tail;
+	int ret, total_bytes, wait_bytes = 0;
+	bool need_wrap = false;
 
-		if(ringbuf->reserved_size) {
-			uint32_t size = ringbuf->reserved_size;
+	if (ringbuf->reserved_in_use)
+		total_bytes = bytes;
+	else
+		total_bytes = bytes + ringbuf->reserved_size;
 
-			intel_ring_reserved_space_cancel(ringbuf);
-			intel_ring_reserved_space_reserve(ringbuf, size);
+	if (unlikely(bytes > remain)) {
+		/*
+		 * Not enough space for the basic request. So need to flush
+		 * out the remainder and then wait for base + reserved.
+		 */
+		wait_bytes = remain + total_bytes;
+		need_wrap = true;
+	} else {
+		if (unlikely(total_bytes > remain)) {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So only need to wait for the
+			 * reserved size after flushing out the remainder.
+			 */
+			wait_bytes = remain + ringbuf->reserved_size;
+			need_wrap = true;
+		} else if (total_bytes > ringbuf->space) {
+			/* No wrapping required, just waiting. */
+			wait_bytes = total_bytes;
 		}
 	}
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = ring_wait_for_space(ring, bytes);
+	if (wait_bytes) {
+		ret = ring_wait_for_space(ring, wait_bytes);
 		if (unlikely(ret))
 			return ret;
+
+		if (need_wrap)
+			__wrap_ring_buffer(ringbuf);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0e2bbc6..304cac4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
  * will always have sufficient room to do its stuff. The request creation
  * code calls this automatically.
  */
-int intel_ring_reserve_space(struct drm_i915_gem_request *request);
 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
 /* Cancel the reservation, e.g. because the request is being discarded. */
 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
@@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
 /* Finish with the reserved space - for use by i915_add_request() only. */
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
 
+/* Legacy ringbuffer specific portion of reservation code: */
+int intel_ring_reserve_space(struct drm_i915_gem_request *request);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
1.9.1


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-29 16:36 ` John.C.Harrison
@ 2015-06-30  7:26   ` shuang.he
  2015-06-30 11:33   ` Tomas Elf
  1 sibling, 0 replies; 14+ messages in thread
From: shuang.he @ 2015-06-30  7:26 UTC (permalink / raw)
  To: shuang.he, lei.a.liu, intel-gfx, John.C.Harrison

Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6668
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
ILK                                  302/302              302/302
SNB                                  312/316              312/316
IVB                                  343/343              343/343
BYT                 -2              287/287              285/287
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*BYT  igt@gem_partial_pwrite_pread@reads      PASS(1)      FAIL(1)
*BYT  igt@gem_partial_pwrite_pread@reads-display      PASS(1)      FAIL(1)
Note: You need to pay more attention to line start with '*'

* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-29 16:36 ` John.C.Harrison
  2015-06-30  7:26   ` shuang.he
@ 2015-06-30 11:33   ` Tomas Elf
  1 sibling, 0 replies; 14+ messages in thread
From: Tomas Elf @ 2015-06-30 11:33 UTC (permalink / raw)
  To: John.C.Harrison, Intel-GFX

On 29/06/2015 17:36, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> An earlier patch was added to reserve space in the ring buffer for the
> commands issued during 'add_request()'. The initial version was
> pessimistic in the way it handled buffer wrapping and would cause
> premature wraps and thus waste ring space.
>
> This patch updates the code to better handle the wrap case. It no
> longer enforces that the space being asked for and the reserved space
> are a single contiguous block. Instead, it allows the reserve to be on
> the far end of a wrap operation. It still guarantees that the space is
> available so when the wrap occurs, no wait will happen. Thus the wrap
> cannot fail which is the whole point of the exercise.
>
> Also fixed a merge failure with some comments from the original patch.
>
> v2: Incorporated suggestion by David Gordon to move the wrap code
> inside the prepare function and thus allow a single combined
> wait_for_space() call rather than doing one before the wrap and
> another after. This also makes the prepare code much simpler and
> easier to follow.
>
> For: VIZ-5115
> CC: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 72 +++++++++++++-------------
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 89 ++++++++++++++++++---------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>   3 files changed, 88 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b36e064..a41936b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>   	unsigned space;
>   	int ret;
>
> -	/* The whole point of reserving space is to not wait! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (intel_ring_space(ringbuf) >= bytes)
>   		return 0;
>
> +	/* The whole point of reserving space is to not wait! */
> +	WARN_ON(ringbuf->reserved_in_use);
> +
>   	list_for_each_entry(target, &ring->request_list, list) {
>   		/*
>   		 * The request queue is per-engine, so can contain requests
> @@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	execlists_context_queue(request);
>   }
>
> -static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>   {
> -	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	uint32_t __iomem *virt;
>   	int rem = ringbuf->size - ringbuf->tail;
>
> -	/* Can't wrap if space has already been reserved! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
> -	if (ringbuf->space < rem) {
> -		int ret = logical_ring_wait_for_space(req, rem);
> -
> -		if (ret)
> -			return ret;
> -	}
> -
>   	virt = ringbuf->virtual_start + ringbuf->tail;
>   	rem /= 4;
>   	while (rem--)
> @@ -741,40 +730,49 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>
>   	ringbuf->tail = 0;
>   	intel_ring_update_space(ringbuf);
> -
> -	return 0;
>   }
>
>   static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
>   {
>   	struct intel_ringbuffer *ringbuf = req->ringbuf;
> -	int ret;
> -
> -	/*
> -	 * Add on the reserved size to the request to make sure that after
> -	 * the intended commands have been emitted, there is guaranteed to
> -	 * still be enough free space to send them to the hardware.
> -	 */
> -	if (!ringbuf->reserved_in_use)
> -		bytes += ringbuf->reserved_size;
> -
> -	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> -		ret = logical_ring_wrap_buffer(req);
> -		if (unlikely(ret))
> -			return ret;
> +	int remain = ringbuf->effective_size - ringbuf->tail;
> +	int ret, total_bytes, wait_bytes = 0;
> +	bool need_wrap = false;
>
> -		if(ringbuf->reserved_size) {
> -			uint32_t size = ringbuf->reserved_size;
> +	if (ringbuf->reserved_in_use)
> +		total_bytes = bytes;
> +	else
> +		total_bytes = bytes + ringbuf->reserved_size;
>
> -			intel_ring_reserved_space_cancel(ringbuf);
> -			intel_ring_reserved_space_reserve(ringbuf, size);
> +	if (unlikely(bytes > remain)) {
> +		/*
> +		 * Not enough space for the basic request. So need to flush
> +		 * out the remainder and then wait for base + reserved.
> +		 */
> +		wait_bytes = remain + total_bytes;
> +		need_wrap = true;
> +	} else {
> +		if (unlikely(total_bytes > remain)) {
> +			/*
> +			 * The base request will fit but the reserved space
> +			 * falls off the end. So only need to wait for the
> +			 * reserved size after flushing out the remainder.
> +			 */
> +			wait_bytes = remain + ringbuf->reserved_size;
> +			need_wrap = true;
> +		} else if (total_bytes > ringbuf->space) {
> +			/* No wrapping required, just waiting. */
> +			wait_bytes = total_bytes;
>   		}
>   	}
>
> -	if (unlikely(ringbuf->space < bytes)) {
> -		ret = logical_ring_wait_for_space(req, bytes);
> +	if (wait_bytes) {
> +		ret = logical_ring_wait_for_space(req, wait_bytes);

Here we're potentially waiting for an amount of space that is 
optimistically computed (ringbuf->effective_size - ringbuf->tail). If we 
want to stay consistent, that is, stay pessimistic, we should use 
ringbuf->size - ringbuf->tail so that we're waiting for the biggest 
possible size here.
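
I.e. something like (sketch of the suggested change only, not a tested 
patch):

	/* Pessimistic: count the padding lost at the end of the ring too. */
	int remain = ringbuf->size - ringbuf->tail;

instead of basing it on ringbuf->effective_size, which excludes that 
padding.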

>   		if (unlikely(ret))
>   			return ret;
> +
> +		if (need_wrap)
> +			__wrap_ring_buffer(ringbuf);
>   	}
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index af7c12e..9b10019 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>   	unsigned space;
>   	int ret;
>
> -	/* The whole point of reserving space is to not wait! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (intel_ring_space(ringbuf) >= n)
>   		return 0;
>
> +	/* The whole point of reserving space is to not wait! */
> +	WARN_ON(ringbuf->reserved_in_use);
> +
>   	list_for_each_entry(request, &ring->request_list, list) {
>   		space = __intel_ring_space(request->postfix, ringbuf->tail,
>   					   ringbuf->size);
> @@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>   	return 0;
>   }
>
> -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>   {
>   	uint32_t __iomem *virt;
> -	struct intel_ringbuffer *ringbuf = ring->buffer;
>   	int rem = ringbuf->size - ringbuf->tail;
>
> -	/* Can't wrap if space has already been reserved! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
> -	if (ringbuf->space < rem) {
> -		int ret = ring_wait_for_space(ring, rem);
> -		if (ret)
> -			return ret;
> -	}
> -
>   	virt = ringbuf->virtual_start + ringbuf->tail;
>   	rem /= 4;
>   	while (rem--)
> @@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>
>   	ringbuf->tail = 0;
>   	intel_ring_update_space(ringbuf);
> -
> -	return 0;
>   }
>
>   int intel_ring_idle(struct intel_engine_cs *ring)
> @@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>   {
>   	WARN_ON(!ringbuf->reserved_in_use);
> -	WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
> -	     "request reserved size too small: %d vs %d!\n",
> -	     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
> +	if (ringbuf->tail > ringbuf->reserved_tail) {
> +		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
> +		     "request reserved size too small: %d vs %d!\n",
> +		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
> +	} else {
> +		/*
> +		 * The ring was wrapped while the reserved space was in use.
> +		 * That means that some unknown amount of the ring tail was
> +		 * no-op filled and skipped. Thus simply adding the ring size
> +		 * to the tail and doing the above space check will not work.
> +		 * Rather than attempt to track how much tail was skipped,
> +		 * it is much simpler to say that also skipping the sanity
> +		 * check every once in a while is not a big issue.
> +		 */
> +	}
>
>   	ringbuf->reserved_size   = 0;
>   	ringbuf->reserved_in_use = false;
> @@ -2249,33 +2249,44 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>   static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
>   {
>   	struct intel_ringbuffer *ringbuf = ring->buffer;
> -	int ret;
> -
> -	/*
> -	 * Add on the reserved size to the request to make sure that after
> -	 * the intended commands have been emitted, there is guaranteed to
> -	 * still be enough free space to send them to the hardware.
> -	 */
> -	if (!ringbuf->reserved_in_use)
> -		bytes += ringbuf->reserved_size;
> -
> -	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> -		ret = intel_wrap_ring_buffer(ring);
> -		if (unlikely(ret))
> -			return ret;
> +	int remain = ringbuf->effective_size - ringbuf->tail;
> +	int ret, total_bytes, wait_bytes = 0;
> +	bool need_wrap = false;
>
> -		if(ringbuf->reserved_size) {
> -			uint32_t size = ringbuf->reserved_size;
> +	if (ringbuf->reserved_in_use)
> +		total_bytes = bytes;
> +	else
> +		total_bytes = bytes + ringbuf->reserved_size;
>
> -			intel_ring_reserved_space_cancel(ringbuf);
> -			intel_ring_reserved_space_reserve(ringbuf, size);
> +	if (unlikely(bytes > remain)) {
> +		/*
> +		 * Not enough space for the basic request. So need to flush
> +		 * out the remainder and then wait for base + reserved.
> +		 */
> +		wait_bytes = remain + total_bytes;
> +		need_wrap = true;
> +	} else {
> +		if (unlikely(total_bytes > remain)) {
> +			/*
> +			 * The base request will fit but the reserved space
> +			 * falls off the end. So only need to wait for the
> +			 * reserved size after flushing out the remainder.
> +			 */
> +			wait_bytes = remain + ringbuf->reserved_size;
> +			need_wrap = true;
> +		} else if (total_bytes > ringbuf->space) {
> +			/* No wrapping required, just waiting. */
> +			wait_bytes = total_bytes;
>   		}
>   	}
>
> -	if (unlikely(ringbuf->space < bytes)) {
> -		ret = ring_wait_for_space(ring, bytes);
> +	if (wait_bytes) {
> +		ret = ring_wait_for_space(ring, wait_bytes);

Same issue here as for the LRC implementation.

Thanks,
Tomas

>   		if (unlikely(ret))
>   			return ret;
> +
> +		if (need_wrap)
> +			__wrap_ring_buffer(ringbuf);
>   	}
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0e2bbc6..304cac4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>    * will always have sufficient room to do its stuff. The request creation
>    * code calls this automatically.
>    */
> -int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>   void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
>   /* Cancel the reservation, e.g. because the request is being discarded. */
>   void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
> @@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
>   /* Finish with the reserved space - for use by i915_add_request() only. */
>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
>
> +/* Legacy ringbuffer specific portion of reservation code: */
> +int intel_ring_reserve_space(struct drm_i915_gem_request *request);
> +
>   #endif /* _INTEL_RINGBUFFER_H_ */
>


* [PATCH] drm/i915: Reserve space improvements
  2015-06-24 17:03 [PATCH] drm/i915: Reserve space improvements John.C.Harrison
                   ` (2 preceding siblings ...)
  2015-06-29 16:36 ` John.C.Harrison
@ 2015-06-30 11:40 ` John.C.Harrison
  2015-06-30 14:43   ` Tomas Elf
  2015-07-01 22:18   ` shuang.he
  3 siblings, 2 replies; 14+ messages in thread
From: John.C.Harrison @ 2015-06-30 11:40 UTC (permalink / raw)
  To: Intel-GFX

From: John Harrison <John.C.Harrison@Intel.com>

An earlier patch was added to reserve space in the ring buffer for the
commands issued during 'add_request()'. The initial version was
pessimistic in the way it handled buffer wrapping and would cause
premature wraps and thus waste ring space.

This patch updates the code to better handle the wrap case. It no
longer enforces that the space being asked for and the reserved space
are a single contiguous block. Instead, it allows the reserve to be on
the far end of a wrap operation. It still guarantees that the space is
available so when the wrap occurs, no wait will happen. Thus the wrap
cannot fail which is the whole point of the exercise.

Also fixed a merge failure with some comments from the original patch.

v2: Incorporated suggestion by David Gordon to move the wrap code
inside the prepare function and thus allow a single combined
wait_for_space() call rather than doing one before the wrap and
another after. This also makes the prepare code much simpler and
easier to follow.

v3: Fix for 'effective_size' vs 'size' during ring buffer remainder
calculations (spotted by Tomas Elf).

For: VIZ-5115
CC: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 73 +++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 90 +++++++++++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
 3 files changed, 90 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b36e064..7d9515d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
 	unsigned space;
 	int ret;
 
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (intel_ring_space(ringbuf) >= bytes)
 		return 0;
 
+	/* The whole point of reserving space is to not wait! */
+	WARN_ON(ringbuf->reserved_in_use);
+
 	list_for_each_entry(target, &ring->request_list, list) {
 		/*
 		 * The request queue is per-engine, so can contain requests
@@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 	execlists_context_queue(request);
 }
 
-static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
-	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	uint32_t __iomem *virt;
 	int rem = ringbuf->size - ringbuf->tail;
 
-	/* Can't wrap if space has already been reserved! */
-	WARN_ON(ringbuf->reserved_in_use);
-
-	if (ringbuf->space < rem) {
-		int ret = logical_ring_wait_for_space(req, rem);
-
-		if (ret)
-			return ret;
-	}
-
 	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
@@ -741,40 +730,50 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
 
 	ringbuf->tail = 0;
 	intel_ring_update_space(ringbuf);
-
-	return 0;
 }
 
 static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
 {
 	struct intel_ringbuffer *ringbuf = req->ringbuf;
-	int ret;
-
-	/*
-	 * Add on the reserved size to the request to make sure that after
-	 * the intended commands have been emitted, there is guaranteed to
-	 * still be enough free space to send them to the hardware.
-	 */
-	if (!ringbuf->reserved_in_use)
-		bytes += ringbuf->reserved_size;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = logical_ring_wrap_buffer(req);
-		if (unlikely(ret))
-			return ret;
+	int remain_usable = ringbuf->effective_size - ringbuf->tail;
+	int remain_actual = ringbuf->size - ringbuf->tail;
+	int ret, total_bytes, wait_bytes = 0;
+	bool need_wrap = false;
 
-		if(ringbuf->reserved_size) {
-			uint32_t size = ringbuf->reserved_size;
+	if (ringbuf->reserved_in_use)
+		total_bytes = bytes;
+	else
+		total_bytes = bytes + ringbuf->reserved_size;
 
-			intel_ring_reserved_space_cancel(ringbuf);
-			intel_ring_reserved_space_reserve(ringbuf, size);
+	if (unlikely(bytes > remain_usable)) {
+		/*
+		 * Not enough space for the basic request. So need to flush
+		 * out the remainder and then wait for base + reserved.
+		 */
+		wait_bytes = remain_actual + total_bytes;
+		need_wrap = true;
+	} else {
+		if (unlikely(total_bytes > remain_usable)) {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So only need to wait for the
+			 * reserved size after flushing out the remainder.
+			 */
+			wait_bytes = remain_actual + ringbuf->reserved_size;
+			need_wrap = true;
+		} else if (total_bytes > ringbuf->space) {
+			/* No wrapping required, just waiting. */
+			wait_bytes = total_bytes;
 		}
 	}
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = logical_ring_wait_for_space(req, bytes);
+	if (wait_bytes) {
+		ret = logical_ring_wait_for_space(req, wait_bytes);
 		if (unlikely(ret))
 			return ret;
+
+		if (need_wrap)
+			__wrap_ring_buffer(ringbuf);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index af7c12e..e39c891 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	unsigned space;
 	int ret;
 
-	/* The whole point of reserving space is to not wait! */
-	WARN_ON(ringbuf->reserved_in_use);
-
 	if (intel_ring_space(ringbuf) >= n)
 		return 0;
 
+	/* The whole point of reserving space is to not wait! */
+	WARN_ON(ringbuf->reserved_in_use);
+
 	list_for_each_entry(request, &ring->request_list, list) {
 		space = __intel_ring_space(request->postfix, ringbuf->tail,
 					   ringbuf->size);
@@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	return 0;
 }
 
-static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
 {
 	uint32_t __iomem *virt;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
 	int rem = ringbuf->size - ringbuf->tail;
 
-	/* Can't wrap if space has already been reserved! */
-	WARN_ON(ringbuf->reserved_in_use);
-
-	if (ringbuf->space < rem) {
-		int ret = ring_wait_for_space(ring, rem);
-		if (ret)
-			return ret;
-	}
-
 	virt = ringbuf->virtual_start + ringbuf->tail;
 	rem /= 4;
 	while (rem--)
@@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
 	ringbuf->tail = 0;
 	intel_ring_update_space(ringbuf);
-
-	return 0;
 }
 
 int intel_ring_idle(struct intel_engine_cs *ring)
@@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 {
 	WARN_ON(!ringbuf->reserved_in_use);
-	WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
-	     "request reserved size too small: %d vs %d!\n",
-	     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+	if (ringbuf->tail > ringbuf->reserved_tail) {
+		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
+		     "request reserved size too small: %d vs %d!\n",
+		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
+	} else {
+		/*
+		 * The ring was wrapped while the reserved space was in use.
+		 * That means that some unknown amount of the ring tail was
+		 * no-op filled and skipped. Thus simply adding the ring size
+		 * to the tail and doing the above space check will not work.
+		 * Rather than attempt to track how much tail was skipped,
+		 * it is much simpler to say that also skipping the sanity
+		 * check every once in a while is not a big issue.
+		 */
+	}
 
 	ringbuf->reserved_size   = 0;
 	ringbuf->reserved_in_use = false;
@@ -2249,33 +2249,45 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
 static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	int ret;
-
-	/*
-	 * Add on the reserved size to the request to make sure that after
-	 * the intended commands have been emitted, there is guaranteed to
-	 * still be enough free space to send them to the hardware.
-	 */
-	if (!ringbuf->reserved_in_use)
-		bytes += ringbuf->reserved_size;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = intel_wrap_ring_buffer(ring);
-		if (unlikely(ret))
-			return ret;
+	int remain_usable = ringbuf->effective_size - ringbuf->tail;
+	int remain_actual = ringbuf->size - ringbuf->tail;
+	int ret, total_bytes, wait_bytes = 0;
+	bool need_wrap = false;
 
-		if(ringbuf->reserved_size) {
-			uint32_t size = ringbuf->reserved_size;
+	if (ringbuf->reserved_in_use)
+		total_bytes = bytes;
+	else
+		total_bytes = bytes + ringbuf->reserved_size;
 
-			intel_ring_reserved_space_cancel(ringbuf);
-			intel_ring_reserved_space_reserve(ringbuf, size);
+	if (unlikely(bytes > remain_usable)) {
+		/*
+		 * Not enough space for the basic request. So need to flush
+		 * out the remainder and then wait for base + reserved.
+		 */
+		wait_bytes = remain_actual + total_bytes;
+		need_wrap = true;
+	} else {
+		if (unlikely(total_bytes > remain_usable)) {
+			/*
+			 * The base request will fit but the reserved space
+			 * falls off the end. So only need to wait for the
+			 * reserved size after flushing out the remainder.
+			 */
+			wait_bytes = remain_actual + ringbuf->reserved_size;
+			need_wrap = true;
+		} else if (total_bytes > ringbuf->space) {
+			/* No wrapping required, just waiting. */
+			wait_bytes = total_bytes;
 		}
 	}
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = ring_wait_for_space(ring, bytes);
+	if (wait_bytes) {
+		ret = ring_wait_for_space(ring, wait_bytes);
 		if (unlikely(ret))
 			return ret;
+
+		if (need_wrap)
+			__wrap_ring_buffer(ringbuf);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0e2bbc6..304cac4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
  * will always have sufficient room to do its stuff. The request creation
  * code calls this automatically.
  */
-int intel_ring_reserve_space(struct drm_i915_gem_request *request);
 void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
 /* Cancel the reservation, e.g. because the request is being discarded. */
 void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
@@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
 /* Finish with the reserved space - for use by i915_add_request() only. */
 void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
 
+/* Legacy ringbuffer specific portion of reservation code: */
+int intel_ring_reserve_space(struct drm_i915_gem_request *request);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
1.9.1


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-30 11:40 ` John.C.Harrison
@ 2015-06-30 14:43   ` Tomas Elf
  2015-06-30 15:53     ` John Harrison
  2015-07-01 22:18   ` shuang.he
  1 sibling, 1 reply; 14+ messages in thread
From: Tomas Elf @ 2015-06-30 14:43 UTC (permalink / raw)
  To: John.C.Harrison, Intel-GFX

On 30/06/2015 12:40, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> An earlier patch was added to reserve space in the ring buffer for the
> commands issued during 'add_request()'. The initial version was
> pessimistic in the way it handled buffer wrapping and would cause
> premature wraps and thus waste ring space.
>
> This patch updates the code to better handle the wrap case. It no
> longer enforces that the space being asked for and the reserved space
> are a single contiguous block. Instead, it allows the reserve to be on
> the far end of a wrap operation. It still guarantees that the space is
> available so when the wrap occurs, no wait will happen. Thus the wrap
> cannot fail which is the whole point of the exercise.
>
> Also fixed a merge failure with some comments from the original patch.
>
> v2: Incorporated suggestion by David Gordon to move the wrap code
> inside the prepare function and thus allow a single combined
> wait_for_space() call rather than doing one before the wrap and
> another after. This also makes the prepare code much simpler and
> easier to follow.
>
> v3: Fix for 'effective_size' vs 'size' during ring buffer remainder
> calculations (spotted by Tomas Elf).
>
> For: VIZ-5115
> CC: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 73 +++++++++++++-------------
>   drivers/gpu/drm/i915/intel_ringbuffer.c | 90 +++++++++++++++++++--------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>   3 files changed, 90 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b36e064..7d9515d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>   	unsigned space;
>   	int ret;
>
> -	/* The whole point of reserving space is to not wait! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (intel_ring_space(ringbuf) >= bytes)
>   		return 0;
>
> +	/* The whole point of reserving space is to not wait! */
> +	WARN_ON(ringbuf->reserved_in_use);
> +
>   	list_for_each_entry(target, &ring->request_list, list) {
>   		/*
>   		 * The request queue is per-engine, so can contain requests
> @@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>   	execlists_context_queue(request);
>   }
>
> -static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>   {
> -	struct intel_ringbuffer *ringbuf = req->ringbuf;
>   	uint32_t __iomem *virt;
>   	int rem = ringbuf->size - ringbuf->tail;
>
> -	/* Can't wrap if space has already been reserved! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
> -	if (ringbuf->space < rem) {
> -		int ret = logical_ring_wait_for_space(req, rem);
> -
> -		if (ret)
> -			return ret;
> -	}
> -
>   	virt = ringbuf->virtual_start + ringbuf->tail;
>   	rem /= 4;
>   	while (rem--)
> @@ -741,40 +730,50 @@ static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>
>   	ringbuf->tail = 0;
>   	intel_ring_update_space(ringbuf);
> -
> -	return 0;
>   }
>
>   static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
>   {
>   	struct intel_ringbuffer *ringbuf = req->ringbuf;
> -	int ret;
> -
> -	/*
> -	 * Add on the reserved size to the request to make sure that after
> -	 * the intended commands have been emitted, there is guaranteed to
> -	 * still be enough free space to send them to the hardware.
> -	 */
> -	if (!ringbuf->reserved_in_use)
> -		bytes += ringbuf->reserved_size;
> -
> -	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> -		ret = logical_ring_wrap_buffer(req);
> -		if (unlikely(ret))
> -			return ret;
> +	int remain_usable = ringbuf->effective_size - ringbuf->tail;
> +	int remain_actual = ringbuf->size - ringbuf->tail;

You could add a comment here (and in the legacy implementation) 
explaining why we make the distinction between remain_usable and 
remain_actual. Or you could add the comment when we actually use 
remain_actual further down in the function. It's up to you.

We first need to be pessimistic about how much space is left in the ring 
buffer when determining the need for wrapping - therefore we use 
remain_usable, which disregards the end-of-buffer padding - and then we 
need to be pessimistic about how much ring space we need to wait for - 
therefore we use remain_actual, which takes the end-of-buffer padding 
into consideration to make sure that we're actually not waiting for too 
little space.
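
Something along these lines is the kind of comment I had in mind (just
a sketch):

	/*
	 * remain_usable excludes the end-of-buffer padding, so it tells us
	 * whether the request fits before we would have to wrap.
	 * remain_actual is the real number of bytes up to the end of the
	 * ring, i.e. what has to be flushed out (and therefore waited for)
	 * when we do wrap.
	 */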

If you add those comments maybe you could also rename the variables to 
something like "remaining_space_usable" etc. since "remain_usable" is 
more of a verb than a noun. But that's seriously nitpicking.

If you at least add the comments to make it crystal clear why we do it 
this way then I'm fine.

Reviewed-by: Tomas Elf <tomas.elf@intel.com>

Thanks,
Tomas

> +	int ret, total_bytes, wait_bytes = 0;
> +	bool need_wrap = false;
>
> -		if(ringbuf->reserved_size) {
> -			uint32_t size = ringbuf->reserved_size;
> +	if (ringbuf->reserved_in_use)
> +		total_bytes = bytes;
> +	else
> +		total_bytes = bytes + ringbuf->reserved_size;
>
> -			intel_ring_reserved_space_cancel(ringbuf);
> -			intel_ring_reserved_space_reserve(ringbuf, size);
> +	if (unlikely(bytes > remain_usable)) {
> +		/*
> +		 * Not enough space for the basic request. So need to flush
> +		 * out the remainder and then wait for base + reserved.
> +		 */
> +		wait_bytes = remain_actual + total_bytes;
> +		need_wrap = true;
> +	} else {
> +		if (unlikely(total_bytes > remain_usable)) {
> +			/*
> +			 * The base request will fit but the reserved space
> +			 * falls off the end. So only need to wait for the
> +			 * reserved size after flushing out the remainder.
> +			 */
> +			wait_bytes = remain_actual + ringbuf->reserved_size;
> +			need_wrap = true;
> +		} else if (total_bytes > ringbuf->space) {
> +			/* No wrapping required, just waiting. */
> +			wait_bytes = total_bytes;
>   		}
>   	}
>
> -	if (unlikely(ringbuf->space < bytes)) {
> -		ret = logical_ring_wait_for_space(req, bytes);
> +	if (wait_bytes) {
> +		ret = logical_ring_wait_for_space(req, wait_bytes);
>   		if (unlikely(ret))
>   			return ret;
> +
> +		if (need_wrap)
> +			__wrap_ring_buffer(ringbuf);
>   	}
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index af7c12e..e39c891 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>   	unsigned space;
>   	int ret;
>
> -	/* The whole point of reserving space is to not wait! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
>   	if (intel_ring_space(ringbuf) >= n)
>   		return 0;
>
> +	/* The whole point of reserving space is to not wait! */
> +	WARN_ON(ringbuf->reserved_in_use);
> +
>   	list_for_each_entry(request, &ring->request_list, list) {
>   		space = __intel_ring_space(request->postfix, ringbuf->tail,
>   					   ringbuf->size);
> @@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>   	return 0;
>   }
>
> -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>   {
>   	uint32_t __iomem *virt;
> -	struct intel_ringbuffer *ringbuf = ring->buffer;
>   	int rem = ringbuf->size - ringbuf->tail;
>
> -	/* Can't wrap if space has already been reserved! */
> -	WARN_ON(ringbuf->reserved_in_use);
> -
> -	if (ringbuf->space < rem) {
> -		int ret = ring_wait_for_space(ring, rem);
> -		if (ret)
> -			return ret;
> -	}
> -
>   	virt = ringbuf->virtual_start + ringbuf->tail;
>   	rem /= 4;
>   	while (rem--)
> @@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>
>   	ringbuf->tail = 0;
>   	intel_ring_update_space(ringbuf);
> -
> -	return 0;
>   }
>
>   int intel_ring_idle(struct intel_engine_cs *ring)
> @@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>   {
>   	WARN_ON(!ringbuf->reserved_in_use);
> -	WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
> -	     "request reserved size too small: %d vs %d!\n",
> -	     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
> +	if (ringbuf->tail > ringbuf->reserved_tail) {
> +		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
> +		     "request reserved size too small: %d vs %d!\n",
> +		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
> +	} else {
> +		/*
> +		 * The ring was wrapped while the reserved space was in use.
> +		 * That means that some unknown amount of the ring tail was
> +		 * no-op filled and skipped. Thus simply adding the ring size
> +		 * to the tail and doing the above space check will not work.
> +		 * Rather than attempt to track how much tail was skipped,
> +		 * it is much simpler to say that also skipping the sanity
> +		 * check every once in a while is not a big issue.
> +		 */
> +	}
>
>   	ringbuf->reserved_size   = 0;
>   	ringbuf->reserved_in_use = false;
> @@ -2249,33 +2249,45 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>   static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
>   {
>   	struct intel_ringbuffer *ringbuf = ring->buffer;
> -	int ret;
> -
> -	/*
> -	 * Add on the reserved size to the request to make sure that after
> -	 * the intended commands have been emitted, there is guaranteed to
> -	 * still be enough free space to send them to the hardware.
> -	 */
> -	if (!ringbuf->reserved_in_use)
> -		bytes += ringbuf->reserved_size;
> -
> -	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> -		ret = intel_wrap_ring_buffer(ring);
> -		if (unlikely(ret))
> -			return ret;
> +	int remain_usable = ringbuf->effective_size - ringbuf->tail;
> +	int remain_actual = ringbuf->size - ringbuf->tail;
> +	int ret, total_bytes, wait_bytes = 0;
> +	bool need_wrap = false;
>
> -		if(ringbuf->reserved_size) {
> -			uint32_t size = ringbuf->reserved_size;
> +	if (ringbuf->reserved_in_use)
> +		total_bytes = bytes;
> +	else
> +		total_bytes = bytes + ringbuf->reserved_size;
>
> -			intel_ring_reserved_space_cancel(ringbuf);
> -			intel_ring_reserved_space_reserve(ringbuf, size);
> +	if (unlikely(bytes > remain_usable)) {
> +		/*
> +		 * Not enough space for the basic request. So need to flush
> +		 * out the remainder and then wait for base + reserved.
> +		 */
> +		wait_bytes = remain_actual + total_bytes;
> +		need_wrap = true;
> +	} else {
> +		if (unlikely(total_bytes > remain_usable)) {
> +			/*
> +			 * The base request will fit but the reserved space
> +			 * falls off the end. So only need to wait for the
> +			 * reserved size after flushing out the remainder.
> +			 */
> +			wait_bytes = remain_actual + ringbuf->reserved_size;
> +			need_wrap = true;
> +		} else if (total_bytes > ringbuf->space) {
> +			/* No wrapping required, just waiting. */
> +			wait_bytes = total_bytes;
>   		}
>   	}
>
> -	if (unlikely(ringbuf->space < bytes)) {
> -		ret = ring_wait_for_space(ring, bytes);
> +	if (wait_bytes) {
> +		ret = ring_wait_for_space(ring, wait_bytes);
>   		if (unlikely(ret))
>   			return ret;
> +
> +		if (need_wrap)
> +			__wrap_ring_buffer(ringbuf);
>   	}
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0e2bbc6..304cac4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>    * will always have sufficient room to do its stuff. The request creation
>    * code calls this automatically.
>    */
> -int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>   void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
>   /* Cancel the reservation, e.g. because the request is being discarded. */
>   void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
> @@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
>   /* Finish with the reserved space - for use by i915_add_request() only. */
>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
>
> +/* Legacy ringbuffer specific portion of reservation code: */
> +int intel_ring_reserve_space(struct drm_i915_gem_request *request);
> +
>   #endif /* _INTEL_RINGBUFFER_H_ */
>


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-30 14:43   ` Tomas Elf
@ 2015-06-30 15:53     ` John Harrison
  2015-06-30 16:15       ` Tomas Elf
  0 siblings, 1 reply; 14+ messages in thread
From: John Harrison @ 2015-06-30 15:53 UTC (permalink / raw)
  To: Tomas Elf, Intel-GFX

On 30/06/2015 15:43, Tomas Elf wrote:
> On 30/06/2015 12:40, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> An earlier patch was added to reserve space in the ring buffer for the
>> commands issued during 'add_request()'. The initial version was
>> pessimistic in the way it handled buffer wrapping and would cause
>> premature wraps and thus waste ring space.
>>
>> This patch updates the code to better handle the wrap case. It no
>> longer enforces that the space being asked for and the reserved space
>> are a single contiguous block. Instead, it allows the reserve to be on
>> the far end of a wrap operation. It still guarantees that the space is
>> available so when the wrap occurs, no wait will happen. Thus the wrap
>> cannot fail which is the whole point of the exercise.
>>
>> Also fixed a merge failure with some comments from the original patch.
>>
>> v2: Incorporated suggestion by David Gordon to move the wrap code
>> inside the prepare function and thus allow a single combined
>> wait_for_space() call rather than doing one before the wrap and
>> another after. This also makes the prepare code much simpler and
>> easier to follow.
>>
>> v3: Fix for 'effective_size' vs 'size' during ring buffer remainder
>> calculations (spotted by Tomas Elf).
>>
>> For: VIZ-5115
>> CC: Daniel Vetter <daniel@ffwll.ch>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_lrc.c        | 73 
>> +++++++++++++-------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 90 
>> +++++++++++++++++++--------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>>   3 files changed, 90 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index b36e064..7d9515d 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct 
>> drm_i915_gem_request *req,
>>       unsigned space;
>>       int ret;
>>
>> -    /* The whole point of reserving space is to not wait! */
>> -    WARN_ON(ringbuf->reserved_in_use);
>> -
>>       if (intel_ring_space(ringbuf) >= bytes)
>>           return 0;
>>
>> +    /* The whole point of reserving space is to not wait! */
>> +    WARN_ON(ringbuf->reserved_in_use);
>> +
>>       list_for_each_entry(target, &ring->request_list, list) {
>>           /*
>>            * The request queue is per-engine, so can contain requests
>> @@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct 
>> drm_i915_gem_request *request)
>>       execlists_context_queue(request);
>>   }
>>
>> -static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>>   {
>> -    struct intel_ringbuffer *ringbuf = req->ringbuf;
>>       uint32_t __iomem *virt;
>>       int rem = ringbuf->size - ringbuf->tail;
>>
>> -    /* Can't wrap if space has already been reserved! */
>> -    WARN_ON(ringbuf->reserved_in_use);
>> -
>> -    if (ringbuf->space < rem) {
>> -        int ret = logical_ring_wait_for_space(req, rem);
>> -
>> -        if (ret)
>> -            return ret;
>> -    }
>> -
>>       virt = ringbuf->virtual_start + ringbuf->tail;
>>       rem /= 4;
>>       while (rem--)
>> @@ -741,40 +730,50 @@ static int logical_ring_wrap_buffer(struct 
>> drm_i915_gem_request *req)
>>
>>       ringbuf->tail = 0;
>>       intel_ring_update_space(ringbuf);
>> -
>> -    return 0;
>>   }
>>
>>   static int logical_ring_prepare(struct drm_i915_gem_request *req, 
>> int bytes)
>>   {
>>       struct intel_ringbuffer *ringbuf = req->ringbuf;
>> -    int ret;
>> -
>> -    /*
>> -     * Add on the reserved size to the request to make sure that after
>> -     * the intended commands have been emitted, there is guaranteed to
>> -     * still be enough free space to send them to the hardware.
>> -     */
>> -    if (!ringbuf->reserved_in_use)
>> -        bytes += ringbuf->reserved_size;
>> -
>> -    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>> -        ret = logical_ring_wrap_buffer(req);
>> -        if (unlikely(ret))
>> -            return ret;
>> +    int remain_usable = ringbuf->effective_size - ringbuf->tail;
>> +    int remain_actual = ringbuf->size - ringbuf->tail;
>
> You could add a comment here (and in the legacy implementation) 
> explaining why we make the distinction between remain_usable and 
> remain_actual. Or you could add the comment when we actually use 
> remain_actual further down in the function. It's up to you.
>
> We first need to be pessimistic about how much space is left in the 
> ring buffer when determining the need for wrapping - therefore we use 
> remain_usable, which disregards the end-of-buffer padding - and then 
> we need to be pessimistic about how much ring space we need to wait 
> for - therefore we use remain_actual, which takes the end-of-buffer 
> padding into consideration to make sure that we're actually not 
> waiting for too little space.

It's not about being pessimistic or optimistic. That implies there is 
some choice, that one version is a little bit better in one situation 
but either would do, whereas this is about functional correctness. The 
difference between the actual ring size and the usable ring size is not 
some reserved-space tweak to make something go faster; it is about the 
hardware locking up if the entire buffer is used. I think 'usable' and 
'actual' are fairly obvious names. If you want to know the details of 
why there is an 'effective_size' in the first place, look up 
'effective_size' in the code and read the comment about i830 hangs.
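
(For reference, the ring init sets this up along the lines of the
following; I'm paraphrasing from memory rather than quoting the driver
exactly:

	ringbuf->effective_size = ringbuf->size;
	if (IS_I830(dev) || IS_845G(dev))
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;

i.e. on those parts the last two cachelines of the ring are
deliberately never used.)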

> If you add those comments maybe you could also rename the variables to 
> something like "remaining_space_usable" etc. since "remain_usable" is 
> more of a verb than a noun. But that's seriously nitpicking.
Or maybe 
'remaining_space_that_is_usable_without_causing_an_i830_to_hang_because_it_skips_the_last_two_cachelines'? 
There is such a thing as being too verbose and making the code hard to read.


> If you at least add the comments to make it crystal clear why we do it 
> this way then I'm fine.

I don't see what comment could be added that would actually make things 
clearer without being a long and unnecessary description of the i830 
issue. There are two variables declared on consecutive lines that cache 
pretty simple calculations and have fairly clear names. It looks quite 
self explanatory to me! The code then tests to see if it can use the 
usable space, if not then it wraps around the actual buffer size. Again, 
seems pretty obvious as to what is going on and why - you can only use 
the usable bit, but when traversing the whole buffer you pretty 
obviously need to traverse the whole buffer not just the part that may 
or may not have been used.
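
In outline (condensed from the patch itself):

	if (bytes > remain_usable) {
		/* even the base request does not fit before the end: wrap,
		 * so wait for the whole physical remainder plus base + reserved */
		wait_bytes = remain_actual + total_bytes;
		need_wrap = true;
	} else if (total_bytes > remain_usable) {
		/* the base request fits, only the reserve falls off the end */
		wait_bytes = remain_actual + ringbuf->reserved_size;
		need_wrap = true;
	} else if (total_bytes > ringbuf->space) {
		/* no wrap needed, just wait for enough space to free up */
		wait_bytes = total_bytes;
	}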


>
> Reviewed-by: Tomas Elf <tomas.elf@intel.com>
>
> Thanks,
> Tomas
>
>> +    int ret, total_bytes, wait_bytes = 0;
>> +    bool need_wrap = false;
>>
>> -        if(ringbuf->reserved_size) {
>> -            uint32_t size = ringbuf->reserved_size;
>> +    if (ringbuf->reserved_in_use)
>> +        total_bytes = bytes;
>> +    else
>> +        total_bytes = bytes + ringbuf->reserved_size;
>>
>> -            intel_ring_reserved_space_cancel(ringbuf);
>> -            intel_ring_reserved_space_reserve(ringbuf, size);
>> +    if (unlikely(bytes > remain_usable)) {
>> +        /*
>> +         * Not enough space for the basic request. So need to flush
>> +         * out the remainder and then wait for base + reserved.
>> +         */
>> +        wait_bytes = remain_actual + total_bytes;
>> +        need_wrap = true;
>> +    } else {
>> +        if (unlikely(total_bytes > remain_usable)) {
>> +            /*
>> +             * The base request will fit but the reserved space
>> +             * falls off the end. So only need to wait for the
>> +             * reserved size after flushing out the remainder.
>> +             */
>> +            wait_bytes = remain_actual + ringbuf->reserved_size;
>> +            need_wrap = true;
>> +        } else if (total_bytes > ringbuf->space) {
>> +            /* No wrapping required, just waiting. */
>> +            wait_bytes = total_bytes;
>>           }
>>       }
>>
>> -    if (unlikely(ringbuf->space < bytes)) {
>> -        ret = logical_ring_wait_for_space(req, bytes);
>> +    if (wait_bytes) {
>> +        ret = logical_ring_wait_for_space(req, wait_bytes);
>>           if (unlikely(ret))
>>               return ret;
>> +
>> +        if (need_wrap)
>> +            __wrap_ring_buffer(ringbuf);
>>       }
>>
>>       return 0;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index af7c12e..e39c891 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct 
>> intel_engine_cs *ring, int n)
>>       unsigned space;
>>       int ret;
>>
>> -    /* The whole point of reserving space is to not wait! */
>> -    WARN_ON(ringbuf->reserved_in_use);
>> -
>>       if (intel_ring_space(ringbuf) >= n)
>>           return 0;
>>
>> +    /* The whole point of reserving space is to not wait! */
>> +    WARN_ON(ringbuf->reserved_in_use);
>> +
>>       list_for_each_entry(request, &ring->request_list, list) {
>>           space = __intel_ring_space(request->postfix, ringbuf->tail,
>>                          ringbuf->size);
>> @@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct 
>> intel_engine_cs *ring, int n)
>>       return 0;
>>   }
>>
>> -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>>   {
>>       uint32_t __iomem *virt;
>> -    struct intel_ringbuffer *ringbuf = ring->buffer;
>>       int rem = ringbuf->size - ringbuf->tail;
>>
>> -    /* Can't wrap if space has already been reserved! */
>> -    WARN_ON(ringbuf->reserved_in_use);
>> -
>> -    if (ringbuf->space < rem) {
>> -        int ret = ring_wait_for_space(ring, rem);
>> -        if (ret)
>> -            return ret;
>> -    }
>> -
>>       virt = ringbuf->virtual_start + ringbuf->tail;
>>       rem /= 4;
>>       while (rem--)
>> @@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct 
>> intel_engine_cs *ring)
>>
>>       ringbuf->tail = 0;
>>       intel_ring_update_space(ringbuf);
>> -
>> -    return 0;
>>   }
>>
>>   int intel_ring_idle(struct intel_engine_cs *ring)
>> @@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct 
>> intel_ringbuffer *ringbuf)
>>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>>   {
>>       WARN_ON(!ringbuf->reserved_in_use);
>> -    WARN(ringbuf->tail > ringbuf->reserved_tail + 
>> ringbuf->reserved_size,
>> -         "request reserved size too small: %d vs %d!\n",
>> -         ringbuf->tail - ringbuf->reserved_tail, 
>> ringbuf->reserved_size);
>> +    if (ringbuf->tail > ringbuf->reserved_tail) {
>> +        WARN(ringbuf->tail > ringbuf->reserved_tail + 
>> ringbuf->reserved_size,
>> +             "request reserved size too small: %d vs %d!\n",
>> +             ringbuf->tail - ringbuf->reserved_tail, 
>> ringbuf->reserved_size);
>> +    } else {
>> +        /*
>> +         * The ring was wrapped while the reserved space was in use.
>> +         * That means that some unknown amount of the ring tail was
>> +         * no-op filled and skipped. Thus simply adding the ring size
>> +         * to the tail and doing the above space check will not work.
>> +         * Rather than attempt to track how much tail was skipped,
>> +         * it is much simpler to say that also skipping the sanity
>> +         * check every once in a while is not a big issue.
>> +         */
>> +    }
>>
>>       ringbuf->reserved_size   = 0;
>>       ringbuf->reserved_in_use = false;
>> @@ -2249,33 +2249,45 @@ void intel_ring_reserved_space_end(struct 
>> intel_ringbuffer *ringbuf)
>>   static int __intel_ring_prepare(struct intel_engine_cs *ring, int 
>> bytes)
>>   {
>>       struct intel_ringbuffer *ringbuf = ring->buffer;
>> -    int ret;
>> -
>> -    /*
>> -     * Add on the reserved size to the request to make sure that after
>> -     * the intended commands have been emitted, there is guaranteed to
>> -     * still be enough free space to send them to the hardware.
>> -     */
>> -    if (!ringbuf->reserved_in_use)
>> -        bytes += ringbuf->reserved_size;
>> -
>> -    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>> -        ret = intel_wrap_ring_buffer(ring);
>> -        if (unlikely(ret))
>> -            return ret;
>> +    int remain_usable = ringbuf->effective_size - ringbuf->tail;
>> +    int remain_actual = ringbuf->size - ringbuf->tail;
>> +    int ret, total_bytes, wait_bytes = 0;
>> +    bool need_wrap = false;
>>
>> -        if(ringbuf->reserved_size) {
>> -            uint32_t size = ringbuf->reserved_size;
>> +    if (ringbuf->reserved_in_use)
>> +        total_bytes = bytes;
>> +    else
>> +        total_bytes = bytes + ringbuf->reserved_size;
>>
>> -            intel_ring_reserved_space_cancel(ringbuf);
>> -            intel_ring_reserved_space_reserve(ringbuf, size);
>> +    if (unlikely(bytes > remain_usable)) {
>> +        /*
>> +         * Not enough space for the basic request. So need to flush
>> +         * out the remainder and then wait for base + reserved.
>> +         */
>> +        wait_bytes = remain_actual + total_bytes;
>> +        need_wrap = true;
>> +    } else {
>> +        if (unlikely(total_bytes > remain_usable)) {
>> +            /*
>> +             * The base request will fit but the reserved space
>> +             * falls off the end. So only need to wait for the
>> +             * reserved size after flushing out the remainder.
>> +             */
>> +            wait_bytes = remain_actual + ringbuf->reserved_size;
>> +            need_wrap = true;
>> +        } else if (total_bytes > ringbuf->space) {
>> +            /* No wrapping required, just waiting. */
>> +            wait_bytes = total_bytes;
>>           }
>>       }
>>
>> -    if (unlikely(ringbuf->space < bytes)) {
>> -        ret = ring_wait_for_space(ring, bytes);
>> +    if (wait_bytes) {
>> +        ret = ring_wait_for_space(ring, wait_bytes);
>>           if (unlikely(ret))
>>               return ret;
>> +
>> +        if (need_wrap)
>> +            __wrap_ring_buffer(ringbuf);
>>       }
>>
>>       return 0;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 0e2bbc6..304cac4 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct 
>> intel_ringbuffer *ringbuf)
>>    * will always have sufficient room to do its stuff. The request 
>> creation
>>    * code calls this automatically.
>>    */
>> -int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>>   void intel_ring_reserved_space_reserve(struct intel_ringbuffer 
>> *ringbuf, int size);
>>   /* Cancel the reservation, e.g. because the request is being 
>> discarded. */
>>   void intel_ring_reserved_space_cancel(struct intel_ringbuffer 
>> *ringbuf);
>> @@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct 
>> intel_ringbuffer *ringbuf);
>>   /* Finish with the reserved space - for use by i915_add_request() 
>> only. */
>>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
>>
>> +/* Legacy ringbuffer specific portion of reservation code: */
>> +int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>> +
>>   #endif /* _INTEL_RINGBUFFER_H_ */
>>
>


* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-30 15:53     ` John Harrison
@ 2015-06-30 16:15       ` Tomas Elf
  2015-07-01 10:44         ` Tomas Elf
  0 siblings, 1 reply; 14+ messages in thread
From: Tomas Elf @ 2015-06-30 16:15 UTC (permalink / raw)
  To: John Harrison, Intel-GFX

On 30/06/2015 16:53, John Harrison wrote:
> On 30/06/2015 15:43, Tomas Elf wrote:
>> On 30/06/2015 12:40, John.C.Harrison@Intel.com wrote:
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> An earlier patch was added to reserve space in the ring buffer for the
>>> commands issued during 'add_request()'. The initial version was
>>> pessimistic in the way it handled buffer wrapping and would cause
>>> premature wraps and thus waste ring space.
>>>
>>> This patch updates the code to better handle the wrap case. It no
>>> longer enforces that the space being asked for and the reserved space
>>> are a single contiguous block. Instead, it allows the reserve to be on
>>> the far end of a wrap operation. It still guarantees that the space is
>>> available so when the wrap occurs, no wait will happen. Thus the wrap
>>> cannot fail which is the whole point of the exercise.
>>>
>>> Also fixed a merge failure with some comments from the original patch.
>>>
>>> v2: Incorporated suggestion by David Gordon to move the wrap code
>>> inside the prepare function and thus allow a single combined
>>> wait_for_space() call rather than doing one before the wrap and
>>> another after. This also makes the prepare code much simpler and
>>> easier to follow.
>>>
>>> v3: Fix for 'effective_size' vs 'size' during ring buffer remainder
>>> calculations (spotted by Tomas Elf).
>>>
>>> For: VIZ-5115
>>> CC: Daniel Vetter <daniel@ffwll.ch>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/intel_lrc.c        | 73
>>> +++++++++++++-------------
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 90
>>> +++++++++++++++++++--------------
>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>>>   3 files changed, 90 insertions(+), 77 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index b36e064..7d9515d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct
>>> drm_i915_gem_request *req,
>>>       unsigned space;
>>>       int ret;
>>>
>>> -    /* The whole point of reserving space is to not wait! */
>>> -    WARN_ON(ringbuf->reserved_in_use);
>>> -
>>>       if (intel_ring_space(ringbuf) >= bytes)
>>>           return 0;
>>>
>>> +    /* The whole point of reserving space is to not wait! */
>>> +    WARN_ON(ringbuf->reserved_in_use);
>>> +
>>>       list_for_each_entry(target, &ring->request_list, list) {
>>>           /*
>>>            * The request queue is per-engine, so can contain requests
>>> @@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct
>>> drm_i915_gem_request *request)
>>>       execlists_context_queue(request);
>>>   }
>>>
>>> -static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>>> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>>>   {
>>> -    struct intel_ringbuffer *ringbuf = req->ringbuf;
>>>       uint32_t __iomem *virt;
>>>       int rem = ringbuf->size - ringbuf->tail;
>>>
>>> -    /* Can't wrap if space has already been reserved! */
>>> -    WARN_ON(ringbuf->reserved_in_use);
>>> -
>>> -    if (ringbuf->space < rem) {
>>> -        int ret = logical_ring_wait_for_space(req, rem);
>>> -
>>> -        if (ret)
>>> -            return ret;
>>> -    }
>>> -
>>>       virt = ringbuf->virtual_start + ringbuf->tail;
>>>       rem /= 4;
>>>       while (rem--)
>>> @@ -741,40 +730,50 @@ static int logical_ring_wrap_buffer(struct
>>> drm_i915_gem_request *req)
>>>
>>>       ringbuf->tail = 0;
>>>       intel_ring_update_space(ringbuf);
>>> -
>>> -    return 0;
>>>   }
>>>
>>>   static int logical_ring_prepare(struct drm_i915_gem_request *req,
>>> int bytes)
>>>   {
>>>       struct intel_ringbuffer *ringbuf = req->ringbuf;
>>> -    int ret;
>>> -
>>> -    /*
>>> -     * Add on the reserved size to the request to make sure that after
>>> -     * the intended commands have been emitted, there is guaranteed to
>>> -     * still be enough free space to send them to the hardware.
>>> -     */
>>> -    if (!ringbuf->reserved_in_use)
>>> -        bytes += ringbuf->reserved_size;
>>> -
>>> -    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>>> -        ret = logical_ring_wrap_buffer(req);
>>> -        if (unlikely(ret))
>>> -            return ret;
>>> +    int remain_usable = ringbuf->effective_size - ringbuf->tail;
>>> +    int remain_actual = ringbuf->size - ringbuf->tail;
>>
>> You could add a comment here (and in the legacy implementation)
>> explaining why we make the distinction between remain_usable and
>> remain_actual. Or you could add the comment when we actually use
>> remain_actual further down in the function. It's up to you.
>>
>> We first need to be pessimistic about how much space is left in the
>> ring buffer when determining the need for wrapping - therefore we use
>> remain_usable, which disregards the end-of-buffer padding - and then
>> we need to be pessimistic about how much ring space we need to wait
>> for - therefore we use remain_actual, which takes the end-of-buffer
>> padding into consideration to make sure that we're actually not
>> waiting for too little space.
>
> It's not about being pessimistic or optimistic. That implies there is
> some choice, that one version is a little bit better in one situation
> but either would do, whereas this is about functional correctness. The
> difference between the actual ring size and the usable ring size is not
> some reserved-space tweak to make something go faster; it is about the
> hardware locking up if the entire buffer is used. I think 'usable' and
> 'actual' are fairly obvious names. If you want to know the details of
> why there is an 'effective_size' in the first place, look up
> 'effective_size' in the code and read the comment about i830 hangs.

You're probably right. Let's just forget about it.

>
>> If you add those comments maybe you could also rename the variables to
>> something like "remaining_space_usable" etc. since "remain_usable" is
>> more of a verb than a noun. But that's seriously nitpicking.
> Or maybe
> 'remaining_space_that_is_usable_without_causing_an_i830_to_hang_because_it_skips_the_last_two_cachelines'?
> There is such a thing as being too verbose and making the code hard to
> read.

That is indeed a very long variable name. It's 104 characters long so 
that wouldn't clear checkpatch.pl :). My nitpicky suggestion was 9 
characters longer. And, yeah, it's possible to get too verbose. This 
driver is not even remotely close to that point as it stands today :).

Thanks,
Tomas

>
>
>> If you at least add the comments to make it crystal clear why we do it
>> this way then I'm fine.
>
> I don't see what comment could be added that would actually make things
> clearer without being a long and unnecessary description of the i830
> issue. There are two variables declared on consecutive lines that cache
> pretty simple calculations and have fairly clear names. It looks quite
> self explanatory to me! The code then tests to see if it can use the
> usable space, if not then it wraps around the actual buffer size. Again,
> seems pretty obvious as to what is going on and why - you can only use
> the usable bit, but when traversing the whole buffer you pretty
> obviously need to traverse the whole buffer not just the part that may
> or may not have been used.
>
>
>>
>> Reviewed-by: Tomas Elf <tomas.elf@intel.com>
>>
>> Thanks,
>> Tomas
>>
>>> +    int ret, total_bytes, wait_bytes = 0;
>>> +    bool need_wrap = false;
>>>
>>> -        if(ringbuf->reserved_size) {
>>> -            uint32_t size = ringbuf->reserved_size;
>>> +    if (ringbuf->reserved_in_use)
>>> +        total_bytes = bytes;
>>> +    else
>>> +        total_bytes = bytes + ringbuf->reserved_size;
>>>
>>> -            intel_ring_reserved_space_cancel(ringbuf);
>>> -            intel_ring_reserved_space_reserve(ringbuf, size);
>>> +    if (unlikely(bytes > remain_usable)) {
>>> +        /*
>>> +         * Not enough space for the basic request. So need to flush
>>> +         * out the remainder and then wait for base + reserved.
>>> +         */
>>> +        wait_bytes = remain_actual + total_bytes;
>>> +        need_wrap = true;
>>> +    } else {
>>> +        if (unlikely(total_bytes > remain_usable)) {
>>> +            /*
>>> +             * The base request will fit but the reserved space
>>> +             * falls off the end. So only need to wait for the
>>> +             * reserved size after flushing out the remainder.
>>> +             */
>>> +            wait_bytes = remain_actual + ringbuf->reserved_size;
>>> +            need_wrap = true;
>>> +        } else if (total_bytes > ringbuf->space) {
>>> +            /* No wrapping required, just waiting. */
>>> +            wait_bytes = total_bytes;
>>>           }
>>>       }
>>>
>>> -    if (unlikely(ringbuf->space < bytes)) {
>>> -        ret = logical_ring_wait_for_space(req, bytes);
>>> +    if (wait_bytes) {
>>> +        ret = logical_ring_wait_for_space(req, wait_bytes);
>>>           if (unlikely(ret))
>>>               return ret;
>>> +
>>> +        if (need_wrap)
>>> +            __wrap_ring_buffer(ringbuf);
>>>       }
>>>
>>>       return 0;
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> index af7c12e..e39c891 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>> @@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct
>>> intel_engine_cs *ring, int n)
>>>       unsigned space;
>>>       int ret;
>>>
>>> -    /* The whole point of reserving space is to not wait! */
>>> -    WARN_ON(ringbuf->reserved_in_use);
>>> -
>>>       if (intel_ring_space(ringbuf) >= n)
>>>           return 0;
>>>
>>> +    /* The whole point of reserving space is to not wait! */
>>> +    WARN_ON(ringbuf->reserved_in_use);
>>> +
>>>       list_for_each_entry(request, &ring->request_list, list) {
>>>           space = __intel_ring_space(request->postfix, ringbuf->tail,
>>>                          ringbuf->size);
>>> @@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct
>>> intel_engine_cs *ring, int n)
>>>       return 0;
>>>   }
>>>
>>> -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>>> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>>>   {
>>>       uint32_t __iomem *virt;
>>> -    struct intel_ringbuffer *ringbuf = ring->buffer;
>>>       int rem = ringbuf->size - ringbuf->tail;
>>>
>>> -    /* Can't wrap if space has already been reserved! */
>>> -    WARN_ON(ringbuf->reserved_in_use);
>>> -
>>> -    if (ringbuf->space < rem) {
>>> -        int ret = ring_wait_for_space(ring, rem);
>>> -        if (ret)
>>> -            return ret;
>>> -    }
>>> -
>>>       virt = ringbuf->virtual_start + ringbuf->tail;
>>>       rem /= 4;
>>>       while (rem--)
>>> @@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct
>>> intel_engine_cs *ring)
>>>
>>>       ringbuf->tail = 0;
>>>       intel_ring_update_space(ringbuf);
>>> -
>>> -    return 0;
>>>   }
>>>
>>>   int intel_ring_idle(struct intel_engine_cs *ring)
>>> @@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct
>>> intel_ringbuffer *ringbuf)
>>>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>>>   {
>>>       WARN_ON(!ringbuf->reserved_in_use);
>>> -    WARN(ringbuf->tail > ringbuf->reserved_tail +
>>> ringbuf->reserved_size,
>>> -         "request reserved size too small: %d vs %d!\n",
>>> -         ringbuf->tail - ringbuf->reserved_tail,
>>> ringbuf->reserved_size);
>>> +    if (ringbuf->tail > ringbuf->reserved_tail) {
>>> +        WARN(ringbuf->tail > ringbuf->reserved_tail +
>>> ringbuf->reserved_size,
>>> +             "request reserved size too small: %d vs %d!\n",
>>> +             ringbuf->tail - ringbuf->reserved_tail,
>>> ringbuf->reserved_size);
>>> +    } else {
>>> +        /*
>>> +         * The ring was wrapped while the reserved space was in use.
>>> +         * That means that some unknown amount of the ring tail was
>>> +         * no-op filled and skipped. Thus simply adding the ring size
>>> +         * to the tail and doing the above space check will not work.
>>> +         * Rather than attempt to track how much tail was skipped,
>>> +         * it is much simpler to say that also skipping the sanity
>>> +         * check every once in a while is not a big issue.
>>> +         */
>>> +    }
>>>
>>>       ringbuf->reserved_size   = 0;
>>>       ringbuf->reserved_in_use = false;
>>> @@ -2249,33 +2249,45 @@ void intel_ring_reserved_space_end(struct
>>> intel_ringbuffer *ringbuf)
>>>   static int __intel_ring_prepare(struct intel_engine_cs *ring, int
>>> bytes)
>>>   {
>>>       struct intel_ringbuffer *ringbuf = ring->buffer;
>>> -    int ret;
>>> -
>>> -    /*
>>> -     * Add on the reserved size to the request to make sure that after
>>> -     * the intended commands have been emitted, there is guaranteed to
>>> -     * still be enough free space to send them to the hardware.
>>> -     */
>>> -    if (!ringbuf->reserved_in_use)
>>> -        bytes += ringbuf->reserved_size;
>>> -
>>> -    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>>> -        ret = intel_wrap_ring_buffer(ring);
>>> -        if (unlikely(ret))
>>> -            return ret;
>>> +    int remain_usable = ringbuf->effective_size - ringbuf->tail;
>>> +    int remain_actual = ringbuf->size - ringbuf->tail;
>>> +    int ret, total_bytes, wait_bytes = 0;
>>> +    bool need_wrap = false;
>>>
>>> -        if(ringbuf->reserved_size) {
>>> -            uint32_t size = ringbuf->reserved_size;
>>> +    if (ringbuf->reserved_in_use)
>>> +        total_bytes = bytes;
>>> +    else
>>> +        total_bytes = bytes + ringbuf->reserved_size;
>>>
>>> -            intel_ring_reserved_space_cancel(ringbuf);
>>> -            intel_ring_reserved_space_reserve(ringbuf, size);
>>> +    if (unlikely(bytes > remain_usable)) {
>>> +        /*
>>> +         * Not enough space for the basic request. So need to flush
>>> +         * out the remainder and then wait for base + reserved.
>>> +         */
>>> +        wait_bytes = remain_actual + total_bytes;
>>> +        need_wrap = true;
>>> +    } else {
>>> +        if (unlikely(total_bytes > remain_usable)) {
>>> +            /*
>>> +             * The base request will fit but the reserved space
>>> +             * falls off the end. So only need to wait for the
>>> +             * reserved size after flushing out the remainder.
>>> +             */
>>> +            wait_bytes = remain_actual + ringbuf->reserved_size;
>>> +            need_wrap = true;
>>> +        } else if (total_bytes > ringbuf->space) {
>>> +            /* No wrapping required, just waiting. */
>>> +            wait_bytes = total_bytes;
>>>           }
>>>       }
>>>
>>> -    if (unlikely(ringbuf->space < bytes)) {
>>> -        ret = ring_wait_for_space(ring, bytes);
>>> +    if (wait_bytes) {
>>> +        ret = ring_wait_for_space(ring, wait_bytes);
>>>           if (unlikely(ret))
>>>               return ret;
>>> +
>>> +        if (need_wrap)
>>> +            __wrap_ring_buffer(ringbuf);
>>>       }
>>>
>>>       return 0;
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> index 0e2bbc6..304cac4 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> @@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct
>>> intel_ringbuffer *ringbuf)
>>>    * will always have sufficient room to do its stuff. The request
>>> creation
>>>    * code calls this automatically.
>>>    */
>>> -int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>>>   void intel_ring_reserved_space_reserve(struct intel_ringbuffer
>>> *ringbuf, int size);
>>>   /* Cancel the reservation, e.g. because the request is being
>>> discarded. */
>>>   void intel_ring_reserved_space_cancel(struct intel_ringbuffer
>>> *ringbuf);
>>> @@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct
>>> intel_ringbuffer *ringbuf);
>>>   /* Finish with the reserved space - for use by i915_add_request()
>>> only. */
>>>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
>>>
>>> +/* Legacy ringbuffer specific portion of reservation code: */
>>> +int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>>> +
>>>   #endif /* _INTEL_RINGBUFFER_H_ */
>>>
>>
>
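
One thing the header hunk makes easier to see is the intended calling
sequence for the reservation helpers. A rough sketch of that sequence,
pieced together from the comments in the hunk; the 160 below is a
made-up placeholder rather than the driver's real reservation size,
and error handling is omitted:

	/*
	 * At request creation: set aside room so that add_request()
	 * can never fail later for lack of ring space.
	 */
	intel_ring_reserved_space_reserve(ringbuf, 160);

	/* ...the caller emits the commands making up the request body... */

	/* In i915_add_request(): start consuming the reserved block... */
	intel_ring_reserved_space_use(ringbuf);
	/* ...emit the flush, seqno write, user interrupt and so on... */
	intel_ring_reserved_space_end(ringbuf);

	/* Or, if the request is abandoned before it is ever submitted: */
	intel_ring_reserved_space_cancel(ringbuf);

The use/end pair and the cancel call are presumably alternatives for
any one request: either the reservation is consumed by add_request()
or it is cancelled when the request is discarded.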

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-30 16:15       ` Tomas Elf
@ 2015-07-01 10:44         ` Tomas Elf
  2015-07-01 12:29           ` Daniel Vetter
  0 siblings, 1 reply; 14+ messages in thread
From: Tomas Elf @ 2015-07-01 10:44 UTC (permalink / raw)
  To: John Harrison, Intel-GFX

On 30/06/2015 17:15, Tomas Elf wrote:
> On 30/06/2015 16:53, John Harrison wrote:
>> On 30/06/2015 15:43, Tomas Elf wrote:
>>> On 30/06/2015 12:40, John.C.Harrison@Intel.com wrote:
>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>
>>>> An earlier patch was added to reserve space in the ring buffer for the
>>>> commands issued during 'add_request()'. The initial version was
>>>> pessimistic in the way it handled buffer wrapping and would cause
>>>> premature wraps and thus waste ring space.
>>>>
>>>> This patch updates the code to better handle the wrap case. It no
>>>> longer enforces that the space being asked for and the reserved space
>>>> are a single contiguous block. Instead, it allows the reserve to be on
>>>> the far end of a wrap operation. It still guarantees that the space is
>>>> available so when the wrap occurs, no wait will happen. Thus the wrap
>>>> cannot fail which is the whole point of the exercise.
>>>>
>>>> Also fixed a merge failure with some comments from the original patch.
>>>>
>>>> v2: Incorporated suggestion by David Gordon to move the wrap code
>>>> inside the prepare function and thus allow a single combined
>>>> wait_for_space() call rather than doing one before the wrap and
>>>> another after. This also makes the prepare code much simpler and
>>>> easier to follow.
>>>>
>>>> v3: Fix for 'effective_size' vs 'size' during ring buffer remainder
>>>> calculations (spotted by Tomas Elf).
>>>>
>>>> For: VIZ-5115
>>>> CC: Daniel Vetter <daniel@ffwll.ch>
>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/intel_lrc.c        | 73
>>>> +++++++++++++-------------
>>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 90
>>>> +++++++++++++++++++--------------
>>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
>>>>   3 files changed, 90 insertions(+), 77 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>> index b36e064..7d9515d 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>> @@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct
>>>> drm_i915_gem_request *req,
>>>>       unsigned space;
>>>>       int ret;
>>>>
>>>> -    /* The whole point of reserving space is to not wait! */
>>>> -    WARN_ON(ringbuf->reserved_in_use);
>>>> -
>>>>       if (intel_ring_space(ringbuf) >= bytes)
>>>>           return 0;
>>>>
>>>> +    /* The whole point of reserving space is to not wait! */
>>>> +    WARN_ON(ringbuf->reserved_in_use);
>>>> +
>>>>       list_for_each_entry(target, &ring->request_list, list) {
>>>>           /*
>>>>            * The request queue is per-engine, so can contain requests
>>>> @@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct
>>>> drm_i915_gem_request *request)
>>>>       execlists_context_queue(request);
>>>>   }
>>>>
>>>> -static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
>>>> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>>>>   {
>>>> -    struct intel_ringbuffer *ringbuf = req->ringbuf;
>>>>       uint32_t __iomem *virt;
>>>>       int rem = ringbuf->size - ringbuf->tail;
>>>>
>>>> -    /* Can't wrap if space has already been reserved! */
>>>> -    WARN_ON(ringbuf->reserved_in_use);
>>>> -
>>>> -    if (ringbuf->space < rem) {
>>>> -        int ret = logical_ring_wait_for_space(req, rem);
>>>> -
>>>> -        if (ret)
>>>> -            return ret;
>>>> -    }
>>>> -
>>>>       virt = ringbuf->virtual_start + ringbuf->tail;
>>>>       rem /= 4;
>>>>       while (rem--)
>>>> @@ -741,40 +730,50 @@ static int logical_ring_wrap_buffer(struct
>>>> drm_i915_gem_request *req)
>>>>
>>>>       ringbuf->tail = 0;
>>>>       intel_ring_update_space(ringbuf);
>>>> -
>>>> -    return 0;
>>>>   }
>>>>
>>>>   static int logical_ring_prepare(struct drm_i915_gem_request *req,
>>>> int bytes)
>>>>   {
>>>>       struct intel_ringbuffer *ringbuf = req->ringbuf;
>>>> -    int ret;
>>>> -
>>>> -    /*
>>>> -     * Add on the reserved size to the request to make sure that after
>>>> -     * the intended commands have been emitted, there is guaranteed to
>>>> -     * still be enough free space to send them to the hardware.
>>>> -     */
>>>> -    if (!ringbuf->reserved_in_use)
>>>> -        bytes += ringbuf->reserved_size;
>>>> -
>>>> -    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>>>> -        ret = logical_ring_wrap_buffer(req);
>>>> -        if (unlikely(ret))
>>>> -            return ret;
>>>> +    int remain_usable = ringbuf->effective_size - ringbuf->tail;
>>>> +    int remain_actual = ringbuf->size - ringbuf->tail;
>>>
>>> You could add a comment here (and in the legacy implementation)
>>> explaining why we make the distinction between remain_usable and
>>> remain_actual. Or you could add the comment when we actually use
>>> remain_actual further down in the function. It's up to you.
>>>
>>> We first need to be pessimistic about how much space is left in the
>>> ring buffer when determining the need for wrapping - therefore we use
>>> remain_usable, which disregards the end-of-buffer padding - and then
>>> we need to be pessimistic about how much ring space we need to wait
>>> for - therefore we use remain_actual, which takes the end-of-buffer
>>> padding into consideration to make sure that we're actually not
>>> waiting for too little space.
>>
>> It's not about being pessimistic or optimistic. That implies there is
>> some choice, that one version is a little bit better in one situation
>> but either would do. Whereas this is about functional correctness. The
>> difference between the actual ring size and the usable ring size is not
>> some reserved space thing to make something go faster. This is about the
>> hardware locking up if the entire buffer is used. I think 'usable' and
>> 'actual' are fairly obvious names. If you want to know the details about
>> why there is an 'effective_size' in the first place then look up
>> 'effective_size' in the code and read the comment about i830 hangs.
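
For anyone reading along without the source to hand, the
'effective_size' being referred to is set at ring init time. Roughly,
quoting the legacy ringbuffer init from memory, so treat the exact
condition and constant as approximate rather than authoritative:

	/*
	 * i830 can hang if the TAIL pointer lands within the last two
	 * cachelines of the ring, so the usable size is trimmed.
	 */
	ringbuf->effective_size = ringbuf->size;
	if (IS_I830(dev) || IS_845G(dev))
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;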
>
> You're probably right. Let's just forget about it.
>
>>
>>> If you add those comments maybe you could also rename the variables to
>>> something like "remaining_space_usable" etc. since "remain_usable" is
>>> more of a verb than a noun. But that's seriously nitpicking.
>> Or maybe
>> 'remaining_space_that_is_usable_without_causing_an_i830_to_hang_because_it_skips_the_last_two_cachelines'?
>>
>> There is such a thing as being too verbose and making the code hard to
>> read.
>
> That is indeed a very long variable name. It's 104 characters long so
> that wouldn't clear checkpatch.pl :). My nitpicky suggestion was 9
> characters longer. And, yeah, it's possible to get too verbose. This
> driver is not even remotely close to that point as it stands today :).
>
> Thanks,
> Tomas

Anyway, I think we're done here.

Reviewed-by: Tomas Elf <tomas.elf@intel.com>

Thanks,
Tomas


>
>>
>>
>>> If you at least add the comments to make it crystal clear why we do it
>>> this way then I'm fine.
>>
>> I don't see what comment could be added that would actually make things
>> clearer without being a long and unnecessary description of the i830
>> issue. There are two variables declared on consecutive lines that cache
>> pretty simple calculations and have fairly clear names. It looks quite
>> self-explanatory to me! The code then tests to see if it can use the
>> usable space, if not then it wraps around the actual buffer size. Again,
>> seems pretty obvious as to what is going on and why - you can only use
>> the usable bit, but when traversing the whole buffer you pretty
>> obviously need to traverse the whole buffer not just the part that may
>> or may not have been used.
>>
>>
>>>
>>> Reviewed-by: Tomas Elf <tomas.elf@intel.com>
>>>
>>> Thanks,
>>> Tomas
>>>
>>>> +    int ret, total_bytes, wait_bytes = 0;
>>>> +    bool need_wrap = false;
>>>>
>>>> -        if(ringbuf->reserved_size) {
>>>> -            uint32_t size = ringbuf->reserved_size;
>>>> +    if (ringbuf->reserved_in_use)
>>>> +        total_bytes = bytes;
>>>> +    else
>>>> +        total_bytes = bytes + ringbuf->reserved_size;
>>>>
>>>> -            intel_ring_reserved_space_cancel(ringbuf);
>>>> -            intel_ring_reserved_space_reserve(ringbuf, size);
>>>> +    if (unlikely(bytes > remain_usable)) {
>>>> +        /*
>>>> +         * Not enough space for the basic request. So need to flush
>>>> +         * out the remainder and then wait for base + reserved.
>>>> +         */
>>>> +        wait_bytes = remain_actual + total_bytes;
>>>> +        need_wrap = true;
>>>> +    } else {
>>>> +        if (unlikely(total_bytes > remain_usable)) {
>>>> +            /*
>>>> +             * The base request will fit but the reserved space
>>>> +             * falls off the end. So only need to wait for the
>>>> +             * reserved size after flushing out the remainder.
>>>> +             */
>>>> +            wait_bytes = remain_actual + ringbuf->reserved_size;
>>>> +            need_wrap = true;
>>>> +        } else if (total_bytes > ringbuf->space) {
>>>> +            /* No wrapping required, just waiting. */
>>>> +            wait_bytes = total_bytes;
>>>>           }
>>>>       }
>>>>
>>>> -    if (unlikely(ringbuf->space < bytes)) {
>>>> -        ret = logical_ring_wait_for_space(req, bytes);
>>>> +    if (wait_bytes) {
>>>> +        ret = logical_ring_wait_for_space(req, wait_bytes);
>>>>           if (unlikely(ret))
>>>>               return ret;
>>>> +
>>>> +        if (need_wrap)
>>>> +            __wrap_ring_buffer(ringbuf);
>>>>       }
>>>>
>>>>       return 0;
>>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>> index af7c12e..e39c891 100644
>>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>>>> @@ -2121,12 +2121,12 @@ static int ring_wait_for_space(struct
>>>> intel_engine_cs *ring, int n)
>>>>       unsigned space;
>>>>       int ret;
>>>>
>>>> -    /* The whole point of reserving space is to not wait! */
>>>> -    WARN_ON(ringbuf->reserved_in_use);
>>>> -
>>>>       if (intel_ring_space(ringbuf) >= n)
>>>>           return 0;
>>>>
>>>> +    /* The whole point of reserving space is to not wait! */
>>>> +    WARN_ON(ringbuf->reserved_in_use);
>>>> +
>>>>       list_for_each_entry(request, &ring->request_list, list) {
>>>>           space = __intel_ring_space(request->postfix, ringbuf->tail,
>>>>                          ringbuf->size);
>>>> @@ -2145,21 +2145,11 @@ static int ring_wait_for_space(struct
>>>> intel_engine_cs *ring, int n)
>>>>       return 0;
>>>>   }
>>>>
>>>> -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
>>>> +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
>>>>   {
>>>>       uint32_t __iomem *virt;
>>>> -    struct intel_ringbuffer *ringbuf = ring->buffer;
>>>>       int rem = ringbuf->size - ringbuf->tail;
>>>>
>>>> -    /* Can't wrap if space has already been reserved! */
>>>> -    WARN_ON(ringbuf->reserved_in_use);
>>>> -
>>>> -    if (ringbuf->space < rem) {
>>>> -        int ret = ring_wait_for_space(ring, rem);
>>>> -        if (ret)
>>>> -            return ret;
>>>> -    }
>>>> -
>>>>       virt = ringbuf->virtual_start + ringbuf->tail;
>>>>       rem /= 4;
>>>>       while (rem--)
>>>> @@ -2167,8 +2157,6 @@ static int intel_wrap_ring_buffer(struct
>>>> intel_engine_cs *ring)
>>>>
>>>>       ringbuf->tail = 0;
>>>>       intel_ring_update_space(ringbuf);
>>>> -
>>>> -    return 0;
>>>>   }
>>>>
>>>>   int intel_ring_idle(struct intel_engine_cs *ring)
>>>> @@ -2238,9 +2226,21 @@ void intel_ring_reserved_space_use(struct
>>>> intel_ringbuffer *ringbuf)
>>>>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
>>>>   {
>>>>       WARN_ON(!ringbuf->reserved_in_use);
>>>> -    WARN(ringbuf->tail > ringbuf->reserved_tail +
>>>> ringbuf->reserved_size,
>>>> -         "request reserved size too small: %d vs %d!\n",
>>>> -         ringbuf->tail - ringbuf->reserved_tail,
>>>> ringbuf->reserved_size);
>>>> +    if (ringbuf->tail > ringbuf->reserved_tail) {
>>>> +        WARN(ringbuf->tail > ringbuf->reserved_tail +
>>>> ringbuf->reserved_size,
>>>> +             "request reserved size too small: %d vs %d!\n",
>>>> +             ringbuf->tail - ringbuf->reserved_tail,
>>>> ringbuf->reserved_size);
>>>> +    } else {
>>>> +        /*
>>>> +         * The ring was wrapped while the reserved space was in use.
>>>> +         * That means that some unknown amount of the ring tail was
>>>> +         * no-op filled and skipped. Thus simply adding the ring size
>>>> +         * to the tail and doing the above space check will not work.
>>>> +         * Rather than attempt to track how much tail was skipped,
>>>> +         * it is much simpler to say that also skipping the sanity
>>>> +         * check every once in a while is not a big issue.
>>>> +         */
>>>> +    }
>>>>
>>>>       ringbuf->reserved_size   = 0;
>>>>       ringbuf->reserved_in_use = false;
>>>> @@ -2249,33 +2249,45 @@ void intel_ring_reserved_space_end(struct
>>>> intel_ringbuffer *ringbuf)
>>>>   static int __intel_ring_prepare(struct intel_engine_cs *ring, int
>>>> bytes)
>>>>   {
>>>>       struct intel_ringbuffer *ringbuf = ring->buffer;
>>>> -    int ret;
>>>> -
>>>> -    /*
>>>> -     * Add on the reserved size to the request to make sure that after
>>>> -     * the intended commands have been emitted, there is guaranteed to
>>>> -     * still be enough free space to send them to the hardware.
>>>> -     */
>>>> -    if (!ringbuf->reserved_in_use)
>>>> -        bytes += ringbuf->reserved_size;
>>>> -
>>>> -    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
>>>> -        ret = intel_wrap_ring_buffer(ring);
>>>> -        if (unlikely(ret))
>>>> -            return ret;
>>>> +    int remain_usable = ringbuf->effective_size - ringbuf->tail;
>>>> +    int remain_actual = ringbuf->size - ringbuf->tail;
>>>> +    int ret, total_bytes, wait_bytes = 0;
>>>> +    bool need_wrap = false;
>>>>
>>>> -        if(ringbuf->reserved_size) {
>>>> -            uint32_t size = ringbuf->reserved_size;
>>>> +    if (ringbuf->reserved_in_use)
>>>> +        total_bytes = bytes;
>>>> +    else
>>>> +        total_bytes = bytes + ringbuf->reserved_size;
>>>>
>>>> -            intel_ring_reserved_space_cancel(ringbuf);
>>>> -            intel_ring_reserved_space_reserve(ringbuf, size);
>>>> +    if (unlikely(bytes > remain_usable)) {
>>>> +        /*
>>>> +         * Not enough space for the basic request. So need to flush
>>>> +         * out the remainder and then wait for base + reserved.
>>>> +         */
>>>> +        wait_bytes = remain_actual + total_bytes;
>>>> +        need_wrap = true;
>>>> +    } else {
>>>> +        if (unlikely(total_bytes > remain_usable)) {
>>>> +            /*
>>>> +             * The base request will fit but the reserved space
>>>> +             * falls off the end. So only need to wait for the
>>>> +             * reserved size after flushing out the remainder.
>>>> +             */
>>>> +            wait_bytes = remain_actual + ringbuf->reserved_size;
>>>> +            need_wrap = true;
>>>> +        } else if (total_bytes > ringbuf->space) {
>>>> +            /* No wrapping required, just waiting. */
>>>> +            wait_bytes = total_bytes;
>>>>           }
>>>>       }
>>>>
>>>> -    if (unlikely(ringbuf->space < bytes)) {
>>>> -        ret = ring_wait_for_space(ring, bytes);
>>>> +    if (wait_bytes) {
>>>> +        ret = ring_wait_for_space(ring, wait_bytes);
>>>>           if (unlikely(ret))
>>>>               return ret;
>>>> +
>>>> +        if (need_wrap)
>>>> +            __wrap_ring_buffer(ringbuf);
>>>>       }
>>>>
>>>>       return 0;
>>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>>> index 0e2bbc6..304cac4 100644
>>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>>> @@ -473,7 +473,6 @@ static inline u32 intel_ring_get_tail(struct
>>>> intel_ringbuffer *ringbuf)
>>>>    * will always have sufficient room to do its stuff. The request
>>>> creation
>>>>    * code calls this automatically.
>>>>    */
>>>> -int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>>>>   void intel_ring_reserved_space_reserve(struct intel_ringbuffer
>>>> *ringbuf, int size);
>>>>   /* Cancel the reservation, e.g. because the request is being
>>>> discarded. */
>>>>   void intel_ring_reserved_space_cancel(struct intel_ringbuffer
>>>> *ringbuf);
>>>> @@ -482,4 +481,7 @@ void intel_ring_reserved_space_use(struct
>>>> intel_ringbuffer *ringbuf);
>>>>   /* Finish with the reserved space - for use by i915_add_request()
>>>> only. */
>>>>   void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
>>>>
>>>> +/* Legacy ringbuffer specific portion of reservation code: */
>>>> +int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>>>> +
>>>>   #endif /* _INTEL_RINGBUFFER_H_ */
>>>>
>>>
>>
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Reserve space improvements
  2015-07-01 10:44         ` Tomas Elf
@ 2015-07-01 12:29           ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2015-07-01 12:29 UTC (permalink / raw)
  To: Tomas Elf; +Cc: Intel-GFX

On Wed, Jul 01, 2015 at 11:44:07AM +0100, Tomas Elf wrote:
> On 30/06/2015 17:15, Tomas Elf wrote:
> >On 30/06/2015 16:53, John Harrison wrote:
> >>On 30/06/2015 15:43, Tomas Elf wrote:
> >>>On 30/06/2015 12:40, John.C.Harrison@Intel.com wrote:
> >>>>From: John Harrison <John.C.Harrison@Intel.com>
> >>>>
> >>>>An earlier patch was added to reserve space in the ring buffer for the
> >>>>commands issued during 'add_request()'. The initial version was
> >>>>pessimistic in the way it handled buffer wrapping and would cause
> >>>>premature wraps and thus waste ring space.
> >>>>
> >>>>This patch updates the code to better handle the wrap case. It no
> >>>>longer enforces that the space being asked for and the reserved space
> >>>>are a single contiguous block. Instead, it allows the reserve to be on
> >>>>the far end of a wrap operation. It still guarantees that the space is
> >>>>available so when the wrap occurs, no wait will happen. Thus the wrap
> >>>>cannot fail which is the whole point of the exercise.
> >>>>
> >>>>Also fixed a merge failure with some comments from the original patch.
> >>>>
> >>>>v2: Incorporated suggestion by David Gordon to move the wrap code
> >>>>inside the prepare function and thus allow a single combined
> >>>>wait_for_space() call rather than doing one before the wrap and
> >>>>another after. This also makes the prepare code much simpler and
> >>>>easier to follow.
> >>>>
> >>>>v3: Fix for 'effective_size' vs 'size' during ring buffer remainder
> >>>>calculations (spotted by Tomas Elf).
> >>>>
> >>>>For: VIZ-5115
> >>>>CC: Daniel Vetter <daniel@ffwll.ch>
> >>>>Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> >>>>---
> >>>>  drivers/gpu/drm/i915/intel_lrc.c        | 73
> >>>>+++++++++++++-------------
> >>>>  drivers/gpu/drm/i915/intel_ringbuffer.c | 90
> >>>>+++++++++++++++++++--------------
> >>>>  drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +-
> >>>>  3 files changed, 90 insertions(+), 77 deletions(-)
> >>>>
> >>>>diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> >>>>b/drivers/gpu/drm/i915/intel_lrc.c
> >>>>index b36e064..7d9515d 100644
> >>>>--- a/drivers/gpu/drm/i915/intel_lrc.c
> >>>>+++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>>>@@ -663,12 +663,12 @@ static int logical_ring_wait_for_space(struct
> >>>>drm_i915_gem_request *req,
> >>>>      unsigned space;
> >>>>      int ret;
> >>>>
> >>>>-    /* The whole point of reserving space is to not wait! */
> >>>>-    WARN_ON(ringbuf->reserved_in_use);
> >>>>-
> >>>>      if (intel_ring_space(ringbuf) >= bytes)
> >>>>          return 0;
> >>>>
> >>>>+    /* The whole point of reserving space is to not wait! */
> >>>>+    WARN_ON(ringbuf->reserved_in_use);
> >>>>+
> >>>>      list_for_each_entry(target, &ring->request_list, list) {
> >>>>          /*
> >>>>           * The request queue is per-engine, so can contain requests
> >>>>@@ -718,22 +718,11 @@ intel_logical_ring_advance_and_submit(struct
> >>>>drm_i915_gem_request *request)
> >>>>      execlists_context_queue(request);
> >>>>  }
> >>>>
> >>>>-static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
> >>>>+static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
> >>>>  {
> >>>>-    struct intel_ringbuffer *ringbuf = req->ringbuf;
> >>>>      uint32_t __iomem *virt;
> >>>>      int rem = ringbuf->size - ringbuf->tail;
> >>>>
> >>>>-    /* Can't wrap if space has already been reserved! */
> >>>>-    WARN_ON(ringbuf->reserved_in_use);
> >>>>-
> >>>>-    if (ringbuf->space < rem) {
> >>>>-        int ret = logical_ring_wait_for_space(req, rem);
> >>>>-
> >>>>-        if (ret)
> >>>>-            return ret;
> >>>>-    }
> >>>>-
> >>>>      virt = ringbuf->virtual_start + ringbuf->tail;
> >>>>      rem /= 4;
> >>>>      while (rem--)
> >>>>@@ -741,40 +730,50 @@ static int logical_ring_wrap_buffer(struct
> >>>>drm_i915_gem_request *req)
> >>>>
> >>>>      ringbuf->tail = 0;
> >>>>      intel_ring_update_space(ringbuf);
> >>>>-
> >>>>-    return 0;
> >>>>  }
> >>>>
> >>>>  static int logical_ring_prepare(struct drm_i915_gem_request *req,
> >>>>int bytes)
> >>>>  {
> >>>>      struct intel_ringbuffer *ringbuf = req->ringbuf;
> >>>>-    int ret;
> >>>>-
> >>>>-    /*
> >>>>-     * Add on the reserved size to the request to make sure that after
> >>>>-     * the intended commands have been emitted, there is guaranteed to
> >>>>-     * still be enough free space to send them to the hardware.
> >>>>-     */
> >>>>-    if (!ringbuf->reserved_in_use)
> >>>>-        bytes += ringbuf->reserved_size;
> >>>>-
> >>>>-    if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> >>>>-        ret = logical_ring_wrap_buffer(req);
> >>>>-        if (unlikely(ret))
> >>>>-            return ret;
> >>>>+    int remain_usable = ringbuf->effective_size - ringbuf->tail;
> >>>>+    int remain_actual = ringbuf->size - ringbuf->tail;
> >>>
> >>>You could add a comment here (and in the legacy implementation)
> >>>explaining why we make the distinction between remain_usable and
> >>>remain_actual. Or you could add the comment when we actually use
> >>>remain_actual further down in the function. It's up to you.
> >>>
> >>>We first need to be pessimistic about how much space is left in the
> >>>ring buffer when determining the need for wrapping - therefore we use
> >>>remain_usable, which disregards the end-of-buffer padding - and then
> >>>we need to be pessimistic about how much ring space we need to wait
> >>>for - therefore we use remain_actual, which takes the end-of-buffer
> >>>padding into consideration to make sure that we're actually not
> >>>waiting for too little space.
> >>
> >>It's not about being pessimistic or optimistic. That implies there is
> >>some choice, that one version is a little bit better in one situation
> >>but either would do. Whereas this is about functional correctness. The
> >>difference between the actual ring size and the usable ring size is not
> >>some reserved space thing to make something go faster. This is about the
> >>hardware locking up if the entire buffer is used. I think 'usable' and
> >>'actual' are fairly obvious names. If you want to know the details about
> >>why there is an 'effective_size' in the first place then look up
> >>'effective_size' in the code and read the comment about i830 hangs.
> >
> >You're probably right. Let's just forget about it.
> >
> >>
> >>>If you add those comments maybe you could also rename the variables to
> >>>something like "remaining_space_usable" etc. since "remain_usable" is
> >>>more of a verb than a noun. But that's seriously nitpicking.
> >>Or maybe
> >>'remaining_space_that_is_usable_without_causing_an_i830_to_hang_because_it_skips_the_last_two_cachelines'?
> >>
> >>There is such a thing as being too verbose and making the code hard to
> >>read.
> >
> >That is indeed a very long variable name. It's 104 characters long so
> >that wouldn't clear checkpatch.pl :). My nitpicky suggestion was 9
> >characters longer. And, yeah, it's possible to get too verbose. This
> >driver is not even remotely close to that point as it stands today :).
> >
> >Thanks,
> >Tomas
> 
> Anyway, I think we're done here.
> 
> Reviewed-by: Tomas Elf <tomas.elf@intel.com>

Queued for -next.
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Reserve space improvements
  2015-06-30 11:40 ` John.C.Harrison
  2015-06-30 14:43   ` Tomas Elf
@ 2015-07-01 22:18   ` shuang.he
  1 sibling, 0 replies; 14+ messages in thread
From: shuang.he @ 2015-07-01 22:18 UTC (permalink / raw)
  To: shuang.he, lei.a.liu, intel-gfx, John.C.Harrison

Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6679
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
ILK                                  302/302              302/302
SNB                                  312/316              312/316
IVB                                  343/343              343/343
BYT                 -2              287/287              285/287
HSW                                  380/380              380/380
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*BYT  igt@gem_partial_pwrite_pread@reads-display      PASS(1)      FAIL(1)
*BYT  igt@gem_tiled_partial_pwrite_pread@reads      PASS(1)      FAIL(1)
Note: You need to pay more attention to lines starting with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-07-01 22:18 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-24 17:03 [PATCH] drm/i915: Reserve space improvements John.C.Harrison
2015-06-25 18:38 ` Tomas Elf
2015-06-26 18:04   ` Dave Gordon
2015-06-28 21:11 ` shuang.he
2015-06-29 16:36 ` John.C.Harrison
2015-06-30  7:26   ` shuang.he
2015-06-30 11:33   ` Tomas Elf
2015-06-30 11:40 ` John.C.Harrison
2015-06-30 14:43   ` Tomas Elf
2015-06-30 15:53     ` John Harrison
2015-06-30 16:15       ` Tomas Elf
2015-07-01 10:44         ` Tomas Elf
2015-07-01 12:29           ` Daniel Vetter
2015-07-01 22:18   ` shuang.he
