All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Cc: <john.c.harrison@intel.com>
Subject: [PATCH 23/25] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
Date: Thu, 14 Oct 2021 10:20:03 -0700	[thread overview]
Message-ID: <20211014172005.27155-24-matthew.brost@intel.com> (raw)
In-Reply-To: <20211014172005.27155-1-matthew.brost@intel.com>

Parallel submission create composite fences (dma_fence_array) for excl /
shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to
determine the busyness of the object. Prior to patch it only check if
the fence in the slot was a i915_request. Update the check to understand
composite fences and correctly report the busyness.

v2:
 (Tvrtko)
  - Remove duplicate BUILD_BUG_ON

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_busy.c      | 57 +++++++++++++++----
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  5 +-
 drivers/gpu/drm/i915/i915_request.h           |  6 ++
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..7358bebef15c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -4,6 +4,8 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/dma-fence-array.h>
+
 #include "gt/intel_engine.h"
 
 #include "i915_gem_ioctls.h"
@@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
+__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
 {
 	const struct i915_request *rq;
 
@@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
 	 * to eventually flush us, but to minimise latency just ask the
 	 * hardware.
 	 *
-	 * Note we only report on the status of native fences.
+	 * Note we only report on the status of native fences and we currently
+	 * have two native fences:
+	 *
+	 * 1. A composite fence (dma_fence_array) constructed of i915 requests
+	 * created during a parallel submission. In this case we deconstruct the
+	 * composite fence into individual i915 requests and check the status of
+	 * each request.
+	 *
+	 * 2. A single i915 request.
 	 */
-	if (!dma_fence_is_i915(fence))
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			/* Not an i915 fence, can't be busy per above */
+			if (!dma_fence_is_i915(current_fence) ||
+			    !test_bit(I915_FENCE_FLAG_COMPOSITE,
+				      &current_fence->flags)) {
+				return 0;
+			}
+
+			rq = to_request(current_fence);
+			if (!i915_request_completed(rq))
+				return flag(rq->engine->uabi_class);
+		} while (--nchild);
+
+		/* All requests in array complete, not busy */
 		return 0;
+	} else {
+		if (!dma_fence_is_i915(fence))
+			return 0;
 
-	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, const struct i915_request, fence);
-	if (i915_request_completed(rq))
-		return 0;
+		rq = to_request(fence);
+		if (i915_request_completed(rq))
+			return 0;
 
-	/* Beware type-expansion follies! */
-	BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
-	return flag(rq->engine->uabi_class);
+		/* Beware type-expansion follies! */
+		BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
+		return flag(rq->engine->uabi_class);
+	}
 }
 
 static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
+busy_check_reader(struct dma_fence *fence)
 {
 	return __busy_set_if_active(fence, __busy_read_flag);
 }
 
 static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
+busy_check_writer(struct dma_fence *fence)
 {
 	if (!fence)
 		return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index fc30856e81fa..1231224728e4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3008,8 +3008,11 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
 	if (!fences)
 		return ERR_PTR(-ENOMEM);
 
-	for_each_batch_create_order(eb, i)
+	for_each_batch_create_order(eb, i) {
 		fences[i] = &eb->requests[i]->fence;
+		__set_bit(I915_FENCE_FLAG_COMPOSITE,
+			  &eb->requests[i]->fence.flags);
+	}
 
 	fence_array = dma_fence_array_create(eb->num_batches,
 					     fences,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 24db8459376b..dc359242d1ae 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -156,6 +156,12 @@ enum {
 	 * submission / relationship encoutered an error.
 	 */
 	I915_FENCE_FLAG_SKIP_PARALLEL,
+
+	/*
+	 * I915_FENCE_FLAG_COMPOSITE - Indicates fence is part of a composite
+	 * fence (dma_fence_array) and i915 generated for parallel submission.
+	 */
+	I915_FENCE_FLAG_COMPOSITE,
 };
 
 /**
-- 
2.32.0


WARNING: multiple messages have this Message-ID (diff)
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Cc: <john.c.harrison@intel.com>
Subject: [Intel-gfx] [PATCH 23/25] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences
Date: Thu, 14 Oct 2021 10:20:03 -0700	[thread overview]
Message-ID: <20211014172005.27155-24-matthew.brost@intel.com> (raw)
In-Reply-To: <20211014172005.27155-1-matthew.brost@intel.com>

Parallel submission create composite fences (dma_fence_array) for excl /
shared slots in objects. The I915_GEM_BUSY IOCTL checks these slots to
determine the busyness of the object. Prior to patch it only check if
the fence in the slot was a i915_request. Update the check to understand
composite fences and correctly report the busyness.

v2:
 (Tvrtko)
  - Remove duplicate BUILD_BUG_ON

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_busy.c      | 57 +++++++++++++++----
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  5 +-
 drivers/gpu/drm/i915/i915_request.h           |  6 ++
 3 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..7358bebef15c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -4,6 +4,8 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/dma-fence-array.h>
+
 #include "gt/intel_engine.h"
 
 #include "i915_gem_ioctls.h"
@@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
+__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
 {
 	const struct i915_request *rq;
 
@@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
 	 * to eventually flush us, but to minimise latency just ask the
 	 * hardware.
 	 *
-	 * Note we only report on the status of native fences.
+	 * Note we only report on the status of native fences and we currently
+	 * have two native fences:
+	 *
+	 * 1. A composite fence (dma_fence_array) constructed of i915 requests
+	 * created during a parallel submission. In this case we deconstruct the
+	 * composite fence into individual i915 requests and check the status of
+	 * each request.
+	 *
+	 * 2. A single i915 request.
 	 */
-	if (!dma_fence_is_i915(fence))
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+		struct dma_fence **child = array->fences;
+		unsigned int nchild = array->num_fences;
+
+		do {
+			struct dma_fence *current_fence = *child++;
+
+			/* Not an i915 fence, can't be busy per above */
+			if (!dma_fence_is_i915(current_fence) ||
+			    !test_bit(I915_FENCE_FLAG_COMPOSITE,
+				      &current_fence->flags)) {
+				return 0;
+			}
+
+			rq = to_request(current_fence);
+			if (!i915_request_completed(rq))
+				return flag(rq->engine->uabi_class);
+		} while (--nchild);
+
+		/* All requests in array complete, not busy */
 		return 0;
+	} else {
+		if (!dma_fence_is_i915(fence))
+			return 0;
 
-	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, const struct i915_request, fence);
-	if (i915_request_completed(rq))
-		return 0;
+		rq = to_request(fence);
+		if (i915_request_completed(rq))
+			return 0;
 
-	/* Beware type-expansion follies! */
-	BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
-	return flag(rq->engine->uabi_class);
+		/* Beware type-expansion follies! */
+		BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
+		return flag(rq->engine->uabi_class);
+	}
 }
 
 static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
+busy_check_reader(struct dma_fence *fence)
 {
 	return __busy_set_if_active(fence, __busy_read_flag);
 }
 
 static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
+busy_check_writer(struct dma_fence *fence)
 {
 	if (!fence)
 		return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index fc30856e81fa..1231224728e4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3008,8 +3008,11 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
 	if (!fences)
 		return ERR_PTR(-ENOMEM);
 
-	for_each_batch_create_order(eb, i)
+	for_each_batch_create_order(eb, i) {
 		fences[i] = &eb->requests[i]->fence;
+		__set_bit(I915_FENCE_FLAG_COMPOSITE,
+			  &eb->requests[i]->fence.flags);
+	}
 
 	fence_array = dma_fence_array_create(eb->num_batches,
 					     fences,
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 24db8459376b..dc359242d1ae 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -156,6 +156,12 @@ enum {
 	 * submission / relationship encoutered an error.
 	 */
 	I915_FENCE_FLAG_SKIP_PARALLEL,
+
+	/*
+	 * I915_FENCE_FLAG_COMPOSITE - Indicates fence is part of a composite
+	 * fence (dma_fence_array) and i915 generated for parallel submission.
+	 */
+	I915_FENCE_FLAG_COMPOSITE,
 };
 
 /**
-- 
2.32.0


  parent reply	other threads:[~2021-10-14 17:47 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-14 17:19 [PATCH 00/25] Parallel submission aka multi-bb execbuf Matthew Brost
2021-10-14 17:19 ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 01/25] drm/i915/guc: Move GuC guc_id allocation under submission state sub-struct Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 02/25] drm/i915/guc: Take GT PM ref when deregistering context Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 03/25] drm/i915/guc: Take engine PM when a context is pinned with GuC submission Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 04/25] drm/i915/guc: Don't call switch_to_kernel_context " Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 05/25] drm/i915: Add logical engine mapping Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 06/25] drm/i915: Expose logical engine instance to user Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 07/25] drm/i915/guc: Introduce context parent-child relationship Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 08/25] drm/i915/guc: Add multi-lrc context registration Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 18:18   ` John Harrison
2021-10-14 18:18     ` [Intel-gfx] " John Harrison
2021-10-14 17:19 ` [PATCH 09/25] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 10/25] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 11/25] drm/i915/guc: Implement parallel context pin / unpin functions Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 12/25] drm/i915/guc: Implement multi-lrc submission Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 13/25] drm/i915/guc: Insert submit fences between requests in parent-child relationship Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 14/25] drm/i915/guc: Implement multi-lrc reset Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 15/25] drm/i915/guc: Update debugfs for GuC multi-lrc Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 16/25] drm/i915/guc: Connect UAPI to GuC multi-lrc interface Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 18:24   ` John Harrison
2021-10-14 18:24     ` [Intel-gfx] " John Harrison
2021-10-14 17:19 ` [PATCH 17/25] drm/i915/doc: Update parallel submit doc to point to i915_drm.h Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 18/25] drm/i915/guc: Add basic GuC multi-lrc selftest Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:19 ` [PATCH 19/25] drm/i915/guc: Implement no mid batch preemption for multi-lrc Matthew Brost
2021-10-14 17:19   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:20 ` [PATCH 20/25] drm/i915: Multi-BB execbuf Matthew Brost
2021-10-14 17:20   ` [Intel-gfx] " Matthew Brost
2021-10-14 18:27   ` John Harrison
2021-10-14 18:27     ` [Intel-gfx] " John Harrison
2021-10-14 17:20 ` [PATCH 21/25] drm/i915/guc: Handle errors in multi-lrc requests Matthew Brost
2021-10-14 17:20   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:20 ` [PATCH 22/25] drm/i915: Make request conflict tracking understand parallel submits Matthew Brost
2021-10-14 17:20   ` [Intel-gfx] " Matthew Brost
2021-10-14 17:20 ` Matthew Brost [this message]
2021-10-14 17:20   ` [Intel-gfx] [PATCH 23/25] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences Matthew Brost
2021-10-14 17:20 ` [PATCH 24/25] drm/i915: Enable multi-bb execbuf Matthew Brost
2021-10-14 17:20   ` [Intel-gfx] " Matthew Brost
2021-10-14 18:29   ` John Harrison
2021-10-14 18:29     ` [Intel-gfx] " John Harrison
2021-10-14 17:20 ` [PATCH 25/25] drm/i915/execlists: Weak parallel submission support for execlists Matthew Brost
2021-10-14 17:20   ` [Intel-gfx] " Matthew Brost
2021-10-14 18:42   ` John Harrison
2021-10-14 18:42     ` [Intel-gfx] " John Harrison
2021-10-14 18:55     ` Matthew Brost
2021-10-14 18:55       ` [Intel-gfx] " Matthew Brost
2021-10-14 23:50 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for Parallel submission aka multi-bb execbuf (rev7) Patchwork
2021-10-14 23:51 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-10-15  0:25 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-10-15  6:12 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2021-10-13 20:42 [PATCH 00/25] Parallel submission aka multi-bb execbuf Matthew Brost
2021-10-13 20:42 ` [PATCH 23/25] drm/i915: Update I915_GEM_BUSY IOCTL to understand composite fences Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211014172005.27155-24-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=john.c.harrison@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.