* [PATCH 2/4] drm/i915/guc: Submit GuC workitems containing coalesced requests
2017-09-13 11:06 [PATCH 1/4] drm/i915/guc: Remove obsolete comments and remove unused variable Chris Wilson
@ 2017-09-13 11:06 ` Chris Wilson
2017-09-13 11:06 ` [PATCH 3/4] polish-doorbell Chris Wilson
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Chris Wilson @ 2017-09-13 11:06 UTC (permalink / raw)
To: intel-gfx
From: Michał Winiarski <michal.winiarski@intel.com>
To create an upper bound on number of GuC workitems, we need to change
the way that requests are being submitted. Rather than submitting each
request as an individual workitem, we can do coalescing in a similar way
we're handlig execlist submission ports. We also need to stop pretending
that we're doing "lite-restore" in GuC submission (we would create a
workitem each time we hit this condition). This allows us to completely
remove the reservation, replacing it with a compile time check.
v2: Also coalesce when replaying on reset (Daniele)
v3: Consistent wq_resv - per-request (Daniele)
v4: Squash removing wq_resv
References: https://bugs.freedesktop.org/show_bug.cgi?id=101873
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20170912124726.19689-2-michal.winiarski@intel.com
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 -
drivers/gpu/drm/i915/i915_guc_submission.c | 179 ++++++++++-------------------
drivers/gpu/drm/i915/intel_lrc.c | 25 +---
drivers/gpu/drm/i915/intel_uc.h | 11 --
4 files changed, 62 insertions(+), 155 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6338018f655d..f5fd00cfb3b0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2450,8 +2450,6 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
client->wq_size, client->wq_offset, client->wq_tail);
- seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
-
for_each_engine(engine, dev_priv, id) {
u64 submissions = client->submissions[id];
tot += submissions;
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 9828c77910b1..28aee8e1c89b 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -406,63 +406,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
memset(desc, 0, sizeof(*desc));
}
-/**
- * i915_guc_wq_reserve() - reserve space in the GuC's workqueue
- * @request: request associated with the commands
- *
- * Return: 0 if space is available
- * -EAGAIN if space is not currently available
- *
- * This function must be called (and must return 0) before a request
- * is submitted to the GuC via i915_guc_submit() below. Once a result
- * of 0 has been returned, it must be balanced by a corresponding
- * call to submit().
- *
- * Reservation allows the caller to determine in advance that space
- * will be available for the next submission before committing resources
- * to it, and helps avoid late failures with complicated recovery paths.
- */
-int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
-{
- const size_t wqi_size = sizeof(struct guc_wq_item);
- struct i915_guc_client *client = request->i915->guc.execbuf_client;
- struct guc_process_desc *desc = __get_process_desc(client);
- u32 freespace;
- int ret;
-
- spin_lock_irq(&client->wq_lock);
- freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
- freespace -= client->wq_rsvd;
- if (likely(freespace >= wqi_size)) {
- client->wq_rsvd += wqi_size;
- ret = 0;
- } else {
- client->no_wq_space++;
- ret = -EAGAIN;
- }
- spin_unlock_irq(&client->wq_lock);
-
- return ret;
-}
-
-static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&client->wq_lock, flags);
- client->wq_rsvd += size;
- spin_unlock_irqrestore(&client->wq_lock, flags);
-}
-
-void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
-{
- const int wqi_size = sizeof(struct guc_wq_item);
- struct i915_guc_client *client = request->i915->guc.execbuf_client;
-
- GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
- guc_client_update_wq_rsvd(client, -wqi_size);
-}
-
/* Construct a Work Item and append it to the GuC's Work Queue */
static void guc_wq_item_append(struct i915_guc_client *client,
struct drm_i915_gem_request *rq)
@@ -476,7 +419,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
struct guc_wq_item *wqi;
u32 freespace, tail, wq_off;
- /* Free space is guaranteed, see i915_guc_wq_reserve() above */
+ /* Free space is guaranteed */
freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
GEM_BUG_ON(freespace < wqi_size);
@@ -491,14 +434,12 @@ static void guc_wq_item_append(struct i915_guc_client *client,
* workqueue buffer dw by dw.
*/
BUILD_BUG_ON(wqi_size != 16);
- GEM_BUG_ON(client->wq_rsvd < wqi_size);
/* postincrement WQ tail for next time */
wq_off = client->wq_tail;
GEM_BUG_ON(wq_off & (wqi_size - 1));
client->wq_tail += wqi_size;
client->wq_tail &= client->wq_size - 1;
- client->wq_rsvd -= wqi_size;
/* WQ starts from the page after doorbell / process_desc */
wqi = client->vaddr + wq_off + GUC_DB_SIZE;
@@ -579,47 +520,43 @@ static int guc_ring_doorbell(struct i915_guc_client *client)
}
/**
- * __i915_guc_submit() - Submit commands through GuC
+ * i915_guc_submit() - Submit commands through GuC
* @rq: request associated with the commands
*
- * The caller must have already called i915_guc_wq_reserve() above with
- * a result of 0 (success), guaranteeing that there is space in the work
- * queue for the new request, so enqueuing the item cannot fail.
- *
- * Bad Things Will Happen if the caller violates this protocol e.g. calls
- * submit() when _reserve() says there's no space, or calls _submit()
- * a different number of times from (successful) calls to _reserve().
- *
* The only error here arises if the doorbell hardware isn't functioning
* as expected, which really shouln't happen.
*/
-static void __i915_guc_submit(struct drm_i915_gem_request *rq)
+static void i915_guc_submit(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = rq->i915;
- struct intel_engine_cs *engine = rq->engine;
- unsigned int engine_id = engine->id;
- struct intel_guc *guc = &rq->i915->guc;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct intel_guc *guc = &dev_priv->guc;
struct i915_guc_client *client = guc->execbuf_client;
+ struct execlist_port *port = engine->execlist_port;
+ unsigned int engine_id = engine->id;
+ unsigned int n;
unsigned long flags;
- /* WA to flush out the pending GMADR writes to ring buffer. */
- if (i915_vma_is_map_and_fenceable(rq->ring->vma))
- POSTING_READ_FW(GUC_STATUS);
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+ struct drm_i915_gem_request *rq;
+ unsigned int count;
- spin_lock_irqsave(&client->wq_lock, flags);
+ rq = port_unpack(&port[n], &count);
+ if (rq && count == 0) {
+ port_set(&port[n], port_pack(rq, ++count));
- guc_wq_item_append(client, rq);
- WARN_ON(guc_ring_doorbell(client));
+ if (i915_vma_is_map_and_fenceable(rq->ring->vma))
+ POSTING_READ_FW(GUC_STATUS);
- client->submissions[engine_id] += 1;
+ spin_lock_irqsave(&client->wq_lock, flags);
- spin_unlock_irqrestore(&client->wq_lock, flags);
-}
+ guc_wq_item_append(client, rq);
+ WARN_ON(guc_ring_doorbell(client));
-static void i915_guc_submit(struct drm_i915_gem_request *rq)
-{
- __i915_gem_request_submit(rq);
- __i915_guc_submit(rq);
+ client->submissions[engine_id] += 1;
+
+ spin_unlock_irqrestore(&client->wq_lock, flags);
+ }
+ }
}
static void nested_enable_signaling(struct drm_i915_gem_request *rq)
@@ -653,16 +590,19 @@ static void port_assign(struct execlist_port *port,
if (port_isset(port))
i915_gem_request_put(port_request(port));
- port_set(port, i915_gem_request_get(rq));
+ port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
nested_enable_signaling(rq);
}
-static bool i915_guc_dequeue(struct intel_engine_cs *engine)
+static void i915_guc_dequeue(struct intel_engine_cs *engine)
{
struct execlist_port *port = engine->execlist_port;
- struct drm_i915_gem_request *last = port_request(port);
- struct rb_node *rb;
+ struct drm_i915_gem_request *last = NULL;
bool submit = false;
+ struct rb_node *rb;
+
+ if (port_isset(port))
+ port++;
spin_lock_irq(&engine->timeline->lock);
rb = engine->execlist_first;
@@ -687,7 +627,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&rq->priotree.link);
rq->priotree.priority = INT_MAX;
- i915_guc_submit(rq);
+ __i915_gem_request_submit(rq);
trace_i915_gem_request_in(rq, port_index(port, engine));
last = rq;
submit = true;
@@ -701,11 +641,11 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
}
done:
engine->execlist_first = rb;
- if (submit)
+ if (submit) {
port_assign(port, last);
+ i915_guc_submit(engine);
+ }
spin_unlock_irq(&engine->timeline->lock);
-
- return submit;
}
static void i915_guc_irq_handler(unsigned long data)
@@ -713,24 +653,20 @@ static void i915_guc_irq_handler(unsigned long data)
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
struct execlist_port *port = engine->execlist_port;
struct drm_i915_gem_request *rq;
- bool submit;
- do {
- rq = port_request(&port[0]);
- while (rq && i915_gem_request_completed(rq)) {
- trace_i915_gem_request_out(rq);
- i915_gem_request_put(rq);
+ rq = port_request(&port[0]);
+ while (rq && i915_gem_request_completed(rq)) {
+ trace_i915_gem_request_out(rq);
+ i915_gem_request_put(rq);
- port[0] = port[1];
- memset(&port[1], 0, sizeof(port[1]));
+ port[0] = port[1];
+ memset(&port[1], 0, sizeof(port[1]));
- rq = port_request(&port[0]);
- }
+ rq = port_request(&port[0]);
+ }
- submit = false;
- if (!port_count(&port[1]))
- submit = i915_guc_dequeue(engine);
- } while (submit);
+ if (!port_isset(&port[1]))
+ i915_guc_dequeue(engine);
}
/*
@@ -1224,6 +1160,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
enum intel_engine_id id;
int err;
+ /*
+ * We're using GuC work items for submitting work through GuC. Since
+ * we're coalescing multiple requests from a single context into a
+ * single work item prior to assigning it to execlist_port, we can
+ * never have more work items than the total number of ports (for all
+ * engines). The GuC firmware is controlling the HEAD of work queue,
+ * and it is guaranteed that it will remove the work item from the
+ * queue before our request is completed.
+ */
+ BUILD_BUG_ON(ARRAY_SIZE(engine->execlist_port) *
+ sizeof(struct guc_wq_item) *
+ I915_NUM_ENGINES > GUC_WQ_SIZE);
+
if (!client) {
client = guc_client_alloc(dev_priv,
INTEL_INFO(dev_priv)->ring_mask,
@@ -1251,9 +1200,6 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
guc_interrupts_capture(dev_priv);
for_each_engine(engine, dev_priv, id) {
- const int wqi_size = sizeof(struct guc_wq_item);
- struct drm_i915_gem_request *rq;
-
/* The tasklet was initialised by execlists, and may be in
* a state of flux (across a reset) and so we just want to
* take over the callback without changing any other state
@@ -1261,14 +1207,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
*/
engine->irq_tasklet.func = i915_guc_irq_handler;
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
- /* Replay the current set of previously submitted requests */
- spin_lock_irq(&engine->timeline->lock);
- list_for_each_entry(rq, &engine->timeline->requests, link) {
- guc_client_update_wq_rsvd(client, wqi_size);
- __i915_guc_submit(rq);
- }
- spin_unlock_irq(&engine->timeline->lock);
+ i915_guc_submit(engine);
}
return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9e4342b8e52b..7457783ed182 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -908,27 +908,14 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
*/
request->reserved_space += EXECLISTS_REQUEST_SIZE;
- if (i915.enable_guc_submission) {
- /*
- * Check that the GuC has space for the request before
- * going any further, as the i915_add_request() call
- * later on mustn't fail ...
- */
- ret = i915_guc_wq_reserve(request);
- if (ret)
- goto err;
- }
-
cs = intel_ring_begin(request, 0);
- if (IS_ERR(cs)) {
- ret = PTR_ERR(cs);
- goto err_unreserve;
- }
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
if (!ce->initialised) {
ret = engine->init_context(request);
if (ret)
- goto err_unreserve;
+ return ret;
ce->initialised = true;
}
@@ -942,12 +929,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
request->reserved_space -= EXECLISTS_REQUEST_SIZE;
return 0;
-
-err_unreserve:
- if (i915.enable_guc_submission)
- i915_guc_wq_unreserve(request);
-err:
- return ret;
}
/*
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 69daf4c01cd0..d41051688221 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -52,13 +52,6 @@ struct drm_i915_gem_request;
* GuC). The subsequent pages of the client object constitute the work
* queue (a circular array of work items), again described in the process
* descriptor. Work queue pages are mapped momentarily as required.
- *
- * We also keep a few statistics on failures. Ideally, these should all
- * be zero!
- * no_wq_space: times that the submission pre-check found no space was
- * available in the work queue (note, the queue is shared,
- * not per-engine). It is OK for this to be nonzero, but
- * it should not be huge!
*/
struct i915_guc_client {
struct i915_vma *vma;
@@ -79,8 +72,6 @@ struct i915_guc_client {
uint32_t wq_offset;
uint32_t wq_size;
uint32_t wq_tail;
- uint32_t wq_rsvd;
- uint32_t no_wq_space;
/* Per-engine counts of GuC submissions */
uint64_t submissions[I915_NUM_ENGINES];
@@ -246,8 +237,6 @@ u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
/* i915_guc_submission.c */
int i915_guc_submission_init(struct drm_i915_private *dev_priv);
int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
-int i915_guc_wq_reserve(struct drm_i915_gem_request *rq);
-void i915_guc_wq_unreserve(struct drm_i915_gem_request *request);
void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/4] polish-doorbell
2017-09-13 11:06 [PATCH 1/4] drm/i915/guc: Remove obsolete comments and remove unused variable Chris Wilson
2017-09-13 11:06 ` [PATCH 2/4] drm/i915/guc: Submit GuC workitems containing coalesced requests Chris Wilson
@ 2017-09-13 11:06 ` Chris Wilson
2017-09-13 11:06 ` [PATCH 4/4] HAX Enable GuC Submission for CI Chris Wilson
2017-09-13 11:57 ` ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/guc: Remove obsolete comments and remove unused variable Patchwork
3 siblings, 0 replies; 5+ messages in thread
From: Chris Wilson @ 2017-09-13 11:06 UTC (permalink / raw)
To: intel-gfx
---
drivers/gpu/drm/i915/i915_debugfs.c | 4 +-
drivers/gpu/drm/i915/i915_guc_submission.c | 61 ++++++------------------------
drivers/gpu/drm/i915/intel_uc.h | 1 -
3 files changed, 13 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f5fd00cfb3b0..40a315fc0e31 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2445,8 +2445,8 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
client->priority, client->stage_id, client->proc_desc_offset);
- seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
- client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
+ seq_printf(m, "\tDoorbell id %d, offset: 0x%lx\n",
+ client->doorbell_id, client->doorbell_offset);
seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
client->wq_size, client->wq_offset, client->wq_tail);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 28aee8e1c89b..8e691952f38d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -192,13 +192,12 @@ static int __create_doorbell(struct i915_guc_client *client)
doorbell = __get_doorbell(client);
doorbell->db_status = GUC_DOORBELL_ENABLED;
- doorbell->cookie = client->doorbell_cookie;
+ doorbell->cookie = 0;
err = __guc_allocate_doorbell(client->guc, client->stage_id);
- if (err) {
+ if (err)
doorbell->db_status = GUC_DOORBELL_DISABLED;
- doorbell->cookie = 0;
- }
+
return err;
}
@@ -466,57 +465,19 @@ static void guc_reset_wq(struct i915_guc_client *client)
client->wq_tail = 0;
}
-static int guc_ring_doorbell(struct i915_guc_client *client)
+static void guc_ring_doorbell(struct i915_guc_client *client)
{
struct guc_process_desc *desc = __get_process_desc(client);
- union guc_doorbell_qw db_cmp, db_exc, db_ret;
- union guc_doorbell_qw *db;
- int attempt = 2, ret = -EAGAIN;
+ struct guc_doorbell_info *db = __get_doorbell(client);
+ u32 cookie;
/* Update the tail so it is visible to GuC */
desc->tail = client->wq_tail;
- /* current cookie */
- db_cmp.db_status = GUC_DOORBELL_ENABLED;
- db_cmp.cookie = client->doorbell_cookie;
-
- /* cookie to be updated */
- db_exc.db_status = GUC_DOORBELL_ENABLED;
- db_exc.cookie = client->doorbell_cookie + 1;
- if (db_exc.cookie == 0)
- db_exc.cookie = 1;
-
- /* pointer of current doorbell cacheline */
- db = (union guc_doorbell_qw *)__get_doorbell(client);
-
- while (attempt--) {
- /* lets ring the doorbell */
- db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db,
- db_cmp.value_qw, db_exc.value_qw);
-
- /* if the exchange was successfully executed */
- if (db_ret.value_qw == db_cmp.value_qw) {
- /* db was successfully rung */
- client->doorbell_cookie = db_exc.cookie;
- ret = 0;
- break;
- }
-
- /* XXX: doorbell was lost and need to acquire it again */
- if (db_ret.db_status == GUC_DOORBELL_DISABLED)
- break;
-
- DRM_WARN("Cookie mismatch. Expected %d, found %d\n",
- db_cmp.cookie, db_ret.cookie);
-
- /* update the cookie to newly read cookie from GuC */
- db_cmp.cookie = db_ret.cookie;
- db_exc.cookie = db_ret.cookie + 1;
- if (db_exc.cookie == 0)
- db_exc.cookie = 1;
- }
-
- return ret;
+ do {
+ cookie = READ_ONCE(db->cookie);
+ } while (cmpxchg(&db->cookie, cookie, cookie + 1) != cookie);
+ GEM_BUG_ON(db->db_status != GUC_DOORBELL_ENABLED);
}
/**
@@ -550,7 +511,7 @@ static void i915_guc_submit(struct intel_engine_cs *engine)
spin_lock_irqsave(&client->wq_lock, flags);
guc_wq_item_append(client, rq);
- WARN_ON(guc_ring_doorbell(client));
+ guc_ring_doorbell(client);
client->submissions[engine_id] += 1;
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index d41051688221..f505dcccd613 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -66,7 +66,6 @@ struct i915_guc_client {
u16 doorbell_id;
unsigned long doorbell_offset;
- u32 doorbell_cookie;
spinlock_t wq_lock;
uint32_t wq_offset;
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread