From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/i915/preemption: Allow preemption between submission ports
Date: Fri, 23 Feb 2018 17:18:17 +0200
Message-ID: <877er3spja.fsf@gaia.fi.intel.com>
In-Reply-To: <20180222142229.14517-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port contains a
> higher-priority context than the first, and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as against the head of the priority-sorted
> queue, and we cannot simply skip dequeuing when the queue is empty.
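
To restate the scenario for my own benefit, as a sketch rather than
text from the patch: with ports <A | B> both running at priority 0
and B subsequently boosted via execlists_schedule(), only the cached
queue_priority knows about the boost (the rbtree of queued requests
may well be empty), so the dequeue path must always run its
preemption check:

	/* paraphrasing the new check in execlists_dequeue() below;
	 * the <A:0 | B:0 -> B:1> scenario is my own example */
	if (need_preempt(engine, last, execlists->queue_priority)) {
		inject_preempt_context(engine);
		goto unlock;
	}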
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
> Rebase for conflicts
> -Chris
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c      |   2 +
>  drivers/gpu/drm/i915/intel_guc_submission.c |  17 +--
>  drivers/gpu/drm/i915/intel_lrc.c            | 161 ++++++++++++++++------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h     |   5 +
>  4 files changed, 107 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>  	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>  	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>  
> +	execlists->queue_priority = INT_MIN;
>  	execlists->queue = RB_ROOT;
>  	execlists->first = NULL;
>  }
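
INT_MIN as the empty-queue sentinel reads right to me: any priority a
real request carries compares above it, so the first submit_queue()
after idle always kicks the tasklet. Worth noting (my reading) that
queue_priority can exceed the actual queue head, e.g. when an
in-flight request on port[1] is boosted, and that is precisely the
signal the dequeue path needs:

	/* my annotation of the new field, not code from the patch:
	 * INT_MIN           - nothing needs servicing
	 * > prio of port[0] - dequeue should consider preemption */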
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  	spin_lock_irq(&engine->timeline->lock);
>  	list_for_each_entry(rq, &engine->timeline->requests, link)
>  		print_request(m, rq, "\t\tE ");
> +	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>  	for (rb = execlists->first; rb; rb = rb_next(rb)) {
>  		struct i915_priolist *p =
>  			rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
>   *
>   */
>  
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> +	return rb_entry(rb, struct i915_priolist, node);
> +}
> +
>  static inline bool is_high_priority(struct intel_guc_client *client)
>  {
>  	return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>  	rb = execlists->first;
>  	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
>  
> -	if (!rb)
> -		goto unlock;
> -
>  	if (port_isset(port)) {
>  		if (engine->i915->preempt_context) {
>  			struct guc_preempt_work *preempt_work =
>  				&engine->i915->guc.preempt_work[engine->id];
>  
> -			if (rb_entry(rb, struct i915_priolist, node)->priority >
> +			if (execlists->queue_priority >
>  			    max(port_request(port)->priotree.priority, 0)) {
>  				execlists_set_active(execlists,
>  						     EXECLISTS_ACTIVE_PREEMPT);
> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>  	}
>  	GEM_BUG_ON(port_isset(port));
>  
> -	do {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +	while (rb) {
> +		struct i915_priolist *p = to_priolist(rb);
>  		struct i915_request *rq, *rn;
>  
>  		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>  		INIT_LIST_HEAD(&p->requests);
>  		if (p->priority != I915_PRIORITY_NORMAL)
>  			kmem_cache_free(engine->i915->priorities, p);
> -	} while (rb);
> +	}
>  done:
> +	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
>  	execlists->first = rb;
>  	if (submit) {
>  		port_assign(port, last);
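
The GuC path gets the same treatment, and the do { } while (rb)
becomes while (rb) because rb may now legitimately be NULL on entry.
The key change is that the preempt test no longer dereferences
rb_first(), which is what makes dropping the !rb bail-out safe. Side
by side, both taken from this hunk:

	/* before: requires rb != NULL */
	if (rb_entry(rb, struct i915_priolist, node)->priority >
	    max(port_request(port)->priotree.priority, 0))

	/* after: well defined even when the queue is empty */
	if (execlists->queue_priority >
	    max(port_request(port)->priotree.priority, 0))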
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 964885b5d7cb..4bc72fbaf793 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
>  				     struct intel_engine_cs *engine,
>  				     struct intel_ring *ring);
>  
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> +	return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static inline int rq_prio(const struct i915_request *rq)
> +{
> +	return rq->priotree.priority;
> +}
> +
> +static inline bool need_preempt(const struct intel_engine_cs *engine,
> +                                const struct i915_request *last,
> +                                int prio)
> +{
> +	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +}
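
The max(rq_prio(last), 0) clamp in need_preempt() treats whatever is
running as at least priority 0, so only a genuinely boosted (positive
priority) request triggers preemption. A few worked cases, with
hypothetical values of my own (and assuming preempt_context exists):

	/* last prio -10, queue_priority 0 -> 0 > max(-10,0)=0, no preempt
	 * last prio   0, queue_priority 1 -> 1 > 0,            preempt
	 * last prio   5, queue_priority 3 -> 3 > max(5,0)=5,   no preempt */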
> +
>  /**
>   * intel_lr_context_descriptor_update() - calculate & cache the descriptor
>   * 					  descriptor for a pinned context
> @@ -224,7 +241,7 @@ lookup_priolist(struct intel_engine_cs *engine,
>  	parent = &execlists->queue.rb_node;
>  	while (*parent) {
>  		rb = *parent;
> -		p = rb_entry(rb, typeof(*p), node);
> +		p = to_priolist(rb);
>  		if (prio > p->priority) {
>  			parent = &rb->rb_left;
>  		} else if (prio < p->priority) {
> @@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
>  	if (first)
>  		execlists->first = &p->node;
>  
> -	return ptr_pack_bits(p, first, 1);
> +	return p;
>  }
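
Nice simplification: with queue_priority carrying the "kick the
tasklet?" decision, lookup_priolist() no longer has to smuggle the
first bit inside the returned pointer. For contrast, the old consumer
(the lines removed from insert_request() further down) was:

	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
	if (ptr_unmask_bits(p, 1))
		tasklet_hi_schedule(&engine->execlists.tasklet);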
>  
>  static void unwind_wa_tail(struct i915_request *rq)
> @@ -290,14 +307,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>  		__i915_request_unsubmit(rq);
>  		unwind_wa_tail(rq);
>  
> -		GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
> -		if (rq->priotree.priority != last_prio) {
> -			p = lookup_priolist(engine,
> -					    &rq->priotree,
> -					    rq->priotree.priority);
> -			p = ptr_mask_bits(p, 1);
> -
> -			last_prio = rq->priotree.priority;
> +		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
> +		if (rq_prio(rq) != last_prio) {
> +			last_prio = rq_prio(rq);
> +			p = lookup_priolist(engine, &rq->priotree, last_prio);
>  		}
>  
>  		list_add(&rq->priotree.link, &p->requests);
> @@ -397,10 +410,11 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  			desc = execlists_update_context(rq);
>  			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>  
> -			GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x\n",
> +			GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x, prio=%d\n",
>  				  engine->name, n,
>  				  port[n].context_id, count,
> -				  rq->global_seqno);
> +				  rq->global_seqno,
> +				  rq_prio(rq));
>  		} else {
>  			GEM_BUG_ON(!n);
>  			desc = 0;
> @@ -453,12 +467,17 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>  		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
>  				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>  
> +	/*
> +	 * Switch to our empty preempt context so
> +	 * the state of the GPU is known (idle).
> +	 */
>  	GEM_TRACE("%s\n", engine->name);
>  	for (n = execlists_num_ports(&engine->execlists); --n; )
>  		elsp_write(0, engine->execlists.elsp);
>  
>  	elsp_write(ce->lrc_desc, engine->execlists.elsp);
>  	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
> +	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
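
Setting EXECLISTS_ACTIVE_PREEMPT inside inject_preempt_context()
keeps the flag next to the ELSP write it describes, and the caller in
execlists_dequeue() collapses to (quoting the hunk further down):

	if (need_preempt(engine, last, execlists->queue_priority)) {
		inject_preempt_context(engine);
		goto unlock;
	}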
>  
>  static void execlists_dequeue(struct intel_engine_cs *engine)
> @@ -495,8 +514,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  	spin_lock_irq(&engine->timeline->lock);
>  	rb = execlists->first;
>  	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> -	if (!rb)
> -		goto unlock;
>  
>  	if (last) {
>  		/*
> @@ -519,54 +536,48 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
>  			goto unlock;
>  
> -		if (engine->i915->preempt_context &&
> -		    rb_entry(rb, struct i915_priolist, node)->priority >
> -		    max(last->priotree.priority, 0)) {
> -			/*
> -			 * Switch to our empty preempt context so
> -			 * the state of the GPU is known (idle).
> -			 */
> +		if (need_preempt(engine, last, execlists->queue_priority)) {
>  			inject_preempt_context(engine);
> -			execlists_set_active(execlists,
> -					     EXECLISTS_ACTIVE_PREEMPT);
>  			goto unlock;
> -		} else {
> -			/*
> -			 * In theory, we could coalesce more requests onto
> -			 * the second port (the first port is active, with
> -			 * no preemptions pending). However, that means we
> -			 * then have to deal with the possible lite-restore
> -			 * of the second port (as we submit the ELSP, there
> -			 * may be a context-switch) but also we may complete
> -			 * the resubmission before the context-switch. Ergo,
> -			 * coalescing onto the second port will cause a
> -			 * preemption event, but we cannot predict whether
> -			 * that will affect port[0] or port[1].
> -			 *
> -			 * If the second port is already active, we can wait
> -			 * until the next context-switch before contemplating
> -			 * new requests. The GPU will be busy and we should be
> -			 * able to resubmit the new ELSP before it idles,
> -			 * avoiding pipeline bubbles (momentary pauses where
> -			 * the driver is unable to keep up the supply of new
> -			 * work).
> -			 */
> -			if (port_count(&port[1]))
> -				goto unlock;
> -
> -			/* WaIdleLiteRestore:bdw,skl
> -			 * Apply the wa NOOPs to prevent
> -			 * ring:HEAD == rq:TAIL as we resubmit the
> -			 * request. See gen8_emit_breadcrumb() for
> -			 * where we prepare the padding after the
> -			 * end of the request.
> -			 */
> -			last->tail = last->wa_tail;
>  		}
> +
> +		/*
> +		 * In theory, we could coalesce more requests onto
> +		 * the second port (the first port is active, with
> +		 * no preemptions pending). However, that means we
> +		 * then have to deal with the possible lite-restore
> +		 * of the second port (as we submit the ELSP, there
> +		 * may be a context-switch) but also we may complete
> +		 * the resubmission before the context-switch. Ergo,
> +		 * coalescing onto the second port will cause a
> +		 * preemption event, but we cannot predict whether
> +		 * that will affect port[0] or port[1].
> +		 *
> +		 * If the second port is already active, we can wait
> +		 * until the next context-switch before contemplating
> +		 * new requests. The GPU will be busy and we should be
> +		 * able to resubmit the new ELSP before it idles,
> +		 * avoiding pipeline bubbles (momentary pauses where
> +		 * the driver is unable to keep up the supply of new
> +		 * work). However, we have to double-check that the
> +		 * priorities of the ports haven't been switched.
> +		 */
> +		if (port_count(&port[1]))
> +			goto unlock;
> +
> +		/*
> +		 * WaIdleLiteRestore:bdw,skl
> +		 * Apply the wa NOOPs to prevent
> +		 * ring:HEAD == rq:TAIL as we resubmit the
> +		 * request. See gen8_emit_breadcrumb() for
> +		 * where we prepare the padding after the
> +		 * end of the request.
> +		 */
> +		last->tail = last->wa_tail;
>  	}
>  
> -	do {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +	while (rb) {
> +		struct i915_priolist *p = to_priolist(rb);
>  		struct i915_request *rq, *rn;
>  
>  		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
> @@ -628,8 +639,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		INIT_LIST_HEAD(&p->requests);
>  		if (p->priority != I915_PRIORITY_NORMAL)
>  			kmem_cache_free(engine->i915->priorities, p);
> -	} while (rb);
> +	}
>  done:
> +	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
>  	execlists->first = rb;
>  	if (submit)
>  		port_assign(port, last);
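
With the !rb early-out gone, this done: bookkeeping is what keeps the
field honest on every pass: after dequeue, queue_priority is the
priority of the first priolist still waiting, or INT_MIN when the
queue has been drained. That is what lets submit_queue() elide
redundant tasklet kicks, as I read it:

	/* from submit_queue() later in this patch */
	if (prio > engine->execlists.queue_priority) {
		engine->execlists.queue_priority = prio;
		tasklet_hi_schedule(&engine->execlists.tasklet);
	}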
> @@ -690,7 +702,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  	/* Flush the queued requests to the timeline list (for retiring). */
>  	rb = execlists->first;
>  	while (rb) {
> -		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +		struct i915_priolist *p = to_priolist(rb);
>  
>  		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
>  			INIT_LIST_HEAD(&rq->priotree.link);
> @@ -708,7 +720,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  
>  	/* Remaining _unready_ requests will be nop'ed when submitted */
>  
> -
> +	execlists->queue_priority = INT_MIN;
>  	execlists->queue = RB_ROOT;
>  	execlists->first = NULL;
>  	GEM_BUG_ON(port_isset(execlists->port));
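
Resetting queue_priority alongside queue and first on cancel matches
the init path, so the three fields now travel together everywhere:

	/* identical in intel_engine_init_execlist() and here */
	execlists->queue_priority = INT_MIN;
	execlists->queue = RB_ROOT;
	execlists->first = NULL;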
> @@ -864,10 +876,11 @@ static void execlists_submission_tasklet(unsigned long data)
>  							EXECLISTS_ACTIVE_USER));
>  
>  			rq = port_unpack(port, &count);
> -			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
> +			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
>  				  engine->name,
>  				  port->context_id, count,
> -				  rq ? rq->global_seqno : 0);
> +				  rq ? rq->global_seqno : 0,
> +				  rq ? rq_prio(rq) : 0);
>  
>  			/* Check the context/desc id for this event matches */
>  			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> @@ -912,15 +925,19 @@ static void execlists_submission_tasklet(unsigned long data)
>  		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
>  }
>  
> -static void insert_request(struct intel_engine_cs *engine,
> -			   struct i915_priotree *pt,
> -			   int prio)
> +static void queue_request(struct intel_engine_cs *engine,
> +			  struct i915_priotree *pt,
> +			  int prio)
>  {
> -	struct i915_priolist *p = lookup_priolist(engine, pt, prio);
> +	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
> +}
>  
> -	list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
> -	if (ptr_unmask_bits(p, 1))
> +static void submit_queue(struct intel_engine_cs *engine, int prio)
> +{
> +	if (prio > engine->execlists.queue_priority) {
> +		engine->execlists.queue_priority = prio;
>  		tasklet_hi_schedule(&engine->execlists.tasklet);
> +	}
>  }
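
Splitting insert_request() into queue_request() plus submit_queue()
makes the fast path obvious: queueing is unconditional, the tasklet
kick happens only when the new priority actually raises
queue_priority. Typical caller, per the next hunk:

	queue_request(engine, &request->priotree, rq_prio(request));
	submit_queue(engine, rq_prio(request));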
>  
>  static void execlists_submit_request(struct i915_request *request)
> @@ -931,7 +948,8 @@ static void execlists_submit_request(struct i915_request *request)
>  	/* Will be called from irq-context when using foreign fences. */
>  	spin_lock_irqsave(&engine->timeline->lock, flags);
>  
> -	insert_request(engine, &request->priotree, request->priotree.priority);
> +	queue_request(engine, &request->priotree, rq_prio(request));
> +	submit_queue(engine, rq_prio(request));
>  
>  	GEM_BUG_ON(!engine->execlists.first);
>  	GEM_BUG_ON(list_empty(&request->priotree.link));
> @@ -987,7 +1005,7 @@ static void execlists_schedule(struct i915_request *request, int prio)
>  	 * static void update_priorities(struct i915_priotree *pt, prio) {
>  	 *	list_for_each_entry(dep, &pt->signalers_list, signal_link)
>  	 *		update_priorities(dep->signal, prio)
> -	 *	insert_request(pt);
> +	 *	queue_request(pt);
>  	 * }
>  	 * but that may have unlimited recursion depth and so runs a very
>  	 * real risk of overunning the kernel stack. Instead, we build
> @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request *request, int prio)
>  		pt->priority = prio;
>  		if (!list_empty(&pt->link)) {
>  			__list_del_entry(&pt->link);
> -			insert_request(engine, pt, prio);
> +			queue_request(engine, pt, prio);
>  		}
> +		submit_queue(engine, prio);
>  	}
>  
>  	spin_unlock_irq(&engine->timeline->lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a9b83bf7e837..c4e9022b34e3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -257,6 +257,11 @@ struct intel_engine_execlists {
>  	 */
>  	unsigned int port_mask;
>  
> +	/**
> +	 * @queue_priority: Highest pending priority.
> +	 */
> +	int queue_priority;
> +
>  	/**
>  	 * @queue: queue of requests, in priority lists
>  	 */
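
One doc nit, not blocking my r-b: the kerneldoc could spell out the
sentinel and the consumers. Suggested wording only, not in the patch:

	/**
	 * @queue_priority: Highest pending priority.
	 *
	 * INT_MIN when no requests are pending. Raised by
	 * submit_queue() and consulted by need_preempt() to decide
	 * whether to inject the preempt context.
	 */
	int queue_priority;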
> -- 
> 2.16.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
