* [PATCH] drm/i915: Avoid keeping waitboost active for signaling threads
@ 2017-06-22 10:55 Chris Wilson
From: Chris Wilson @ 2017-06-22 10:55 UTC (permalink / raw)
  To: intel-gfx

Once a client has requested a waitboost, we keep that waitboost active
until all clients are no longer waiting. This is because we don't
distinguish which waiter deserves the boost. However, with the advent of
fence signaling, the signaler threads appear as waiters to the RPS
interrupt handler. So instead of using a single boolean to track when to
keep the waitboost active, use a counter of all outstanding waitboosted
requests.
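
As a rough sketch of the shape of the change (hypothetical names, not the
driver code in the diff below; the real patch additionally serialises the
per-request flag under the request lock):

#include <linux/atomic.h>
#include <linux/types.h>

struct sketch_request {
	bool waitboost;
};

/* One counter of outstanding waitboosted requests replaces the
 * old all-or-nothing client_boost flag.
 */
static atomic_t num_waiters = ATOMIC_INIT(0);

/* Counted once per request, however many threads wait on it. */
static void sketch_boost(struct sketch_request *rq)
{
	if (!rq->waitboost) {
		rq->waitboost = true;
		atomic_inc(&num_waiters);
	}
}

/* The boost lapses when the request is retired, not when some
 * waiter (or signaler thread) happens to wake up.
 */
static void sketch_retire(struct sketch_request *rq)
{
	if (rq->waitboost)
		atomic_dec(&num_waiters);
}

/* The RPS worker then only has to ask: is anyone still boosted? */
static bool sketch_any_boost(void)
{
	return atomic_read(&num_waiters);
}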

At this point, I have removed all vestiges of the rate limiting on
clients. Whilst this means that compositors should remain more fluid,
it also means that boosts are more prevalent.

A drawback of this implementation is that it requires constant request
submission to keep the waitboost trimmed (as it is now cancelled when the
request is completed). This will be fine for a busy system, but near
idle the boosts may be kept for longer than desired (effectively tens of
vblanks worst case) and there is a reliance on rc6 instead.
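
Sketched with the same hypothetical names as above (and assuming the
request carries the usual lock and fence fields), the trimming happens
in the retirement path, under the request lock so that a boost cannot
race in after the fence has been signalled:

/* Assumed extra fields on struct sketch_request:
 *	spinlock_t lock;
 *	struct dma_fence fence;
 */
static void sketch_request_retire(struct sketch_request *rq)
{
	spin_lock_irq(&rq->lock);
	if (rq->waitboost)	/* the boost dies with the request */
		atomic_dec(&num_waiters);
	dma_fence_signal_locked(&rq->fence);
	spin_unlock_irq(&rq->lock);
}

Near idle nothing runs this path promptly, which is the worst case
described above.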

Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  7 ++--
 drivers/gpu/drm/i915/i915_drv.h         |  6 ++--
 drivers/gpu/drm/i915/i915_gem.c         | 25 +-------------
 drivers/gpu/drm/i915/i915_gem_request.c |  7 +++-
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 drivers/gpu/drm/i915/i915_irq.c         | 18 ++--------
 drivers/gpu/drm/i915/intel_drv.h        |  5 ++-
 drivers/gpu/drm/i915/intel_pm.c         | 58 +++++++++++++--------------------
 8 files changed, 43 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f7aa6cbe3a2e..231c3c22e9c8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2327,6 +2327,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
+	seq_printf(m, "Boosts outstanding? %d\n",
+		   atomic_read(&dev_priv->rps.num_waiters));
 	seq_printf(m, "Frequency requested %d\n",
 		   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
 	seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
@@ -2347,11 +2349,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 
 		rcu_read_lock();
 		task = pid_task(file->pid, PIDTYPE_PID);
-		seq_printf(m, "%s [%d]: %d boosts%s\n",
+		seq_printf(m, "%s [%d]: %d boosts\n",
 			   task ? task->comm : "<unknown>",
 			   task ? task->pid : -1,
-			   file_priv->rps.boosts,
-			   list_empty(&file_priv->rps.link) ? "" : ", active");
+			   atomic_read(&file_priv->rps.boosts));
 		rcu_read_unlock();
 	}
 	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 30e89456fc61..95e164387363 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -584,8 +584,7 @@ struct drm_i915_file_private {
 	struct idr context_idr;
 
 	struct intel_rps_client {
-		struct list_head link;
-		unsigned boosts;
+		atomic_t boosts;
 	} rps;
 
 	unsigned int bsd_engine;
@@ -1304,8 +1303,7 @@ struct intel_gen6_power_mgmt {
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
 	spinlock_t client_lock;
-	struct list_head clients;
-	bool client_boost;
+	atomic_t num_waiters;
 
 	bool enabled;
 	struct delayed_work autoenable_work;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ae3ce1314bd1..7391e2d36a31 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -388,7 +388,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	 */
 	if (rps) {
 		if (INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
+			gen6_rps_boost(rq, rps);
 		else
 			rps = NULL;
 	}
@@ -399,22 +399,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 		i915_gem_request_retire_upto(rq);
 
-	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
-		/* The GPU is now idle and this client has stalled.
-		 * Since no other client has submitted a request in the
-		 * meantime, assume that this client is the only one
-		 * supplying work to the GPU but is unable to keep that
-		 * work supplied because it is waiting. Since the GPU is
-		 * then never kept fully busy, RPS autoclocking will
-		 * keep the clocks relatively low, causing further delays.
-		 * Compensate by giving the synchronous client credit for
-		 * a waitboost next time.
-		 */
-		spin_lock(&rq->i915->rps.client_lock);
-		list_del_init(&rps->link);
-		spin_unlock(&rq->i915->rps.client_lock);
-	}
-
 	return timeout;
 }
 
@@ -5065,12 +5049,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
 		request->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
-
-	if (!list_empty(&file_priv->rps.link)) {
-		spin_lock(&to_i915(dev)->rps.client_lock);
-		list_del(&file_priv->rps.link);
-		spin_unlock(&to_i915(dev)->rps.client_lock);
-	}
 }
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
@@ -5087,7 +5065,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	file->driver_priv = file_priv;
 	file_priv->dev_priv = i915;
 	file_priv->file = file;
-	INIT_LIST_HEAD(&file_priv->rps.link);
 
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8c59c79cbd8b..483af8921060 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -384,7 +384,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 		engine->context_unpin(engine, engine->last_retired_context);
 	engine->last_retired_context = request->ctx;
 
-	dma_fence_signal(&request->fence);
+	spin_lock_irq(&request->lock);
+	if (request->waitboost)
+		atomic_dec(&request->i915->rps.num_waiters);
+	dma_fence_signal_locked(&request->fence);
+	spin_unlock_irq(&request->lock);
 
 	i915_priotree_fini(request->i915, &request->priotree);
 	i915_gem_request_put(request);
@@ -639,6 +643,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->file_priv = NULL;
 	req->batch = NULL;
 	req->capture_list = NULL;
+	req->waitboost = false;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 7b7c84369d78..604e131470a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -184,6 +184,8 @@ struct drm_i915_gem_request {
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
 
+	bool waitboost;
+
 	/** engine->request_list entry for this request */
 	struct list_head link;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b1c7d1a04612..61047ee2eede 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1091,18 +1091,6 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	return events;
 }
 
-static bool any_waiters(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		if (intel_engine_has_waiter(engine))
-			return true;
-
-	return false;
-}
-
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
@@ -1114,7 +1102,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	spin_lock_irq(&dev_priv->irq_lock);
 	if (dev_priv->rps.interrupts_enabled) {
 		pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
-		client_boost = fetch_and_zero(&dev_priv->rps.client_boost);
+		client_boost = atomic_read(&dev_priv->rps.num_waiters);
 	}
 	spin_unlock_irq(&dev_priv->irq_lock);
 
@@ -1131,7 +1119,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	new_delay = dev_priv->rps.cur_freq;
 	min = dev_priv->rps.min_freq_softlimit;
 	max = dev_priv->rps.max_freq_softlimit;
-	if (client_boost || any_waiters(dev_priv))
+	if (client_boost)
 		max = dev_priv->rps.max_freq;
 	if (client_boost && new_delay < dev_priv->rps.boost_freq) {
 		new_delay = dev_priv->rps.boost_freq;
@@ -1144,7 +1132,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 		if (new_delay >= dev_priv->rps.max_freq_softlimit)
 			adj = 0;
-	} else if (client_boost || any_waiters(dev_priv)) {
+	} else if (client_boost) {
 		adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
 		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index d93efb49a2e2..d17a32437f07 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1858,9 +1858,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct drm_i915_private *dev_priv,
-		    struct intel_rps_client *rps,
-		    unsigned long submitted);
+void gen6_rps_boost(struct drm_i915_gem_request *rq,
+		    struct intel_rps_client *rps);
 void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
 void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b5b7372fcddc..fabea61ca0b6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6125,47 +6125,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
-
-	spin_lock(&dev_priv->rps.client_lock);
-	while (!list_empty(&dev_priv->rps.clients))
-		list_del_init(dev_priv->rps.clients.next);
-	spin_unlock(&dev_priv->rps.client_lock);
 }
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv,
-		    struct intel_rps_client *rps,
-		    unsigned long submitted)
+void gen6_rps_boost(struct drm_i915_gem_request *rq,
+		    struct intel_rps_client *rps)
 {
+	struct drm_i915_private *i915 = rq->i915;
+	bool boost;
+
 	/* This is intentionally racy! We peek at the state here, then
 	 * validate inside the RPS worker.
 	 */
-	if (!(dev_priv->gt.awake &&
-	      dev_priv->rps.enabled &&
-	      dev_priv->rps.cur_freq < dev_priv->rps.boost_freq))
+	if (!i915->rps.enabled)
 		return;
 
-	/* Force a RPS boost (and don't count it against the client) if
-	 * the GPU is severely congested.
-	 */
-	if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
-		rps = NULL;
-
-	spin_lock(&dev_priv->rps.client_lock);
-	if (rps == NULL || list_empty(&rps->link)) {
-		spin_lock_irq(&dev_priv->irq_lock);
-		if (dev_priv->rps.interrupts_enabled) {
-			dev_priv->rps.client_boost = true;
-			schedule_work(&dev_priv->rps.work);
-		}
-		spin_unlock_irq(&dev_priv->irq_lock);
-
-		if (rps != NULL) {
-			list_add(&rps->link, &dev_priv->rps.clients);
-			rps->boosts++;
-		} else
-			dev_priv->rps.boosts++;
+	boost = false;
+	spin_lock_irq(&rq->lock);
+	if (!rq->waitboost && !i915_gem_request_completed(rq)) {
+		atomic_inc(&i915->rps.num_waiters);
+		rq->waitboost = true;
+		boost = true;
 	}
-	spin_unlock(&dev_priv->rps.client_lock);
+	spin_unlock_irq(&rq->lock);
+	if (!boost)
+		return;
+
+	if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq)
+		schedule_work(&i915->rps.work);
+
+	if (rps != NULL)
+		atomic_inc(&rps->boosts);
 }
 
 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
@@ -9112,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
 	struct drm_i915_gem_request *req = boost->req;
 
 	if (!i915_gem_request_completed(req))
-		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
+		gen6_rps_boost(req, NULL);
 
 	i915_gem_request_put(req);
 	kfree(boost);
@@ -9141,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
 	mutex_init(&dev_priv->rps.hw_lock);
-	spin_lock_init(&dev_priv->rps.client_lock);
 
 	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
 			  __intel_autoenable_gt_powersave);
-	INIT_LIST_HEAD(&dev_priv->rps.clients);
+	atomic_set(&dev_priv->rps.num_waiters, 0);
 
 	dev_priv->pm.suspended = false;
 	atomic_set(&dev_priv->pm.wakeref_count, 0);
-- 
2.11.0


* ✓ Fi.CI.BAT: success for drm/i915: Avoid keeping waitboost active for signaling threads
From: Patchwork @ 2017-06-22 11:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Avoid keeping waitboost active for signaling threads
URL   : https://patchwork.freedesktop.org/series/26211/
State : success

== Summary ==

Series 26211v1 drm/i915: Avoid keeping waitboost active for signaling threads
https://patchwork.freedesktop.org/api/1.0/series/26211/revisions/1/mbox/

Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                dmesg-warn -> PASS       (fi-kbl-7560u) fdo#100125
Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                fail       -> PASS       (fi-snb-2600) fdo#100215
Test prime_busy:
        Subgroup basic-wait-after-default:
                dmesg-warn -> PASS       (fi-skl-6700hq) fdo#101515 +3

fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125
fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fdo#101515 https://bugs.freedesktop.org/show_bug.cgi?id=101515

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:439s
fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:427s
fi-bsw-n3050     total:278  pass:241  dwarn:1   dfail:0   fail:0   skip:36  time:538s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:506s
fi-byt-j1900     total:278  pass:252  dwarn:2   dfail:0   fail:0   skip:24  time:488s
fi-byt-n2820     total:278  pass:248  dwarn:2   dfail:0   fail:0   skip:28  time:479s
fi-glk-2a        total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:591s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:438s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:414s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:418s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:496s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:477s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:474s
fi-kbl-7560u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:566s
fi-kbl-r         total:278  pass:259  dwarn:1   dfail:0   fail:0   skip:18  time:579s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:456s
fi-skl-6700hq    total:278  pass:220  dwarn:3   dfail:0   fail:30  skip:24  time:342s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:471s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:472s
fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:440s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:539s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time:400s

7f2c92652872fbbe37c8bca92eccb7218100c21d drm-tip: 2017y-06m-22d-08h-47m-32s UTC integration manifest
eb1e70f drm/i915: Avoid keeping waitboost active for signaling threads

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_5023/

* Re: [PATCH] drm/i915: Avoid keeping waitboost active for signaling threads
From: Michał Winiarski @ 2017-06-23 10:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Thu, Jun 22, 2017 at 11:55:51AM +0100, Chris Wilson wrote:
> Once a client has requested a waitboost, we keep that waitboost active
> until all clients are no longer waiting. This is because we don't
> distinguish which waiter deserves the boost. However, with the advent of
> fence signaling, the signaler threads appear as waiters to the RPS
> interrupt handler. So instead of using a single boolean to track when to
> keep the waitboost active, use a counter of all outstanding waitboosted
> requests.
> 
> At this point, I have removed all vestiges of the rate limiting on
> clients. Whilst this means that compositors should remain more fluid,
> it also means that boosts are more prevalent.
> 
> A drawback of this implementation is that it requires constant request
> submission to keep the waitboost trimmed (as it is now cancelled when the
> request is completed). This will be fine for a busy system, but near
> idle the boosts may be kept for longer than desired (effectively tens of
> vblanks worst case) and there is a reliance on rc6 instead.

In other words, now we're disabling boost when all requests that required boost
are retired.
We may need to tweak igt pm_rps boost scenarios to account for that extra time.

> 
> Reported-by: Michał Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski@intel.com>

[SNIP]

> @@ -2347,11 +2349,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>  
>  		rcu_read_lock();
>  		task = pid_task(file->pid, PIDTYPE_PID);
> -		seq_printf(m, "%s [%d]: %d boosts%s\n",
> +		seq_printf(m, "%s [%d]: %d boosts\n",
>  			   task ? task->comm : "<unknown>",
>  			   task ? task->pid : -1,
> -			   file_priv->rps.boosts,
> -			   list_empty(&file_priv->rps.link) ? "" : ", active");
> +			   atomic_read(&file_priv->rps.boosts));
>  		rcu_read_unlock();
>  	}
>  	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);

dev_priv->rps.boosts seems to be unused at this point, could you remove it while
you're here?

With that:
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

-Michał

> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 30e89456fc61..95e164387363 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -584,8 +584,7 @@ struct drm_i915_file_private {
>  	struct idr context_idr;
>  
>  	struct intel_rps_client {
> -		struct list_head link;
> -		unsigned boosts;
> +		atomic_t boosts;
>  	} rps;
>  
>  	unsigned int bsd_engine;
> @@ -1304,8 +1303,7 @@ struct intel_gen6_power_mgmt {
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
>  	spinlock_t client_lock;
> -	struct list_head clients;
> -	bool client_boost;
> +	atomic_t num_waiters;
>  
>  	bool enabled;
>  	struct delayed_work autoenable_work;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ae3ce1314bd1..7391e2d36a31 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -388,7 +388,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>  	 */
>  	if (rps) {
>  		if (INTEL_GEN(rq->i915) >= 6)
> -			gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
> +			gen6_rps_boost(rq, rps);
>  		else
>  			rps = NULL;
>  	}
> @@ -399,22 +399,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
>  	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
>  		i915_gem_request_retire_upto(rq);
>  
> -	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
> -		/* The GPU is now idle and this client has stalled.
> -		 * Since no other client has submitted a request in the
> -		 * meantime, assume that this client is the only one
> -		 * supplying work to the GPU but is unable to keep that
> -		 * work supplied because it is waiting. Since the GPU is
> -		 * then never kept fully busy, RPS autoclocking will
> -		 * keep the clocks relatively low, causing further delays.
> -		 * Compensate by giving the synchronous client credit for
> -		 * a waitboost next time.
> -		 */
> -		spin_lock(&rq->i915->rps.client_lock);
> -		list_del_init(&rps->link);
> -		spin_unlock(&rq->i915->rps.client_lock);
> -	}
> -
>  	return timeout;
>  }
>  
> @@ -5065,12 +5049,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
>  		request->file_priv = NULL;
>  	spin_unlock(&file_priv->mm.lock);
> -
> -	if (!list_empty(&file_priv->rps.link)) {
> -		spin_lock(&to_i915(dev)->rps.client_lock);
> -		list_del(&file_priv->rps.link);
> -		spin_unlock(&to_i915(dev)->rps.client_lock);
> -	}
>  }
>  
>  int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
> @@ -5087,7 +5065,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
>  	file->driver_priv = file_priv;
>  	file_priv->dev_priv = i915;
>  	file_priv->file = file;
> -	INIT_LIST_HEAD(&file_priv->rps.link);
>  
>  	spin_lock_init(&file_priv->mm.lock);
>  	INIT_LIST_HEAD(&file_priv->mm.request_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8c59c79cbd8b..483af8921060 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -384,7 +384,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>  		engine->context_unpin(engine, engine->last_retired_context);
>  	engine->last_retired_context = request->ctx;
>  
> -	dma_fence_signal(&request->fence);
> +	spin_lock_irq(&request->lock);
> +	if (request->waitboost)
> +		atomic_dec(&request->i915->rps.num_waiters);
> +	dma_fence_signal_locked(&request->fence);
> +	spin_unlock_irq(&request->lock);
>  
>  	i915_priotree_fini(request->i915, &request->priotree);
>  	i915_gem_request_put(request);
> @@ -639,6 +643,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>  	req->file_priv = NULL;
>  	req->batch = NULL;
>  	req->capture_list = NULL;
> +	req->waitboost = false;
>  
>  	/*
>  	 * Reserve space in the ring buffer for all the commands required to
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 7b7c84369d78..604e131470a1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -184,6 +184,8 @@ struct drm_i915_gem_request {
>  	/** Time at which this request was emitted, in jiffies. */
>  	unsigned long emitted_jiffies;
>  
> +	bool waitboost;
> +
>  	/** engine->request_list entry for this request */
>  	struct list_head link;
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index b1c7d1a04612..61047ee2eede 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1091,18 +1091,6 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>  	return events;
>  }
>  
> -static bool any_waiters(struct drm_i915_private *dev_priv)
> -{
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, dev_priv, id)
> -		if (intel_engine_has_waiter(engine))
> -			return true;
> -
> -	return false;
> -}
> -
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>  	struct drm_i915_private *dev_priv =
> @@ -1114,7 +1102,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  	spin_lock_irq(&dev_priv->irq_lock);
>  	if (dev_priv->rps.interrupts_enabled) {
>  		pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
> -		client_boost = fetch_and_zero(&dev_priv->rps.client_boost);
> +		client_boost = atomic_read(&dev_priv->rps.num_waiters);
>  	}
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
> @@ -1131,7 +1119,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  	new_delay = dev_priv->rps.cur_freq;
>  	min = dev_priv->rps.min_freq_softlimit;
>  	max = dev_priv->rps.max_freq_softlimit;
> -	if (client_boost || any_waiters(dev_priv))
> +	if (client_boost)
>  		max = dev_priv->rps.max_freq;
>  	if (client_boost && new_delay < dev_priv->rps.boost_freq) {
>  		new_delay = dev_priv->rps.boost_freq;
> @@ -1144,7 +1132,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  
>  		if (new_delay >= dev_priv->rps.max_freq_softlimit)
>  			adj = 0;
> -	} else if (client_boost || any_waiters(dev_priv)) {
> +	} else if (client_boost) {
>  		adj = 0;
>  	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
>  		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index d93efb49a2e2..d17a32437f07 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1858,9 +1858,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
>  void gen6_rps_busy(struct drm_i915_private *dev_priv);
>  void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
>  void gen6_rps_idle(struct drm_i915_private *dev_priv);
> -void gen6_rps_boost(struct drm_i915_private *dev_priv,
> -		    struct intel_rps_client *rps,
> -		    unsigned long submitted);
> +void gen6_rps_boost(struct drm_i915_gem_request *rq,
> +		    struct intel_rps_client *rps);
>  void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
>  void g4x_wm_get_hw_state(struct drm_device *dev);
>  void vlv_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b5b7372fcddc..fabea61ca0b6 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6125,47 +6125,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>  			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
>  	}
>  	mutex_unlock(&dev_priv->rps.hw_lock);
> -
> -	spin_lock(&dev_priv->rps.client_lock);
> -	while (!list_empty(&dev_priv->rps.clients))
> -		list_del_init(dev_priv->rps.clients.next);
> -	spin_unlock(&dev_priv->rps.client_lock);
>  }
>  
> -void gen6_rps_boost(struct drm_i915_private *dev_priv,
> -		    struct intel_rps_client *rps,
> -		    unsigned long submitted)
> +void gen6_rps_boost(struct drm_i915_gem_request *rq,
> +		    struct intel_rps_client *rps)
>  {
> +	struct drm_i915_private *i915 = rq->i915;
> +	bool boost;
> +
>  	/* This is intentionally racy! We peek at the state here, then
>  	 * validate inside the RPS worker.
>  	 */
> -	if (!(dev_priv->gt.awake &&
> -	      dev_priv->rps.enabled &&
> -	      dev_priv->rps.cur_freq < dev_priv->rps.boost_freq))
> +	if (!i915->rps.enabled)
>  		return;
>  
> -	/* Force a RPS boost (and don't count it against the client) if
> -	 * the GPU is severely congested.
> -	 */
> -	if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
> -		rps = NULL;
> -
> -	spin_lock(&dev_priv->rps.client_lock);
> -	if (rps == NULL || list_empty(&rps->link)) {
> -		spin_lock_irq(&dev_priv->irq_lock);
> -		if (dev_priv->rps.interrupts_enabled) {
> -			dev_priv->rps.client_boost = true;
> -			schedule_work(&dev_priv->rps.work);
> -		}
> -		spin_unlock_irq(&dev_priv->irq_lock);
> -
> -		if (rps != NULL) {
> -			list_add(&rps->link, &dev_priv->rps.clients);
> -			rps->boosts++;
> -		} else
> -			dev_priv->rps.boosts++;
> +	boost = false;
> +	spin_lock_irq(&rq->lock);
> +	if (!rq->waitboost && !i915_gem_request_completed(rq)) {
> +		atomic_inc(&i915->rps.num_waiters);
> +		rq->waitboost = true;
> +		boost = true;
>  	}
> -	spin_unlock(&dev_priv->rps.client_lock);
> +	spin_unlock_irq(&rq->lock);
> +	if (!boost)
> +		return;
> +
> +	if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq)
> +		schedule_work(&i915->rps.work);
> +
> +	if (rps != NULL)
> +		atomic_inc(&rps->boosts);
>  }
>  
>  int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> @@ -9112,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
>  	struct drm_i915_gem_request *req = boost->req;
>  
>  	if (!i915_gem_request_completed(req))
> -		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
> +		gen6_rps_boost(req, NULL);
>  
>  	i915_gem_request_put(req);
>  	kfree(boost);
> @@ -9141,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
>  void intel_pm_setup(struct drm_i915_private *dev_priv)
>  {
>  	mutex_init(&dev_priv->rps.hw_lock);
> -	spin_lock_init(&dev_priv->rps.client_lock);
>  
>  	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
>  			  __intel_autoenable_gt_powersave);
> -	INIT_LIST_HEAD(&dev_priv->rps.clients);
> +	atomic_set(&dev_priv->rps.num_waiters, 0);
>  
>  	dev_priv->pm.suspended = false;
>  	atomic_set(&dev_priv->pm.wakeref_count, 0);
> -- 
> 2.11.0
> 

* Re: [PATCH] drm/i915: Avoid keeping waitboost active for signaling threads
From: Chris Wilson @ 2017-06-23 10:48 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx

Quoting Michał Winiarski (2017-06-23 11:35:06)
> On Thu, Jun 22, 2017 at 11:55:51AM +0100, Chris Wilson wrote:
> > Once a client has requested a waitboost, we keep that waitboost active
> > until all clients are no longer waiting. This is because we don't
> > distinguish which waiter deserves the boost. However, with the advent of
> > fence signaling, the signaler threads appear as waiters to the RPS
> > interrupt handler. So instead of using a single boolean to track when to
> > keep the waitboost active, use a counter of all outstanding waitboosted
> > requests.
> > 
> > At this point, I have removed all vestiges of the rate limiting on
> > clients. Whilst this means that compositors should remain more fluid,
> > it also means that boosts are more prevalent.
> > 
> > A drawback of this implementation is that it requires constant request
> > submission to keep the waitboost trimmed (as it is now cancelled when the
> > request is completed). This will be fine for a busy system, but near
> > idle the boosts may be kept for longer than desired (effectively tens of
> > vblanks worst case) and there is a reliance on rc6 instead.
> 
> In other words, now we're disabling boost when all requests that required boost
> are retired.
> We may need to tweak igt pm_rps boost scenarios to account for that extra time.

It should be less time boosted than before, if the requests are retired
promptly. But that's a big if.

> > Reported-by: Michał Winiarski <michal.winiarski@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Michał Winiarski <michal.winiarski@intel.com>
> 
> [SNIP]
> 
> > @@ -2347,11 +2349,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >  
> >               rcu_read_lock();
> >               task = pid_task(file->pid, PIDTYPE_PID);
> > -             seq_printf(m, "%s [%d]: %d boosts%s\n",
> > +             seq_printf(m, "%s [%d]: %d boosts\n",
> >                          task ? task->comm : "<unknown>",
> >                          task ? task->pid : -1,
> > -                        file_priv->rps.boosts,
> > -                        list_empty(&file_priv->rps.link) ? "" : ", active");
> > +                        atomic_read(&file_priv->rps.boosts));
> >               rcu_read_unlock();
> >       }
> >       seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
> 
> dev_priv->rps.boosts seems to be unused at this point, could you remove it while
> you're here?

Oh, I should just hook up the other side of

> > +     if (rps != NULL)
> > +             atomic_inc(&rps->boosts);

else
	atomic_inc(&dev_priv->rps.boosts);

The only user is dead-code atm, but it may come in useful again for
atomic modesetting.
-Chris

* [PATCH v2] drm/i915: Avoid keeping waitboost active for signaling threads
From: Chris Wilson @ 2017-06-28 12:35 UTC (permalink / raw)
  To: intel-gfx

Once a client has requested a waitboost, we keep that waitboost active
until all clients are no longer waiting. This is because we don't
distinguish which waiter deserves the boost. However, with the advent of
fence signaling, the signaler threads appear as waiters to the RPS
interrupt handler. So instead of using a single boolean to track when to
keep the waitboost active, use a counter of all outstanding waitboosted
requests.

At this point, I have removed all vestiges of the rate limiting on
clients. Whilst this means that compositors should remain more fluid,
it also means that boosts are more prevalent. See commit b29c19b64528
("drm/i915: Boost RPS frequency for CPU stalls") for a longer discussion
on the pros and cons of both approaches.

A drawback of this implementation is that it requires constant request
submission to keep the waitboost trimmed (as it is now cancelled when the
request is completed). This will be fine for a busy system, but near
idle the boosts may be kept for longer than desired (effectively tens of
vblanks worst case) and there is a reliance on rc6 instead.

v2: Remove defunct rps.client_lock

Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 12 +++----
 drivers/gpu/drm/i915/i915_drv.h         | 10 ++----
 drivers/gpu/drm/i915/i915_gem.c         | 25 +--------------
 drivers/gpu/drm/i915/i915_gem_request.c |  7 +++-
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 drivers/gpu/drm/i915/i915_irq.c         | 18 ++---------
 drivers/gpu/drm/i915/intel_drv.h        |  5 ++-
 drivers/gpu/drm/i915/intel_pm.c         | 57 +++++++++++++--------------------
 8 files changed, 45 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index f7aa6cbe3a2e..580bd4f4a49e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2327,6 +2327,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
+	seq_printf(m, "Boosts outstanding? %d\n",
+		   atomic_read(&dev_priv->rps.num_waiters));
 	seq_printf(m, "Frequency requested %d\n",
 		   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
 	seq_printf(m, "  min hard:%d, soft:%d; max soft:%d, hard:%d\n",
@@ -2340,22 +2342,20 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
 
 	mutex_lock(&dev->filelist_mutex);
-	spin_lock(&dev_priv->rps.client_lock);
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 		struct task_struct *task;
 
 		rcu_read_lock();
 		task = pid_task(file->pid, PIDTYPE_PID);
-		seq_printf(m, "%s [%d]: %d boosts%s\n",
+		seq_printf(m, "%s [%d]: %d boosts\n",
 			   task ? task->comm : "<unknown>",
 			   task ? task->pid : -1,
-			   file_priv->rps.boosts,
-			   list_empty(&file_priv->rps.link) ? "" : ", active");
+			   atomic_read(&file_priv->rps.boosts));
 		rcu_read_unlock();
 	}
-	seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
-	spin_unlock(&dev_priv->rps.client_lock);
+	seq_printf(m, "Kernel (anonymous) boosts: %d\n",
+		   atomic_read(&dev_priv->rps.boosts));
 	mutex_unlock(&dev->filelist_mutex);
 
 	if (INTEL_GEN(dev_priv) >= 6 &&
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 427d10c7eca5..effbe4f72a64 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -584,8 +584,7 @@ struct drm_i915_file_private {
 	struct idr context_idr;
 
 	struct intel_rps_client {
-		struct list_head link;
-		unsigned boosts;
+		atomic_t boosts;
 	} rps;
 
 	unsigned int bsd_engine;
@@ -1303,13 +1302,10 @@ struct intel_gen6_power_mgmt {
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
-	spinlock_t client_lock;
-	struct list_head clients;
-	bool client_boost;
-
 	bool enabled;
 	struct delayed_work autoenable_work;
-	unsigned boosts;
+	atomic_t num_waiters;
+	atomic_t boosts;
 
 	/* manual wa residency calculations */
 	struct intel_rps_ei ei;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f38c84e485ab..1b2dfa8bdeef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -388,7 +388,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	 */
 	if (rps) {
 		if (INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
+			gen6_rps_boost(rq, rps);
 		else
 			rps = NULL;
 	}
@@ -399,22 +399,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 		i915_gem_request_retire_upto(rq);
 
-	if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) {
-		/* The GPU is now idle and this client has stalled.
-		 * Since no other client has submitted a request in the
-		 * meantime, assume that this client is the only one
-		 * supplying work to the GPU but is unable to keep that
-		 * work supplied because it is waiting. Since the GPU is
-		 * then never kept fully busy, RPS autoclocking will
-		 * keep the clocks relatively low, causing further delays.
-		 * Compensate by giving the synchronous client credit for
-		 * a waitboost next time.
-		 */
-		spin_lock(&rq->i915->rps.client_lock);
-		list_del_init(&rps->link);
-		spin_unlock(&rq->i915->rps.client_lock);
-	}
-
 	return timeout;
 }
 
@@ -5053,12 +5037,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
 		request->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
-
-	if (!list_empty(&file_priv->rps.link)) {
-		spin_lock(&to_i915(dev)->rps.client_lock);
-		list_del(&file_priv->rps.link);
-		spin_unlock(&to_i915(dev)->rps.client_lock);
-	}
 }
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
@@ -5075,7 +5053,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	file->driver_priv = file_priv;
 	file_priv->dev_priv = i915;
 	file_priv->file = file;
-	INIT_LIST_HEAD(&file_priv->rps.link);
 
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8c59c79cbd8b..483af8921060 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -384,7 +384,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 		engine->context_unpin(engine, engine->last_retired_context);
 	engine->last_retired_context = request->ctx;
 
-	dma_fence_signal(&request->fence);
+	spin_lock_irq(&request->lock);
+	if (request->waitboost)
+		atomic_dec(&request->i915->rps.num_waiters);
+	dma_fence_signal_locked(&request->fence);
+	spin_unlock_irq(&request->lock);
 
 	i915_priotree_fini(request->i915, &request->priotree);
 	i915_gem_request_put(request);
@@ -639,6 +643,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->file_priv = NULL;
 	req->batch = NULL;
 	req->capture_list = NULL;
+	req->waitboost = false;
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 7b7c84369d78..604e131470a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -184,6 +184,8 @@ struct drm_i915_gem_request {
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
 
+	bool waitboost;
+
 	/** engine->request_list entry for this request */
 	struct list_head link;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 548b43400643..eb4f1dca2077 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1091,18 +1091,6 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	return events;
 }
 
-static bool any_waiters(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		if (intel_engine_has_waiter(engine))
-			return true;
-
-	return false;
-}
-
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
@@ -1114,7 +1102,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	spin_lock_irq(&dev_priv->irq_lock);
 	if (dev_priv->rps.interrupts_enabled) {
 		pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
-		client_boost = fetch_and_zero(&dev_priv->rps.client_boost);
+		client_boost = atomic_read(&dev_priv->rps.num_waiters);
 	}
 	spin_unlock_irq(&dev_priv->irq_lock);
 
@@ -1131,7 +1119,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	new_delay = dev_priv->rps.cur_freq;
 	min = dev_priv->rps.min_freq_softlimit;
 	max = dev_priv->rps.max_freq_softlimit;
-	if (client_boost || any_waiters(dev_priv))
+	if (client_boost)
 		max = dev_priv->rps.max_freq;
 	if (client_boost && new_delay < dev_priv->rps.boost_freq) {
 		new_delay = dev_priv->rps.boost_freq;
@@ -1144,7 +1132,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 		if (new_delay >= dev_priv->rps.max_freq_softlimit)
 			adj = 0;
-	} else if (client_boost || any_waiters(dev_priv)) {
+	} else if (client_boost) {
 		adj = 0;
 	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
 		if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index d93efb49a2e2..d17a32437f07 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1858,9 +1858,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
-void gen6_rps_boost(struct drm_i915_private *dev_priv,
-		    struct intel_rps_client *rps,
-		    unsigned long submitted);
+void gen6_rps_boost(struct drm_i915_gem_request *rq,
+		    struct intel_rps_client *rps);
 void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req);
 void g4x_wm_get_hw_state(struct drm_device *dev);
 void vlv_wm_get_hw_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 48ea0fca1f72..c3fcadfa0ae7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6126,47 +6126,35 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
-
-	spin_lock(&dev_priv->rps.client_lock);
-	while (!list_empty(&dev_priv->rps.clients))
-		list_del_init(dev_priv->rps.clients.next);
-	spin_unlock(&dev_priv->rps.client_lock);
 }
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv,
-		    struct intel_rps_client *rps,
-		    unsigned long submitted)
+void gen6_rps_boost(struct drm_i915_gem_request *rq,
+		    struct intel_rps_client *rps)
 {
+	struct drm_i915_private *i915 = rq->i915;
+	bool boost;
+
 	/* This is intentionally racy! We peek at the state here, then
 	 * validate inside the RPS worker.
 	 */
-	if (!(dev_priv->gt.awake &&
-	      dev_priv->rps.enabled &&
-	      dev_priv->rps.cur_freq < dev_priv->rps.boost_freq))
+	if (!i915->rps.enabled)
 		return;
 
-	/* Force a RPS boost (and don't count it against the client) if
-	 * the GPU is severely congested.
-	 */
-	if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
-		rps = NULL;
-
-	spin_lock(&dev_priv->rps.client_lock);
-	if (rps == NULL || list_empty(&rps->link)) {
-		spin_lock_irq(&dev_priv->irq_lock);
-		if (dev_priv->rps.interrupts_enabled) {
-			dev_priv->rps.client_boost = true;
-			schedule_work(&dev_priv->rps.work);
-		}
-		spin_unlock_irq(&dev_priv->irq_lock);
-
-		if (rps != NULL) {
-			list_add(&rps->link, &dev_priv->rps.clients);
-			rps->boosts++;
-		} else
-			dev_priv->rps.boosts++;
+	boost = false;
+	spin_lock_irq(&rq->lock);
+	if (!rq->waitboost && !i915_gem_request_completed(rq)) {
+		atomic_inc(&i915->rps.num_waiters);
+		rq->waitboost = true;
+		boost = true;
 	}
-	spin_unlock(&dev_priv->rps.client_lock);
+	spin_unlock_irq(&rq->lock);
+	if (!boost)
+		return;
+
+	if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq)
+		schedule_work(&i915->rps.work);
+
+	atomic_inc(rps ? &rps->boosts : &i915->rps.boosts);
 }
 
 int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
@@ -9113,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
 	struct drm_i915_gem_request *req = boost->req;
 
 	if (!i915_gem_request_completed(req))
-		gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
+		gen6_rps_boost(req, NULL);
 
 	i915_gem_request_put(req);
 	kfree(boost);
@@ -9142,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
 	mutex_init(&dev_priv->rps.hw_lock);
-	spin_lock_init(&dev_priv->rps.client_lock);
 
 	INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
 			  __intel_autoenable_gt_powersave);
-	INIT_LIST_HEAD(&dev_priv->rps.clients);
+	atomic_set(&dev_priv->rps.num_waiters, 0);
 
 	dev_priv->pm.suspended = false;
 	atomic_set(&dev_priv->pm.wakeref_count, 0);
-- 
2.13.1


* ✗ Fi.CI.BAT: failure for drm/i915: Avoid keeping waitboost active for signaling threads (rev2)
From: Patchwork @ 2017-06-28 13:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Avoid keeping waitboost active for signaling threads (rev2)
URL   : https://patchwork.freedesktop.org/series/26211/
State : failure

== Summary ==

Series 26211v2 drm/i915: Avoid keeping waitboost active for signaling threads
https://patchwork.freedesktop.org/api/1.0/series/26211/revisions/2/mbox/

Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                dmesg-warn -> PASS       (fi-kbl-7560u) fdo#100125 +1
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (fi-skl-6770hq)

fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125

fi-bdw-5557u     total:279  pass:268  dwarn:0   dfail:0   fail:0   skip:11  time:442s
fi-bdw-gvtdvm    total:279  pass:257  dwarn:8   dfail:0   fail:0   skip:14  time:431s
fi-blb-e6850     total:279  pass:224  dwarn:1   dfail:0   fail:0   skip:54  time:354s
fi-bsw-n3050     total:279  pass:242  dwarn:1   dfail:0   fail:0   skip:36  time:528s
fi-bxt-j4205     total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:510s
fi-byt-j1900     total:279  pass:253  dwarn:2   dfail:0   fail:0   skip:24  time:492s
fi-byt-n2820     total:279  pass:249  dwarn:2   dfail:0   fail:0   skip:28  time:478s
fi-glk-2a        total:279  pass:260  dwarn:0   dfail:0   fail:0   skip:19  time:593s
fi-hsw-4770      total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:437s
fi-hsw-4770r     total:279  pass:263  dwarn:0   dfail:0   fail:0   skip:16  time:412s
fi-ilk-650       total:279  pass:229  dwarn:0   dfail:0   fail:0   skip:50  time:417s
fi-ivb-3520m     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:499s
fi-ivb-3770      total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:479s
fi-kbl-7500u     total:279  pass:261  dwarn:0   dfail:0   fail:0   skip:18  time:464s
fi-kbl-7560u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:573s
fi-kbl-r         total:279  pass:260  dwarn:1   dfail:0   fail:0   skip:18  time:582s
fi-pnv-d510      total:279  pass:222  dwarn:2   dfail:0   fail:0   skip:55  time:560s
fi-skl-6260u     total:279  pass:269  dwarn:0   dfail:0   fail:0   skip:10  time:470s
fi-skl-6700hq    total:279  pass:223  dwarn:1   dfail:0   fail:30  skip:24  time:340s
fi-skl-6700k     total:279  pass:257  dwarn:4   dfail:0   fail:0   skip:18  time:471s
fi-skl-6770hq    total:279  pass:268  dwarn:0   dfail:0   fail:1   skip:10  time:474s
fi-skl-gvtdvm    total:279  pass:266  dwarn:0   dfail:0   fail:0   skip:13  time:436s
fi-snb-2520m     total:279  pass:251  dwarn:0   dfail:0   fail:0   skip:28  time:540s
fi-snb-2600      total:279  pass:250  dwarn:0   dfail:0   fail:0   skip:29  time:413s

90513552ce32ddf2a4efffa21a5df5f21a425a18 drm-tip: 2017y-06m-28d-11h-26m-16s UTC integration manifest
332079a drm/i915: Avoid keeping waitboost active for signaling threads

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_5058/
