* [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up
@ 2018-01-19 15:23 Chris Wilson
  2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Chris Wilson @ 2018-01-19 15:23 UTC (permalink / raw)
  To: intel-gfx

By counting the number of times we have woken up, we have a very simple
means of defining an epoch, which will come in handy if we want to
perform deferred tasks at the end of an epoch (i.e. while we are going
to sleep) without imposing on the next activity cycle.
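
As an illustrative aside (not part of the patch), the deferred-task pattern this enables can be sketched in standalone C. The names below (gt_epoch, mark_idle(), deferred_task()) are stand-ins for this sketch only; the driver's real hooks are mark_busy() and the idle work handler touched by these two patches.

#include <stdint.h>
#include <stdio.h>

static uint32_t gt_epoch;       /* bumped every time the GPU is woken up */

static void mark_busy(void)
{
        gt_epoch++;             /* new epoch: deferred work keyed to an
                                 * older epoch is now stale */
}

/* Sampled when we go idle and handed to the deferred task. */
static uint32_t mark_idle(void)
{
        return gt_epoch;
}

/* Runs some time later; only acts if no wakeup happened in between. */
static void deferred_task(uint32_t epoch)
{
        if (epoch == gt_epoch)
                puts("still idle: run end-of-epoch work");
        else
                puts("woken since: skip, wait for the next idle point");
}

int main(void)
{
        mark_busy();                    /* an activity cycle */
        uint32_t epoch = mark_idle();   /* going to sleep */
        deferred_task(epoch);           /* no wakeup: work runs */

        mark_busy();                    /* GPU woken again */
        deferred_task(epoch);           /* stale epoch: work skipped */
        return 0;
}

Equality of the sampled and current counters means no wakeup intervened, which is exactly the check patch 2 relies on before shrinking the caches.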

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 7 ++++---
 drivers/gpu/drm/i915/i915_drv.h         | 5 +++++
 drivers/gpu/drm/i915/i915_gem_request.c | 1 +
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cc659b4b2a45..1aac3ec7d14d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2717,7 +2717,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (!HAS_RUNTIME_PM(dev_priv))
 		seq_puts(m, "Runtime power management not supported\n");
 
-	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
+	seq_printf(m, "GPU idle: %s (epoch %d)\n",
+		   yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
 	seq_printf(m, "IRQs disabled: %s\n",
 		   yesno(!intel_irqs_enabled(dev_priv)));
 #ifdef CONFIG_PM
@@ -3150,8 +3151,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 	intel_runtime_pm_get(dev_priv);
 
-	seq_printf(m, "GT awake? %s\n",
-		   yesno(dev_priv->gt.awake));
+	seq_printf(m, "GT awake? %s (epoch %d)\n",
+		   yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
 	seq_printf(m, "Global active requests: %d\n",
 		   dev_priv->gt.active_requests);
 	seq_printf(m, "CS timestamp frequency: %u kHz\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 317953484fec..98e8385d1bb0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2302,6 +2302,11 @@ struct drm_i915_private {
 		struct i915_gem_timeline global_timeline;
 		u32 active_requests;
 
+		/**
+		 * The number of times we have woken up.
+		 */
+		u32 epoch;
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0f451b4a4e8..f0fab070a3a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -274,6 +274,7 @@ static void mark_busy(struct drm_i915_private *i915)
 	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
 
 	i915->gt.awake = true;
+	i915->gt.epoch++;
 
 	intel_enable_gt_powersave(i915);
 	i915_update_gfx_val(i915);
-- 
2.15.1


* [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
@ 2018-01-19 15:23 ` Chris Wilson
  2018-01-24 10:32   ` Tvrtko Ursulin
  2018-01-19 16:13 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2018-01-19 15:23 UTC (permalink / raw)
  To: intel-gfx

When we finally decide the gpu is idle, that is a good time to shrink
our kmem_caches.

v3: Defer until an rcu grace period after we idle.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f0684ccc724..6a8fbcae835b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
 		work_pending(&i915->gt.idle_work.work));
 }
 
+static void shrink_caches(struct drm_i915_private *i915)
+{
+	/*
+	 * kmem_cache_shrink() discards empty slabs and reorders partially
+	 * filled slabs to prioritise allocating from the mostly full slabs,
+	 * with the aim of reducing fragmentation.
+	 */
+	kmem_cache_shrink(i915->priorities);
+	kmem_cache_shrink(i915->dependencies);
+	kmem_cache_shrink(i915->requests);
+	kmem_cache_shrink(i915->luts);
+	kmem_cache_shrink(i915->vmas);
+	kmem_cache_shrink(i915->objects);
+}
+
+struct sleep_rcu_work {
+	struct drm_i915_private *i915;
+	struct rcu_head rcu;
+	struct work_struct work;
+	u32 epoch;
+};
+
+static void __sleep_work(struct work_struct *work)
+{
+	struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
+	struct drm_i915_private *i915 = s->i915;
+	u32 epoch = s->epoch;
+
+	kfree(s);
+	if (epoch == READ_ONCE(i915->gt.epoch))
+		shrink_caches(i915);
+}
+
+static void __sleep_rcu(struct rcu_head *rcu)
+{
+	struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
+	struct drm_i915_private *i915 = s->i915;
+
+	if (s->epoch == READ_ONCE(i915->gt.epoch)) {
+		INIT_WORK(&s->work, __sleep_work);
+		queue_work(i915->wq, &s->work);
+	} else {
+		kfree(s);
+	}
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
 	bool rearm_hangcheck;
+	u32 epoch = 0;
 	ktime_t end;
 
 	if (!READ_ONCE(dev_priv->gt.awake))
@@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
 	rearm_hangcheck = false;
+	epoch = dev_priv->gt.epoch;
 
 	if (INTEL_GEN(dev_priv) >= 6)
 		gen6_rps_idle(dev_priv);
@@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		GEM_BUG_ON(!dev_priv->gt.awake);
 		i915_queue_hangcheck(dev_priv);
 	}
+
+	/*
+	 * When we are idle, it is an opportune time to reap our caches.
+	 * However, we have many objects that utilise RCU and the ordered
+	 * i915->wq that this work is executing on. To try and flush any
+	 * pending frees now we are idle, we first wait for an RCU grace
+	 * period, and then queue a task (that will run last on the wq) to
+	 * shrink and re-optimize the caches.
+	 */
+	if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
+		struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
+		if (s) {
+			s->i915 = dev_priv;
+			s->epoch = epoch;
+			call_rcu(&s->rcu, __sleep_rcu);
+		}
+	}
 }
 
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
-- 
2.15.1


* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
  2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
  2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-19 16:13 ` Patchwork
  2018-01-19 21:56 ` ✗ Fi.CI.IGT: failure " Patchwork
  2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-01-19 16:13 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
URL   : https://patchwork.freedesktop.org/series/36802/
State : success

== Summary ==

Series 36802v1 series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
https://patchwork.freedesktop.org/api/1.0/series/36802/revisions/1/mbox/

Test debugfs_test:
        Subgroup read_all_entries:
                dmesg-warn -> DMESG-FAIL (fi-elk-e7500) fdo#103989
Test kms_chamelium:
        Subgroup dp-crc-fast:
                pass       -> DMESG-FAIL (fi-kbl-7500u) fdo#103841

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:429s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:432s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:382s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:518s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:293s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:501s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:507s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:491s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:482s
fi-elk-e7500     total:224  pass:168  dwarn:9   dfail:1   fail:0   skip:45 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:306s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:535s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:400s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:409s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:421s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:469s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:423s
fi-kbl-7500u     total:288  pass:262  dwarn:1   dfail:1   fail:0   skip:24  time:463s
fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:505s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:460s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:513s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:630s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:445s
fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:519s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:542s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:499s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:495s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:441s
fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:403s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:256  dwarn:0   dfail:0   fail:3   skip:26  time:562s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:491s
fi-skl-guc       total:288  pass:213  dwarn:47  dfail:0   fail:0   skip:28  time:412s

fd10fae2b0f673861816511f99d4798d6b619b40 drm-tip: 2018y-01m-19d-14h-22m-15s UTC integration manifest
483d552db630 drm/i915: Shrink the GEM kmem_caches upon idling
a40e944809ae drm/i915: Track the number of times we have woken the GPU up

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7725/issues.html

* ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
  2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
  2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
  2018-01-19 16:13 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up Patchwork
@ 2018-01-19 21:56 ` Patchwork
  2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-01-19 21:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
URL   : https://patchwork.freedesktop.org/series/36802/
State : failure

== Summary ==

Warning: bzip CI_DRM_3658/shard-glkb6/results22.json.bz2 wasn't in correct JSON format
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> SKIP       (shard-snb) fdo#103375 +1
Test gem_exec_schedule:
        Subgroup wide-vebox:
                pass       -> FAIL       (shard-apl) fdo#102848
Test perf:
        Subgroup oa-exponents:
                pass       -> FAIL       (shard-apl) fdo#102254
        Subgroup polling:
                fail       -> PASS       (shard-hsw) fdo#102252
Test kms_flip:
        Subgroup 2x-plain-flip-fb-recreate-interruptible:
                pass       -> FAIL       (shard-hsw)
        Subgroup vblank-vs-suspend-interruptible:
                pass       -> INCOMPLETE (shard-hsw) fdo#100368
        Subgroup flip-vs-modeset-vs-hang:
                dmesg-warn -> PASS       (shard-snb) fdo#103821
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
                fail       -> PASS       (shard-snb) fdo#101623 +1

fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#102848 https://bugs.freedesktop.org/show_bug.cgi?id=102848
fdo#102254 https://bugs.freedesktop.org/show_bug.cgi?id=102254
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103821 https://bugs.freedesktop.org/show_bug.cgi?id=103821
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623

shard-apl        total:2780 pass:1714 dwarn:1   dfail:0   fail:24  skip:1041 time:14671s
shard-hsw        total:2598 pass:1593 dwarn:1   dfail:1   fail:14  skip:987 time:13414s
shard-snb        total:2780 pass:1315 dwarn:1   dfail:1   fail:12  skip:1451 time:7997s
Blacklisted hosts:
shard-kbl        total:2758 pass:1818 dwarn:1   dfail:1   fail:24  skip:913 time:10528s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7725/shards.html

* Re: [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-24 10:32   ` Tvrtko Ursulin
  2018-01-24 10:37     ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 10:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/01/2018 15:23, Chris Wilson wrote:
> When we finally decide the gpu is idle, that is a good time to shrink
> our kmem_caches.
> 
> v3: Defer until an rcu grace period after we idle.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 65 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7f0684ccc724..6a8fbcae835b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
>   		work_pending(&i915->gt.idle_work.work));
>   }
>   
> +static void shrink_caches(struct drm_i915_private *i915)
> +{
> +	/*
> +	 * kmem_cache_shrink() discards empty slabs and reorders partially
> +	 * filled slabs to prioritise allocating from the mostly full slabs,
> +	 * with the aim of reducing fragmentation.
> +	 */
> +	kmem_cache_shrink(i915->priorities);
> +	kmem_cache_shrink(i915->dependencies);
> +	kmem_cache_shrink(i915->requests);
> +	kmem_cache_shrink(i915->luts);
> +	kmem_cache_shrink(i915->vmas);
> +	kmem_cache_shrink(i915->objects);
> +}
> +
> +struct sleep_rcu_work {
> +	struct drm_i915_private *i915;
> +	struct rcu_head rcu;
> +	struct work_struct work;
> +	u32 epoch;
> +};
> +
> +static void __sleep_work(struct work_struct *work)
> +{
> +	struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
> +	struct drm_i915_private *i915 = s->i915;
> +	u32 epoch = s->epoch;
> +
> +	kfree(s);
> +	if (epoch == READ_ONCE(i915->gt.epoch))
> +		shrink_caches(i915);
> +}
> +
> +static void __sleep_rcu(struct rcu_head *rcu)
> +{
> +	struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
> +	struct drm_i915_private *i915 = s->i915;
> +
> +	if (s->epoch == READ_ONCE(i915->gt.epoch)) {
> +		INIT_WORK(&s->work, __sleep_work);
> +		queue_work(i915->wq, &s->work);
> +	} else {
> +		kfree(s);
> +	}
> +}
> +
>   static void
>   i915_gem_idle_work_handler(struct work_struct *work)
>   {
>   	struct drm_i915_private *dev_priv =
>   		container_of(work, typeof(*dev_priv), gt.idle_work.work);
>   	bool rearm_hangcheck;
> +	u32 epoch = 0;
>   	ktime_t end;
>   
>   	if (!READ_ONCE(dev_priv->gt.awake))
> @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   	GEM_BUG_ON(!dev_priv->gt.awake);
>   	dev_priv->gt.awake = false;
>   	rearm_hangcheck = false;
> +	epoch = dev_priv->gt.epoch;
>   
>   	if (INTEL_GEN(dev_priv) >= 6)
>   		gen6_rps_idle(dev_priv);
> @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   		GEM_BUG_ON(!dev_priv->gt.awake);
>   		i915_queue_hangcheck(dev_priv);
>   	}
> +
> +	/*
> +	 * When we are idle, it is an opportune time to reap our caches.
> +	 * However, we have many objects that utilise RCU and the ordered
> +	 * i915->wq that this work is executing on. To try and flush any
> +	 * pending frees now we are idle, we first wait for an RCU grace
> +	 * period, and then queue a task (that will run last on the wq) to
> +	 * shrink and re-optimize the caches.
> +	 */
> +	if (epoch == READ_ONCE(dev_priv->gt.epoch)) {

Theoretically this can be true on an epoch wrap-around, when the trylock 
failed. It's one in four billion busy transitions, but it could just be 
worth handling explicitly. Simplest is probably to ensure gt.epoch is 
never zero when incrementing?

> +		struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
> +		if (s) {
> +			s->i915 = dev_priv;
> +			s->epoch = epoch;
> +			call_rcu(&s->rcu, __sleep_rcu);
> +		}
> +	}
>   }
>   
>   void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
> 

Otherwise it sounds believable and looks correct.

Regards,

Tvrtko

* Re: [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up
  2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
                   ` (2 preceding siblings ...)
  2018-01-19 21:56 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-01-24 10:33 ` Tvrtko Ursulin
  2018-01-24 10:42   ` Chris Wilson
  3 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 10:33 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 19/01/2018 15:23, Chris Wilson wrote:
> By counting the number of times we have woken up, we have a very simple
> means of defining an epoch, which will come in handy if we want to
> perform deferred tasks at the end of an epoch (i.e. while we are going
> to sleep) without imposing on the next activity cycle.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     | 7 ++++---
>   drivers/gpu/drm/i915/i915_drv.h         | 5 +++++
>   drivers/gpu/drm/i915/i915_gem_request.c | 1 +
>   3 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cc659b4b2a45..1aac3ec7d14d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2717,7 +2717,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
>   	if (!HAS_RUNTIME_PM(dev_priv))
>   		seq_puts(m, "Runtime power management not supported\n");
>   
> -	seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
> +	seq_printf(m, "GPU idle: %s (epoch %d)\n",
> +		   yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
>   	seq_printf(m, "IRQs disabled: %s\n",
>   		   yesno(!intel_irqs_enabled(dev_priv)));
>   #ifdef CONFIG_PM
> @@ -3150,8 +3151,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>   
>   	intel_runtime_pm_get(dev_priv);
>   
> -	seq_printf(m, "GT awake? %s\n",
> -		   yesno(dev_priv->gt.awake));
> +	seq_printf(m, "GT awake? %s (epoch %d)\n",
> +		   yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
>   	seq_printf(m, "Global active requests: %d\n",
>   		   dev_priv->gt.active_requests);
>   	seq_printf(m, "CS timestamp frequency: %u kHz\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 317953484fec..98e8385d1bb0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2302,6 +2302,11 @@ struct drm_i915_private {
>   		struct i915_gem_timeline global_timeline;
>   		u32 active_requests;
>   
> +		/**
> +		 * The number of times we have woken up.
> +		 */
> +		u32 epoch;
> +
>   		/**
>   		 * Is the GPU currently considered idle, or busy executing
>   		 * userspace requests? Whilst idle, we allow runtime power
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..f0fab070a3a0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -274,6 +274,7 @@ static void mark_busy(struct drm_i915_private *i915)
>   	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
>   
>   	i915->gt.awake = true;
> +	i915->gt.epoch++;
>   
>   	intel_enable_gt_powersave(i915);
>   	i915_update_gfx_val(i915);
> 

I'd be tempted to use a standard type like unsigned int where explicit 
width is not needed but it's minor.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko


* Re: [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-24 10:32   ` Tvrtko Ursulin
@ 2018-01-24 10:37     ` Chris Wilson
  2018-01-24 11:06       ` Tvrtko Ursulin
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2018-01-24 10:37 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-24 10:32:16)
> 
> On 19/01/2018 15:23, Chris Wilson wrote:
> > When we finally decide the gpu is idle, that is a good time to shrink
> > our kmem_caches.
> > 
> > v3: Defer until an rcu grace period after we idle.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 65 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 7f0684ccc724..6a8fbcae835b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
> >               work_pending(&i915->gt.idle_work.work));
> >   }
> >   
> > +static void shrink_caches(struct drm_i915_private *i915)
> > +{
> > +     /*
> > +      * kmem_cache_shrink() discards empty slabs and reorders partially
> > +      * filled slabs to prioritise allocating from the mostly full slabs,
> > +      * with the aim of reducing fragmentation.
> > +      */
> > +     kmem_cache_shrink(i915->priorities);
> > +     kmem_cache_shrink(i915->dependencies);
> > +     kmem_cache_shrink(i915->requests);
> > +     kmem_cache_shrink(i915->luts);
> > +     kmem_cache_shrink(i915->vmas);
> > +     kmem_cache_shrink(i915->objects);
> > +}
> > +
> > +struct sleep_rcu_work {
> > +     struct drm_i915_private *i915;
> > +     struct rcu_head rcu;
> > +     struct work_struct work;
> > +     u32 epoch;
> > +};
> > +
> > +static void __sleep_work(struct work_struct *work)
> > +{
> > +     struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
> > +     struct drm_i915_private *i915 = s->i915;
> > +     u32 epoch = s->epoch;
> > +
> > +     kfree(s);
> > +     if (epoch == READ_ONCE(i915->gt.epoch))
> > +             shrink_caches(i915);
> > +}
> > +
> > +static void __sleep_rcu(struct rcu_head *rcu)
> > +{
> > +     struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
> > +     struct drm_i915_private *i915 = s->i915;
> > +
> > +     if (s->epoch == READ_ONCE(i915->gt.epoch)) {
> > +             INIT_WORK(&s->work, __sleep_work);
> > +             queue_work(i915->wq, &s->work);
> > +     } else {
> > +             kfree(s);
> > +     }
> > +}
> > +
> >   static void
> >   i915_gem_idle_work_handler(struct work_struct *work)
> >   {
> >       struct drm_i915_private *dev_priv =
> >               container_of(work, typeof(*dev_priv), gt.idle_work.work);
> >       bool rearm_hangcheck;
> > +     u32 epoch = 0;
> >       ktime_t end;
> >   
> >       if (!READ_ONCE(dev_priv->gt.awake))
> > @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >       GEM_BUG_ON(!dev_priv->gt.awake);
> >       dev_priv->gt.awake = false;
> >       rearm_hangcheck = false;
> > +     epoch = dev_priv->gt.epoch;
> >   
> >       if (INTEL_GEN(dev_priv) >= 6)
> >               gen6_rps_idle(dev_priv);
> > @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
> >               GEM_BUG_ON(!dev_priv->gt.awake);
> >               i915_queue_hangcheck(dev_priv);
> >       }
> > +
> > +     /*
> > +      * When we are idle, it is an opportune time to reap our caches.
> > +      * However, we have many objects that utilise RCU and the ordered
> > +      * i915->wq that this work is executing on. To try and flush any
> > +      * pending frees now we are idle, we first wait for an RCU grace
> > +      * period, and then queue a task (that will run last on the wq) to
> > +      * shrink and re-optimize the caches.
> > +      */
> > +     if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
> 
> Theoretically this can be true on an epoch wrap-around, when the trylock 
> failed. It's one in four billion busy transitions, but it could just be 
> worth handling explicitly. Simplest is probably to ensure gt.epoch is 
> never zero when incrementing?

I was working on the principle that a u32 wraparound takes at least
100ms * 2^32, or about 326 years. :)
-Chris
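
A quick back-of-the-envelope check of that figure, assuming one busy/idle cycle every 100 ms:

        2^32 cycles x 0.1 s/cycle ≈ 4.3e8 s ≈ 13.6 years

which is smaller than the 326 years quoted above, but either way a u32 wraparound sits far outside any realistic uptime.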

* Re: [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up
  2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
@ 2018-01-24 10:42   ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2018-01-24 10:42 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-01-24 10:33:29)
> 
> On 19/01/2018 15:23, Chris Wilson wrote:
> > By counting the number of times we have woken up, we have a very simple
> > means of defining an epoch, which will come in handy if we want to
> > perform deferred tasks at the end of an epoch (i.e. while we are going
> > to sleep) without imposing on the next activity cycle.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c     | 7 ++++---
> >   drivers/gpu/drm/i915/i915_drv.h         | 5 +++++
> >   drivers/gpu/drm/i915/i915_gem_request.c | 1 +
> >   3 files changed, 10 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index cc659b4b2a45..1aac3ec7d14d 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2717,7 +2717,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
> >       if (!HAS_RUNTIME_PM(dev_priv))
> >               seq_puts(m, "Runtime power management not supported\n");
> >   
> > -     seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
> > +     seq_printf(m, "GPU idle: %s (epoch %d)\n",
> > +                yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
> >       seq_printf(m, "IRQs disabled: %s\n",
> >                  yesno(!intel_irqs_enabled(dev_priv)));
> >   #ifdef CONFIG_PM
> > @@ -3150,8 +3151,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> >   
> >       intel_runtime_pm_get(dev_priv);
> >   
> > -     seq_printf(m, "GT awake? %s\n",
> > -                yesno(dev_priv->gt.awake));
> > +     seq_printf(m, "GT awake? %s (epoch %d)\n",
> > +                yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
> >       seq_printf(m, "Global active requests: %d\n",
> >                  dev_priv->gt.active_requests);
> >       seq_printf(m, "CS timestamp frequency: %u kHz\n",
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 317953484fec..98e8385d1bb0 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2302,6 +2302,11 @@ struct drm_i915_private {
> >               struct i915_gem_timeline global_timeline;
> >               u32 active_requests;
> >   
> > +             /**
> > +              * The number of times we have woken up.
> > +              */
> > +             u32 epoch;
> > +
> >               /**
> >                * Is the GPU currently considered idle, or busy executing
> >                * userspace requests? Whilst idle, we allow runtime power
> > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> > index a0f451b4a4e8..f0fab070a3a0 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > @@ -274,6 +274,7 @@ static void mark_busy(struct drm_i915_private *i915)
> >       intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> >   
> >       i915->gt.awake = true;
> > +     i915->gt.epoch++;
> >   
> >       intel_enable_gt_powersave(i915);
> >       i915_update_gfx_val(i915);
> > 
> 
> I'd be tempted to use a standard type like unsigned int where explicit 
> width is not needed but it's minor.

Yeah, I was just copying the neighbouring type. active_requests is bound
to u32 by inflight_seqnos (which is bound by the hw seqno being u32).

Still no reason to impose u32 here; it raises the question of why it
must be exactly 32b.
-Chris

* Re: [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
  2018-01-24 10:37     ` Chris Wilson
@ 2018-01-24 11:06       ` Tvrtko Ursulin
  0 siblings, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 11:06 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 24/01/2018 10:37, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-24 10:32:16)
>>
>> On 19/01/2018 15:23, Chris Wilson wrote:
>>> When we finally decide the gpu is idle, that is a good time to shrink
>>> our kmem_caches.
>>>
>>> v3: Defer until an rcu grace period after we idle.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 65 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 7f0684ccc724..6a8fbcae835b 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
>>>                work_pending(&i915->gt.idle_work.work));
>>>    }
>>>    
>>> +static void shrink_caches(struct drm_i915_private *i915)
>>> +{
>>> +     /*
>>> +      * kmem_cache_shrink() discards empty slabs and reorders partially
>>> +      * filled slabs to prioritise allocating from the mostly full slabs,
>>> +      * with the aim of reducing fragmentation.
>>> +      */
>>> +     kmem_cache_shrink(i915->priorities);
>>> +     kmem_cache_shrink(i915->dependencies);
>>> +     kmem_cache_shrink(i915->requests);
>>> +     kmem_cache_shrink(i915->luts);
>>> +     kmem_cache_shrink(i915->vmas);
>>> +     kmem_cache_shrink(i915->objects);
>>> +}
>>> +
>>> +struct sleep_rcu_work {
>>> +     struct drm_i915_private *i915;
>>> +     struct rcu_head rcu;
>>> +     struct work_struct work;
>>> +     u32 epoch;
>>> +};
>>> +
>>> +static void __sleep_work(struct work_struct *work)
>>> +{
>>> +     struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
>>> +     struct drm_i915_private *i915 = s->i915;
>>> +     u32 epoch = s->epoch;
>>> +
>>> +     kfree(s);
>>> +     if (epoch == READ_ONCE(i915->gt.epoch))
>>> +             shrink_caches(i915);
>>> +}
>>> +
>>> +static void __sleep_rcu(struct rcu_head *rcu)
>>> +{
>>> +     struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
>>> +     struct drm_i915_private *i915 = s->i915;
>>> +
>>> +     if (s->epoch == READ_ONCE(i915->gt.epoch)) {
>>> +             INIT_WORK(&s->work, __sleep_work);
>>> +             queue_work(i915->wq, &s->work);
>>> +     } else {
>>> +             kfree(s);
>>> +     }
>>> +}
>>> +
>>>    static void
>>>    i915_gem_idle_work_handler(struct work_struct *work)
>>>    {
>>>        struct drm_i915_private *dev_priv =
>>>                container_of(work, typeof(*dev_priv), gt.idle_work.work);
>>>        bool rearm_hangcheck;
>>> +     u32 epoch = 0;
>>>        ktime_t end;
>>>    
>>>        if (!READ_ONCE(dev_priv->gt.awake))
>>> @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>>        GEM_BUG_ON(!dev_priv->gt.awake);
>>>        dev_priv->gt.awake = false;
>>>        rearm_hangcheck = false;
>>> +     epoch = dev_priv->gt.epoch;
>>>    
>>>        if (INTEL_GEN(dev_priv) >= 6)
>>>                gen6_rps_idle(dev_priv);
>>> @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>>                GEM_BUG_ON(!dev_priv->gt.awake);
>>>                i915_queue_hangcheck(dev_priv);
>>>        }
>>> +
>>> +     /*
>>> +      * When we are idle, it is an opportune time to reap our caches.
>>> +      * However, we have many objects that utilise RCU and the ordered
>>> +      * i915->wq that this work is executing on. To try and flush any
>>> +      * pending frees now we are idle, we first wait for an RCU grace
>>> +      * period, and then queue a task (that will run last on the wq) to
>>> +      * shrink and re-optimize the caches.
>>> +      */
>>> +     if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
>>
>> Theoretically this can be true on an epoch wrap-around, when the trylock
>> failed. It's one in four billion busy transitions, but it could just be
>> worth handling explicitly. Simplest is probably to ensure gt.epoch is
>> never zero when incrementing?
> 
> I was working on the principle that a u32 wraparound takes at least
> 100ms * 2^32, or about 326 years. :)

Ah, forgot there is a time delay... okay then. :)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

To be kept if/when you change to unsigned int.

Regards,

Tvrtko


