* [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up
@ 2018-01-19 15:23 Chris Wilson
2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Chris Wilson @ 2018-01-19 15:23 UTC (permalink / raw)
To: intel-gfx
By counting the number of times we have woken up, we have a very simple
means of defining an epoch, which will come in handy if we want to
perform deferred tasks at the end of an epoch (i.e. while we are going
to sleep) without imposing on the next activity cycle.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++---
drivers/gpu/drm/i915/i915_drv.h | 5 +++++
drivers/gpu/drm/i915/i915_gem_request.c | 1 +
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cc659b4b2a45..1aac3ec7d14d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2717,7 +2717,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
if (!HAS_RUNTIME_PM(dev_priv))
seq_puts(m, "Runtime power management not supported\n");
- seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
+ seq_printf(m, "GPU idle: %s (epoch %d)\n",
+ yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
@@ -3150,8 +3151,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
intel_runtime_pm_get(dev_priv);
- seq_printf(m, "GT awake? %s\n",
- yesno(dev_priv->gt.awake));
+ seq_printf(m, "GT awake? %s (epoch %d)\n",
+ yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
seq_printf(m, "Global active requests: %d\n",
dev_priv->gt.active_requests);
seq_printf(m, "CS timestamp frequency: %u kHz\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 317953484fec..98e8385d1bb0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2302,6 +2302,11 @@ struct drm_i915_private {
struct i915_gem_timeline global_timeline;
u32 active_requests;
+ /**
+ * The number of times we have woken up.
+ */
+ u32 epoch;
+
/**
* Is the GPU currently considered idle, or busy executing
* userspace requests? Whilst idle, we allow runtime power
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0f451b4a4e8..f0fab070a3a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -274,6 +274,7 @@ static void mark_busy(struct drm_i915_private *i915)
intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
i915->gt.awake = true;
+ i915->gt.epoch++;
intel_enable_gt_powersave(i915);
i915_update_gfx_val(i915);
--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
@ 2018-01-19 15:23 ` Chris Wilson
2018-01-24 10:32 ` Tvrtko Ursulin
2018-01-19 16:13 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up Patchwork
` (2 subsequent siblings)
3 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2018-01-19 15:23 UTC (permalink / raw)
To: intel-gfx
When we finally decide the gpu is idle, that is a good time to shrink
our kmem_caches.
v3: Defer until an rcu grace period after we idle.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f0684ccc724..6a8fbcae835b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
work_pending(&i915->gt.idle_work.work));
}
+static void shrink_caches(struct drm_i915_private *i915)
+{
+ /*
+ * kmem_cache_shrink() discards empty slabs and reorders partially
+ * filled slabs to prioritise allocating from the mostly full slabs,
+ * with the aim of reducing fragmentation.
+ */
+ kmem_cache_shrink(i915->priorities);
+ kmem_cache_shrink(i915->dependencies);
+ kmem_cache_shrink(i915->requests);
+ kmem_cache_shrink(i915->luts);
+ kmem_cache_shrink(i915->vmas);
+ kmem_cache_shrink(i915->objects);
+}
+
+struct sleep_rcu_work {
+ struct drm_i915_private *i915;
+ struct rcu_head rcu;
+ struct work_struct work;
+ u32 epoch;
+};
+
+static void __sleep_work(struct work_struct *work)
+{
+ struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
+ struct drm_i915_private *i915 = s->i915;
+ u32 epoch = s->epoch;
+
+ kfree(s);
+ if (epoch == READ_ONCE(i915->gt.epoch))
+ shrink_caches(i915);
+}
+
+static void __sleep_rcu(struct rcu_head *rcu)
+{
+ struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
+ struct drm_i915_private *i915 = s->i915;
+
+ if (s->epoch == READ_ONCE(i915->gt.epoch)) {
+ INIT_WORK(&s->work, __sleep_work);
+ queue_work(i915->wq, &s->work);
+ } else {
+ kfree(s);
+ }
+}
+
static void
i915_gem_idle_work_handler(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, typeof(*dev_priv), gt.idle_work.work);
bool rearm_hangcheck;
+ u32 epoch = 0;
ktime_t end;
if (!READ_ONCE(dev_priv->gt.awake))
@@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
GEM_BUG_ON(!dev_priv->gt.awake);
dev_priv->gt.awake = false;
rearm_hangcheck = false;
+ epoch = dev_priv->gt.epoch;
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
@@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
GEM_BUG_ON(!dev_priv->gt.awake);
i915_queue_hangcheck(dev_priv);
}
+
+ /*
+ * When we are idle, it is an opportune time to reap our caches.
+ * However, we have many objects that utilise RCU and the ordered
+ * i915->wq that this work is executing on. To try and flush any
+ * pending frees now we are idle, we first wait for an RCU grace
+ * period, and then queue a task (that will run last on the wq) to
+ * shrink and re-optimize the caches.
+ */
+ if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
+ struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (s) {
+ s->i915 = dev_priv;
+ s->epoch = epoch;
+ call_rcu(&s->rcu, __sleep_rcu);
+ }
+ }
}
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
--
2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-19 16:13 ` Patchwork
2018-01-19 21:56 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-01-19 16:13 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
URL : https://patchwork.freedesktop.org/series/36802/
State : success
== Summary ==
Series 36802v1 series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
https://patchwork.freedesktop.org/api/1.0/series/36802/revisions/1/mbox/
Test debugfs_test:
Subgroup read_all_entries:
dmesg-warn -> DMESG-FAIL (fi-elk-e7500) fdo#103989
Test kms_chamelium:
Subgroup dp-crc-fast:
pass -> DMESG-FAIL (fi-kbl-7500u) fdo#103841
fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:429s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:432s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:382s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:518s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:293s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:501s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:507s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:491s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:482s
fi-elk-e7500 total:224 pass:168 dwarn:9 dfail:1 fail:0 skip:45
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:306s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:535s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:400s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:409s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:421s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:469s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:423s
fi-kbl-7500u total:288 pass:262 dwarn:1 dfail:1 fail:0 skip:24 time:463s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:505s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:460s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:513s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:630s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:445s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:519s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:542s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:499s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:495s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:441s
fi-snb-2520m total:245 pass:211 dwarn:0 dfail:0 fail:0 skip:33
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:403s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:256 dwarn:0 dfail:0 fail:3 skip:26 time:562s
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:491s
fi-skl-guc total:288 pass:213 dwarn:47 dfail:0 fail:0 skip:28 time:412s
fd10fae2b0f673861816511f99d4798d6b619b40 drm-tip: 2018y-01m-19d-14h-22m-15s UTC integration manifest
483d552db630 drm/i915: Shrink the GEM kmem_caches upon idling
a40e944809ae drm/i915: Track the number of times we have woken the GPU up
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7725/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* ✗ Fi.CI.IGT: failure for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
2018-01-19 16:13 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up Patchwork
@ 2018-01-19 21:56 ` Patchwork
2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2018-01-19 21:56 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up
URL : https://patchwork.freedesktop.org/series/36802/
State : failure
== Summary ==
Warning: bzip CI_DRM_3658/shard-glkb6/results22.json.bz2 wasn't in correct JSON format
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
pass -> SKIP (shard-snb) fdo#103375 +1
Test gem_exec_schedule:
Subgroup wide-vebox:
pass -> FAIL (shard-apl) fdo#102848
Test perf:
Subgroup oa-exponents:
pass -> FAIL (shard-apl) fdo#102254
Subgroup polling:
fail -> PASS (shard-hsw) fdo#102252
Test kms_flip:
Subgroup 2x-plain-flip-fb-recreate-interruptible:
pass -> FAIL (shard-hsw)
Subgroup vblank-vs-suspend-interruptible:
pass -> INCOMPLETE (shard-hsw) fdo#100368
Subgroup flip-vs-modeset-vs-hang:
dmesg-warn -> PASS (shard-snb) fdo#103821
Test kms_frontbuffer_tracking:
Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
fail -> PASS (shard-snb) fdo#101623 +1
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#102848 https://bugs.freedesktop.org/show_bug.cgi?id=102848
fdo#102254 https://bugs.freedesktop.org/show_bug.cgi?id=102254
fdo#102252 https://bugs.freedesktop.org/show_bug.cgi?id=102252
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#103821 https://bugs.freedesktop.org/show_bug.cgi?id=103821
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
shard-apl total:2780 pass:1714 dwarn:1 dfail:0 fail:24 skip:1041 time:14671s
shard-hsw total:2598 pass:1593 dwarn:1 dfail:1 fail:14 skip:987 time:13414s
shard-snb total:2780 pass:1315 dwarn:1 dfail:1 fail:12 skip:1451 time:7997s
Blacklisted hosts:
shard-kbl total:2758 pass:1818 dwarn:1 dfail:1 fail:24 skip:913 time:10528s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7725/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
@ 2018-01-24 10:32 ` Tvrtko Ursulin
2018-01-24 10:37 ` Chris Wilson
0 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 10:32 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 19/01/2018 15:23, Chris Wilson wrote:
> When we finally decide the gpu is idle, that is a good time to shrink
> our kmem_caches.
>
> v3: Defer until an rcu grace period after we idle.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 65 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7f0684ccc724..6a8fbcae835b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
> work_pending(&i915->gt.idle_work.work));
> }
>
> +static void shrink_caches(struct drm_i915_private *i915)
> +{
> + /*
> + * kmem_cache_shrink() discards empty slabs and reorders partially
> + * filled slabs to prioritise allocating from the mostly full slabs,
> + * with the aim of reducing fragmentation.
> + */
> + kmem_cache_shrink(i915->priorities);
> + kmem_cache_shrink(i915->dependencies);
> + kmem_cache_shrink(i915->requests);
> + kmem_cache_shrink(i915->luts);
> + kmem_cache_shrink(i915->vmas);
> + kmem_cache_shrink(i915->objects);
> +}
> +
> +struct sleep_rcu_work {
> + struct drm_i915_private *i915;
> + struct rcu_head rcu;
> + struct work_struct work;
> + u32 epoch;
> +};
> +
> +static void __sleep_work(struct work_struct *work)
> +{
> + struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
> + struct drm_i915_private *i915 = s->i915;
> + u32 epoch = s->epoch;
> +
> + kfree(s);
> + if (epoch == READ_ONCE(i915->gt.epoch))
> + shrink_caches(i915);
> +}
> +
> +static void __sleep_rcu(struct rcu_head *rcu)
> +{
> + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
> + struct drm_i915_private *i915 = s->i915;
> +
> + if (s->epoch == READ_ONCE(i915->gt.epoch)) {
> + INIT_WORK(&s->work, __sleep_work);
> + queue_work(i915->wq, &s->work);
> + } else {
> + kfree(s);
> + }
> +}
> +
> static void
> i915_gem_idle_work_handler(struct work_struct *work)
> {
> struct drm_i915_private *dev_priv =
> container_of(work, typeof(*dev_priv), gt.idle_work.work);
> bool rearm_hangcheck;
> + u32 epoch = 0;
> ktime_t end;
>
> if (!READ_ONCE(dev_priv->gt.awake))
> @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
> GEM_BUG_ON(!dev_priv->gt.awake);
> dev_priv->gt.awake = false;
> rearm_hangcheck = false;
> + epoch = dev_priv->gt.epoch;
>
> if (INTEL_GEN(dev_priv) >= 6)
> gen6_rps_idle(dev_priv);
> @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
> GEM_BUG_ON(!dev_priv->gt.awake);
> i915_queue_hangcheck(dev_priv);
> }
> +
> + /*
> + * When we are idle, it is an opportune time to reap our caches.
> + * However, we have many objects that utilise RCU and the ordered
> + * i915->wq that this work is executing on. To try and flush any
> + * pending frees now we are idle, we first wait for an RCU grace
> + * period, and then queue a task (that will run last on the wq) to
> + * shrink and re-optimize the caches.
> + */
> + if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
Theoretically this can be true on epoch wrap-around, when trylock
failed. It's one in four billion busy transitions but it could be just
worth handling it explicitly. Simplest probably to ensure gt.epoch is
never zero when incrementing?
> + struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
> + if (s) {
> + s->i915 = dev_priv;
> + s->epoch = epoch;
> + call_rcu(&s->rcu, __sleep_rcu);
> + }
> + }
> }
>
> void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>
Otherwise it sounds believable and looks correct.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up
2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
` (2 preceding siblings ...)
2018-01-19 21:56 ` ✗ Fi.CI.IGT: failure " Patchwork
@ 2018-01-24 10:33 ` Tvrtko Ursulin
2018-01-24 10:42 ` Chris Wilson
3 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 10:33 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 19/01/2018 15:23, Chris Wilson wrote:
> By counting the number of times we have woken up, we have a very simple
> means of defining an epoch, which will come in handy if we want to
> perform deferred tasks at the end of an epoch (i.e. while we are going
> to sleep) without imposing on the next activity cycle.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++---
> drivers/gpu/drm/i915/i915_drv.h | 5 +++++
> drivers/gpu/drm/i915/i915_gem_request.c | 1 +
> 3 files changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cc659b4b2a45..1aac3ec7d14d 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2717,7 +2717,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
> if (!HAS_RUNTIME_PM(dev_priv))
> seq_puts(m, "Runtime power management not supported\n");
>
> - seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
> + seq_printf(m, "GPU idle: %s (epoch %d)\n",
> + yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
> seq_printf(m, "IRQs disabled: %s\n",
> yesno(!intel_irqs_enabled(dev_priv)));
> #ifdef CONFIG_PM
> @@ -3150,8 +3151,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>
> intel_runtime_pm_get(dev_priv);
>
> - seq_printf(m, "GT awake? %s\n",
> - yesno(dev_priv->gt.awake));
> + seq_printf(m, "GT awake? %s (epoch %d)\n",
> + yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
> seq_printf(m, "Global active requests: %d\n",
> dev_priv->gt.active_requests);
> seq_printf(m, "CS timestamp frequency: %u kHz\n",
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 317953484fec..98e8385d1bb0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2302,6 +2302,11 @@ struct drm_i915_private {
> struct i915_gem_timeline global_timeline;
> u32 active_requests;
>
> + /**
> + * The number of times we have woken up.
> + */
> + u32 epoch;
> +
> /**
> * Is the GPU currently considered idle, or busy executing
> * userspace requests? Whilst idle, we allow runtime power
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..f0fab070a3a0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -274,6 +274,7 @@ static void mark_busy(struct drm_i915_private *i915)
> intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
>
> i915->gt.awake = true;
> + i915->gt.epoch++;
>
> intel_enable_gt_powersave(i915);
> i915_update_gfx_val(i915);
>
I'd be tempted to use a standard type like unsigned int where explicit
width is not needed but it's minor.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
2018-01-24 10:32 ` Tvrtko Ursulin
@ 2018-01-24 10:37 ` Chris Wilson
2018-01-24 11:06 ` Tvrtko Ursulin
0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2018-01-24 10:37 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Tvrtko Ursulin (2018-01-24 10:32:16)
>
> On 19/01/2018 15:23, Chris Wilson wrote:
> > When we finally decide the gpu is idle, that is a good time to shrink
> > our kmem_caches.
> >
> > v3: Defer until an rcu grace period after we idle.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 65 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 7f0684ccc724..6a8fbcae835b 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
> > work_pending(&i915->gt.idle_work.work));
> > }
> >
> > +static void shrink_caches(struct drm_i915_private *i915)
> > +{
> > + /*
> > + * kmem_cache_shrink() discards empty slabs and reorders partially
> > + * filled slabs to prioritise allocating from the mostly full slabs,
> > + * with the aim of reducing fragmentation.
> > + */
> > + kmem_cache_shrink(i915->priorities);
> > + kmem_cache_shrink(i915->dependencies);
> > + kmem_cache_shrink(i915->requests);
> > + kmem_cache_shrink(i915->luts);
> > + kmem_cache_shrink(i915->vmas);
> > + kmem_cache_shrink(i915->objects);
> > +}
> > +
> > +struct sleep_rcu_work {
> > + struct drm_i915_private *i915;
> > + struct rcu_head rcu;
> > + struct work_struct work;
> > + u32 epoch;
> > +};
> > +
> > +static void __sleep_work(struct work_struct *work)
> > +{
> > + struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
> > + struct drm_i915_private *i915 = s->i915;
> > + u32 epoch = s->epoch;
> > +
> > + kfree(s);
> > + if (epoch == READ_ONCE(i915->gt.epoch))
> > + shrink_caches(i915);
> > +}
> > +
> > +static void __sleep_rcu(struct rcu_head *rcu)
> > +{
> > + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
> > + struct drm_i915_private *i915 = s->i915;
> > +
> > + if (s->epoch == READ_ONCE(i915->gt.epoch)) {
> > + INIT_WORK(&s->work, __sleep_work);
> > + queue_work(i915->wq, &s->work);
> > + } else {
> > + kfree(s);
> > + }
> > +}
> > +
> > static void
> > i915_gem_idle_work_handler(struct work_struct *work)
> > {
> > struct drm_i915_private *dev_priv =
> > container_of(work, typeof(*dev_priv), gt.idle_work.work);
> > bool rearm_hangcheck;
> > + u32 epoch = 0;
> > ktime_t end;
> >
> > if (!READ_ONCE(dev_priv->gt.awake))
> > @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
> > GEM_BUG_ON(!dev_priv->gt.awake);
> > dev_priv->gt.awake = false;
> > rearm_hangcheck = false;
> > + epoch = dev_priv->gt.epoch;
> >
> > if (INTEL_GEN(dev_priv) >= 6)
> > gen6_rps_idle(dev_priv);
> > @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
> > GEM_BUG_ON(!dev_priv->gt.awake);
> > i915_queue_hangcheck(dev_priv);
> > }
> > +
> > + /*
> > + * When we are idle, it is an opportune time to reap our caches.
> > + * However, we have many objects that utilise RCU and the ordered
> > + * i915->wq that this work is executing on. To try and flush any
> > + * pending frees now we are idle, we first wait for an RCU grace
> > + * period, and then queue a task (that will run last on the wq) to
> > + * shrink and re-optimize the caches.
> > + */
> > + if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
>
> Theoretically this can be true on epoch wrap-around, when trylock
> failed. It's one in four billion busy transitions but it could be just
> worth handling it explicitly. Simplest probably to ensure gt.epoch is
> never zero when incrementing?
I was working on the principle that a u32 wraparound takes at least
100ms * 2^32, or about 326 years. :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up
2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
@ 2018-01-24 10:42 ` Chris Wilson
0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2018-01-24 10:42 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
Quoting Tvrtko Ursulin (2018-01-24 10:33:29)
>
> On 19/01/2018 15:23, Chris Wilson wrote:
> > By counting the number of times we have woken up, we have a very simple
> > means of defining an epoch, which will come in handy if we want to
> > perform deferred tasks at the end of an epoch (i.e. while we are going
> > to sleep) without imposing on the next activity cycle.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++---
> > drivers/gpu/drm/i915/i915_drv.h | 5 +++++
> > drivers/gpu/drm/i915/i915_gem_request.c | 1 +
> > 3 files changed, 10 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index cc659b4b2a45..1aac3ec7d14d 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2717,7 +2717,8 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
> > if (!HAS_RUNTIME_PM(dev_priv))
> > seq_puts(m, "Runtime power management not supported\n");
> >
> > - seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
> > + seq_printf(m, "GPU idle: %s (epoch %d)\n",
> > + yesno(!dev_priv->gt.awake), dev_priv->gt.epoch);
> > seq_printf(m, "IRQs disabled: %s\n",
> > yesno(!intel_irqs_enabled(dev_priv)));
> > #ifdef CONFIG_PM
> > @@ -3150,8 +3151,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> >
> > intel_runtime_pm_get(dev_priv);
> >
> > - seq_printf(m, "GT awake? %s\n",
> > - yesno(dev_priv->gt.awake));
> > + seq_printf(m, "GT awake? %s (epoch %d)\n",
> > + yesno(dev_priv->gt.awake), dev_priv->gt.epoch);
> > seq_printf(m, "Global active requests: %d\n",
> > dev_priv->gt.active_requests);
> > seq_printf(m, "CS timestamp frequency: %u kHz\n",
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 317953484fec..98e8385d1bb0 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2302,6 +2302,11 @@ struct drm_i915_private {
> > struct i915_gem_timeline global_timeline;
> > u32 active_requests;
> >
> > + /**
> > + * The number of times we have woken up.
> > + */
> > + u32 epoch;
> > +
> > /**
> > * Is the GPU currently considered idle, or busy executing
> > * userspace requests? Whilst idle, we allow runtime power
> > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> > index a0f451b4a4e8..f0fab070a3a0 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_request.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> > @@ -274,6 +274,7 @@ static void mark_busy(struct drm_i915_private *i915)
> > intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
> >
> > i915->gt.awake = true;
> > + i915->gt.epoch++;
> >
> > intel_enable_gt_powersave(i915);
> > i915_update_gfx_val(i915);
> >
>
> I'd be tempted to use a standard type like unsigned int where explicit
> width is not needed but it's minor.
Yeah, I was just copying the neighbouring type. active_requests is bound
to u32 by inflight_seqnos (which is bound by the hw seqno being u32).
Still no reason to impose u32 here, it raises the question of why must
it be exactly 32b.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
2018-01-24 10:37 ` Chris Wilson
@ 2018-01-24 11:06 ` Tvrtko Ursulin
0 siblings, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 11:06 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 24/01/2018 10:37, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-24 10:32:16)
>>
>> On 19/01/2018 15:23, Chris Wilson wrote:
>>> When we finally decide the gpu is idle, that is a good time to shrink
>>> our kmem_caches.
>>>
>>> v3: Defer until an rcu grace period after we idle.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_gem.c | 65 +++++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 65 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 7f0684ccc724..6a8fbcae835b 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -3341,12 +3341,59 @@ new_requests_since_last_retire(const struct drm_i915_private *i915)
>>> work_pending(&i915->gt.idle_work.work));
>>> }
>>>
>>> +static void shrink_caches(struct drm_i915_private *i915)
>>> +{
>>> + /*
>>> + * kmem_cache_shrink() discards empty slabs and reorders partially
>>> + * filled slabs to prioritise allocating from the mostly full slabs,
>>> + * with the aim of reducing fragmentation.
>>> + */
>>> + kmem_cache_shrink(i915->priorities);
>>> + kmem_cache_shrink(i915->dependencies);
>>> + kmem_cache_shrink(i915->requests);
>>> + kmem_cache_shrink(i915->luts);
>>> + kmem_cache_shrink(i915->vmas);
>>> + kmem_cache_shrink(i915->objects);
>>> +}
>>> +
>>> +struct sleep_rcu_work {
>>> + struct drm_i915_private *i915;
>>> + struct rcu_head rcu;
>>> + struct work_struct work;
>>> + u32 epoch;
>>> +};
>>> +
>>> +static void __sleep_work(struct work_struct *work)
>>> +{
>>> + struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
>>> + struct drm_i915_private *i915 = s->i915;
>>> + u32 epoch = s->epoch;
>>> +
>>> + kfree(s);
>>> + if (epoch == READ_ONCE(i915->gt.epoch))
>>> + shrink_caches(i915);
>>> +}
>>> +
>>> +static void __sleep_rcu(struct rcu_head *rcu)
>>> +{
>>> + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
>>> + struct drm_i915_private *i915 = s->i915;
>>> +
>>> + if (s->epoch == READ_ONCE(i915->gt.epoch)) {
>>> + INIT_WORK(&s->work, __sleep_work);
>>> + queue_work(i915->wq, &s->work);
>>> + } else {
>>> + kfree(s);
>>> + }
>>> +}
>>> +
>>> static void
>>> i915_gem_idle_work_handler(struct work_struct *work)
>>> {
>>> struct drm_i915_private *dev_priv =
>>> container_of(work, typeof(*dev_priv), gt.idle_work.work);
>>> bool rearm_hangcheck;
>>> + u32 epoch = 0;
>>> ktime_t end;
>>>
>>> if (!READ_ONCE(dev_priv->gt.awake))
>>> @@ -3406,6 +3453,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>> GEM_BUG_ON(!dev_priv->gt.awake);
>>> dev_priv->gt.awake = false;
>>> rearm_hangcheck = false;
>>> + epoch = dev_priv->gt.epoch;
>>>
>>> if (INTEL_GEN(dev_priv) >= 6)
>>> gen6_rps_idle(dev_priv);
>>> @@ -3421,6 +3469,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>>> GEM_BUG_ON(!dev_priv->gt.awake);
>>> i915_queue_hangcheck(dev_priv);
>>> }
>>> +
>>> + /*
>>> + * When we are idle, it is an opportune time to reap our caches.
>>> + * However, we have many objects that utilise RCU and the ordered
>>> + * i915->wq that this work is executing on. To try and flush any
>>> + * pending frees now we are idle, we first wait for an RCU grace
>>> + * period, and then queue a task (that will run last on the wq) to
>>> + * shrink and re-optimize the caches.
>>> + */
>>> + if (epoch == READ_ONCE(dev_priv->gt.epoch)) {
>>
>> Theoretically this can be true on epoch wrap-around, when trylock
>> failed. It's one in four billion busy transitions but it could be just
>> worth handling it explicitly. Simplest probably to ensure gt.epoch is
>> never zero when incrementing?
>
> I was working on the principle that a u32 wraparound takes at least
> 100ms * 2^32, or about 326 years. :)
Ah forgot there is a time delay.. okay then. :)
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
To be kept if/when you change to unsigned int.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2018-01-24 11:06 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-19 15:23 [PATCH 1/2] drm/i915: Track the number of times we have woken the GPU up Chris Wilson
2018-01-19 15:23 ` [PATCH 2/2] drm/i915: Shrink the GEM kmem_caches upon idling Chris Wilson
2018-01-24 10:32 ` Tvrtko Ursulin
2018-01-24 10:37 ` Chris Wilson
2018-01-24 11:06 ` Tvrtko Ursulin
2018-01-19 16:13 ` ✓ Fi.CI.BAT: success for series starting with [1/2] drm/i915: Track the number of times we have woken the GPU up Patchwork
2018-01-19 21:56 ` ✗ Fi.CI.IGT: failure " Patchwork
2018-01-24 10:33 ` [PATCH 1/2] " Tvrtko Ursulin
2018-01-24 10:42 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.