* [RFC 1/6] drm/i915/pmu: Fix enable count array size and bounds checking
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
@ 2018-01-22 18:43 ` Tvrtko Ursulin
2018-01-22 18:43 ` [RFC 2/6] drm/i915: Keep a count of requests waiting for a slot on GPU Tvrtko Ursulin
` (7 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22 18:43 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
The enable count array is supposed to have one counter for each possible
engine sampler. As such, the array sizing and bounds checking are not
correct when more engine samplers are added.
At the same time tidy the assert for readability and robustness.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: b46a33e271ed ("drm/i915/pmu: Expose a PMU interface for perf queries")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_pmu.c | 13 +++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 065a28c713c4..cbfca4a255ab 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -476,7 +476,8 @@ static void i915_pmu_enable(struct perf_event *event)
* Update the bitmask of enabled events and increment
* the event reference counter.
*/
- GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
+ BUILD_BUG_ON(ARRAY_SIZE(i915->pmu.enable_count) != I915_PMU_MASK_BITS);
+ GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
i915->pmu.enable |= BIT_ULL(bit);
i915->pmu.enable_count[bit]++;
@@ -500,7 +501,10 @@ static void i915_pmu_enable(struct perf_event *event)
GEM_BUG_ON(!engine);
engine->pmu.enable |= BIT(sample);
- GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+ BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
+ (1 << I915_PMU_SAMPLE_BITS));
+ GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+ GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
if (engine->pmu.enable_count[sample]++ == 0) {
/*
@@ -554,7 +558,8 @@ static void i915_pmu_disable(struct perf_event *event)
engine_event_class(event),
engine_event_instance(event));
GEM_BUG_ON(!engine);
- GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
+ GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
+ GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
/*
* Decrement the reference count and clear the enabled
@@ -582,7 +587,7 @@ static void i915_pmu_disable(struct perf_event *event)
}
}
- GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
+ GEM_BUG_ON(bit >= ARRAY_SIZE(i915->pmu.enable_count));
GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
/*
* Decrement the reference count and clear the enabled
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..27a0c47db51e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -358,7 +358,7 @@ struct intel_engine_cs {
*
* Index number corresponds to the bit number from @enable.
*/
- unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+ unsigned int enable_count[1 << I915_PMU_SAMPLE_BITS];
/**
* @sample: Counter values for sampling events.
*
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [RFC 2/6] drm/i915: Keep a count of requests waiting for a slot on GPU
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
2018-01-22 18:43 ` [RFC 1/6] drm/i915/pmu: Fix enable count array size and bounds checking Tvrtko Ursulin
@ 2018-01-22 18:43 ` Tvrtko Ursulin
2018-01-22 18:48 ` Chris Wilson
2018-01-22 18:43 ` [RFC 3/6] drm/i915: Keep a count of requests submitted from userspace Tvrtko Ursulin
` (6 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22 18:43 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Keep a per-engine number of runnable (waiting for GPU time) requests.
v2:
* Move queued increment from insert_request to execlist_submit_request to
avoid bumping when re-ordering for priority.
* Support the counter on the ringbuffer submission path as well, albeit
just notionally. (Chris Wilson)
v3:
* Rebase.
v4:
* Rename and move the stats into a container structure. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++++
drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++--
drivers/gpu/drm/i915/intel_lrc.c | 2 ++
drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++++++
4 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a0f451b4a4e8..8da350bacff1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -502,6 +502,9 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
engine->emit_breadcrumb(request,
request->ring->vaddr + request->postfix);
+ GEM_BUG_ON(engine->request_stats.runnable == 0);
+ engine->request_stats.runnable--;
+
spin_lock(&request->timeline->lock);
list_move_tail(&request->link, &timeline->requests);
spin_unlock(&request->timeline->lock);
@@ -517,6 +520,8 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
+ engine->request_stats.runnable++;
+
__i915_gem_request_submit(request);
spin_unlock_irqrestore(&engine->timeline->lock, flags);
@@ -548,6 +553,8 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
timeline = request->timeline;
GEM_BUG_ON(timeline == engine->timeline);
+ engine->request_stats.runnable++;
+
spin_lock(&timeline->lock);
list_move(&request->link, &timeline->requests);
spin_unlock(&timeline->lock);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 7eebfbb95e89..8377a77cfbe7 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1731,12 +1731,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
if (i915_terminally_wedged(&engine->i915->gpu_error))
drm_printf(m, "*** WEDGED ***\n");
- drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
+ drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d, runnable %u\n",
intel_engine_get_seqno(engine),
intel_engine_last_submit(engine),
engine->hangcheck.seqno,
jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
- engine->timeline->inflight_seqnos);
+ engine->timeline->inflight_seqnos,
+ engine->request_stats.runnable);
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
i915_reset_count(error));
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 51e61b04a555..319937e67a6e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -965,6 +965,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
+ engine->request_stats.runnable++;
+
insert_request(engine, &request->priotree, request->priotree.priority);
GEM_BUG_ON(!engine->execlists.first);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 27a0c47db51e..d7ee7831288d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -303,6 +303,15 @@ struct intel_engine_cs {
struct intel_ring *buffer;
struct intel_timeline *timeline;
+ struct {
+ /**
+ * @runnable: Number of runnable requests sent to the backend.
+ *
+ * Count of requests waiting for the GPU to execute them.
+ */
+ unsigned int runnable;
+ } request_stats;
+
struct drm_i915_gem_object *default_state;
atomic_t irq_count;
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [RFC 2/6] drm/i915: Keep a count of requests waiting for a slot on GPU
2018-01-22 18:43 ` [RFC 2/6] drm/i915: Keep a count of requests waiting for a slot on GPU Tvrtko Ursulin
@ 2018-01-22 18:48 ` Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-22 18:48 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
Quoting Tvrtko Ursulin (2018-01-22 18:43:54)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Keep a per-engine number of runnable (waiting for GPU time) requests.
>
> v2:
> * Move queued increment from insert_request to execlist_submit_request to
> avoid bumping when re-ordering for priority.
> * Support the counter on the ringbuffer submission path as well, albeit
> just notionally. (Chris Wilson)
>
> v3:
> * Rebase.
>
> v4:
> * Rename and move the stats into a container structure. (Chris Wilson)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++++
> drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++--
> drivers/gpu/drm/i915/intel_lrc.c | 2 ++
> drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++++++
> 4 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index a0f451b4a4e8..8da350bacff1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -502,6 +502,9 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
> engine->emit_breadcrumb(request,
> request->ring->vaddr + request->postfix);
>
> + GEM_BUG_ON(engine->request_stats.runnable == 0);
> + engine->request_stats.runnable--;
> +
> spin_lock(&request->timeline->lock);
> list_move_tail(&request->link, &timeline->requests);
> spin_unlock(&request->timeline->lock);
> @@ -517,6 +520,8 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request)
> /* Will be called from irq-context when using foreign fences. */
> spin_lock_irqsave(&engine->timeline->lock, flags);
>
> + engine->request_stats.runnable++;
> +
> __i915_gem_request_submit(request);
>
> spin_unlock_irqrestore(&engine->timeline->lock, flags);
> @@ -548,6 +553,8 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request)
> timeline = request->timeline;
> GEM_BUG_ON(timeline == engine->timeline);
>
> + engine->request_stats.runnable++;
> +
> spin_lock(&timeline->lock);
> list_move(&request->link, &timeline->requests);
> spin_unlock(&timeline->lock);
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 7eebfbb95e89..8377a77cfbe7 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1731,12 +1731,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> if (i915_terminally_wedged(&engine->i915->gpu_error))
> drm_printf(m, "*** WEDGED ***\n");
>
> - drm_printf(m, " current seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
> + drm_printf(m, " current seqno %x, last %x, hangcheck %x [%d ms], inflight %d, runnable %u\n",
> intel_engine_get_seqno(engine),
> intel_engine_last_submit(engine),
> engine->hangcheck.seqno,
> jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
> - engine->timeline->inflight_seqnos);
> + engine->timeline->inflight_seqnos,
> + engine->request_stats.runnable);
> drm_printf(m, " Reset count: %d (global %d)\n",
> i915_reset_engine_count(error, engine),
> i915_reset_count(error));
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 51e61b04a555..319937e67a6e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -965,6 +965,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
> /* Will be called from irq-context when using foreign fences. */
> spin_lock_irqsave(&engine->timeline->lock, flags);
>
> + engine->request_stats.runnable++;
> +
> insert_request(engine, &request->priotree, request->priotree.priority);
>
> GEM_BUG_ON(!engine->execlists.first);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 27a0c47db51e..d7ee7831288d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -303,6 +303,15 @@ struct intel_engine_cs {
> struct intel_ring *buffer;
> struct intel_timeline *timeline;
>
> + struct {
> + /**
> + * @runnable: Number of runnable requests sent to the backend.
> + *
> + * Count of requests waiting for the GPU to execute them.
> + */
> + unsigned int runnable;
> + } request_stats;
> +
> struct drm_i915_gem_object *default_state;
Just thinking about easy holes, probably want to keep the pointer above
next to the other pointers. I'll let you argue cachelines ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC 3/6] drm/i915: Keep a count of requests submitted from userspace
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
2018-01-22 18:43 ` [RFC 1/6] drm/i915/pmu: Fix enable count array size and bounds checking Tvrtko Ursulin
2018-01-22 18:43 ` [RFC 2/6] drm/i915: Keep a count of requests waiting for a slot on GPU Tvrtko Ursulin
@ 2018-01-22 18:43 ` Tvrtko Ursulin
2018-01-22 18:43 ` [RFC 4/6] drm/i915/pmu: Add queued counter Tvrtko Ursulin
` (5 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22 18:43 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Keep a count of requests submitted from userspace and not yet runnable due
to unresolved dependencies.
v2: Rename and move under the container struct. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 3 +++
drivers/gpu/drm/i915/intel_engine_cs.c | 3 ++-
drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++++++++
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8da350bacff1..125a598b886c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -599,6 +599,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
rcu_read_lock();
request->engine->submit_request(request);
rcu_read_unlock();
+ atomic_dec(&request->engine->request_stats.queued);
break;
case FENCE_FREE:
@@ -1067,6 +1068,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
if (engine->schedule)
engine->schedule(request, request->ctx->priority);
+ atomic_inc(&engine->request_stats.queued);
+
local_bh_disable();
i915_sw_fence_commit(&request->submit);
local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 8377a77cfbe7..46b2a92cb7a2 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1731,12 +1731,13 @@ void intel_engine_dump(struct intel_engine_cs *engine,
if (i915_terminally_wedged(&engine->i915->gpu_error))
drm_printf(m, "*** WEDGED ***\n");
- drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d, runnable %u\n",
+ drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d, queued %u, runnable %u\n",
intel_engine_get_seqno(engine),
intel_engine_last_submit(engine),
engine->hangcheck.seqno,
jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
engine->timeline->inflight_seqnos,
+ atomic_read(&engine->request_stats.queued),
engine->request_stats.runnable);
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d7ee7831288d..4519788cc5a1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -304,6 +304,14 @@ struct intel_engine_cs {
struct intel_timeline *timeline;
struct {
+ /**
+ * @queued: Number of submitted requests with dependencies.
+ *
+ * Count of requests waiting for dependencies before they can be
+ * submitted to the backend.
+ */
+ atomic_t queued;
+
/**
* @runnable: Number of runnable requests sent to the backend.
*
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [RFC 4/6] drm/i915/pmu: Add queued counter
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
` (2 preceding siblings ...)
2018-01-22 18:43 ` [RFC 3/6] drm/i915: Keep a count of requests submitted from userspace Tvrtko Ursulin
@ 2018-01-22 18:43 ` Tvrtko Ursulin
2018-01-22 18:56 ` Chris Wilson
2018-01-22 18:43 ` [RFC 5/6] drm/i915/pmu: Add runnable counter Tvrtko Ursulin
` (4 subsequent siblings)
8 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22 18:43 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.
This is useful to analyze the overall load of the system.
v2:
* Rebase for name change and re-order.
* Drop floating point constant. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_pmu.c | 40 +++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
include/uapi/drm/i915_drm.h | 9 +++++++-
3 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index cbfca4a255ab..8eefdf09a30a 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -36,7 +36,8 @@
#define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
BIT(I915_SAMPLE_WAIT) | \
- BIT(I915_SAMPLE_SEMA))
+ BIT(I915_SAMPLE_SEMA) | \
+ BIT(I915_SAMPLE_QUEUED))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -220,6 +221,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+ update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+ I915_SAMPLE_QUEUED_DIVISOR,
+ atomic_read(&engine->request_stats.queued));
}
if (fw)
@@ -297,6 +303,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+ case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -407,6 +414,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
} else {
val = engine->pmu.sample[sample].cur;
}
+
+ if (sample == I915_SAMPLE_QUEUED)
+ val = div_u64(val, FREQUENCY);
} else {
switch (event->attr.config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -719,6 +729,16 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
{ \
.sample = (__sample), \
.name = (__name), \
+ .suffix = "unit", \
+ .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+ .sample = (__sample), \
+ .name = (__name), \
+ .suffix = "scale", \
+ .value = (__scale), \
}
static struct i915_ext_attribute *
@@ -746,6 +766,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
return ++attr;
}
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.01
+
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
@@ -762,10 +785,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+ char *suffix;
+ char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+ __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+ __stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -775,6 +802,9 @@ create_event_attributes(struct drm_i915_private *i915)
enum intel_engine_id id;
unsigned int i;
+ BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+ (1 / I915_SAMPLE_QUEUED_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
@@ -852,13 +882,15 @@ create_event_attributes(struct drm_i915_private *i915)
engine->instance,
engine_events[i].sample));
- str = kasprintf(GFP_KERNEL, "%s-%s.unit",
- engine->name, engine_events[i].name);
+ str = kasprintf(GFP_KERNEL, "%s-%s.%s",
+ engine->name, engine_events[i].name,
+ engine_events[i].suffix);
if (!str)
goto err;
*attr_iter++ = &pmu_iter->attr.attr;
- pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+ pmu_iter = add_pmu_attr(pmu_iter, str,
+ engine_events[i].value);
}
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4519788cc5a1..580f07b2a5dd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -381,7 +381,7 @@ struct intel_engine_cs {
*
* Our internal timer stores the current counters in this field.
*/
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
/**
* @busy_stats: Has enablement of engine stats tracking been
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 536ee4febd74..968bdc3269cb 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -110,9 +110,13 @@ enum drm_i915_gem_engine_class {
enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
- I915_SAMPLE_SEMA = 2
+ I915_SAMPLE_SEMA = 2,
+ I915_SAMPLE_QUEUED = 3
};
+ /* Divide counter value by divisor to get the real value. */
+#define I915_SAMPLE_QUEUED_DIVISOR (100)
+
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -133,6 +137,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [RFC 4/6] drm/i915/pmu: Add queued counter
2018-01-22 18:43 ` [RFC 4/6] drm/i915/pmu: Add queued counter Tvrtko Ursulin
@ 2018-01-22 18:56 ` Chris Wilson
2018-01-24 18:01 ` Tvrtko Ursulin
0 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2018-01-22 18:56 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
Quoting Tvrtko Ursulin (2018-01-22 18:43:56)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> We add a PMU counter to expose the number of requests which have been
> submitted from userspace but are not yet runnable due dependencies and
> unsignaled fences.
>
> This is useful to analyze the overall load of the system.
>
> v2:
> * Rebase for name change and re-order.
> * Drop floating point constant. (Chris Wilson)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_pmu.c | 40 +++++++++++++++++++++++++++++----
> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
> include/uapi/drm/i915_drm.h | 9 +++++++-
> 3 files changed, 45 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index cbfca4a255ab..8eefdf09a30a 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -36,7 +36,8 @@
> #define ENGINE_SAMPLE_MASK \
> (BIT(I915_SAMPLE_BUSY) | \
> BIT(I915_SAMPLE_WAIT) | \
> - BIT(I915_SAMPLE_SEMA))
> + BIT(I915_SAMPLE_SEMA) | \
> + BIT(I915_SAMPLE_QUEUED))
>
> #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>
> @@ -220,6 +221,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>
> update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
> PERIOD, !!(val & RING_WAIT_SEMAPHORE));
> +
> + if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
> + update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
> + I915_SAMPLE_QUEUED_DIVISOR,
> + atomic_read(&engine->request_stats.queued));
engine->request_stats.foo works for me, and reads quite nicely.
> +/* No brackets or quotes below please. */
> +#define I915_SAMPLE_QUEUED_SCALE 0.01
> + /* Divide counter value by divisor to get the real value. */
> +#define I915_SAMPLE_QUEUED_DIVISOR (100)
I'm just thinking of favouring the sampler arithmetic by using 128. As
far as userspace is concerned, the difference is not going to be that
noticeable, even less so if you choose 256.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC 4/6] drm/i915/pmu: Add queued counter
2018-01-22 18:56 ` Chris Wilson
@ 2018-01-24 18:01 ` Tvrtko Ursulin
0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 18:01 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, Intel-gfx
On 22/01/2018 18:56, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-22 18:43:56)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> We add a PMU counter to expose the number of requests which have been
>> submitted from userspace but are not yet runnable due dependencies and
>> unsignaled fences.
>>
>> This is useful to analyze the overall load of the system.
>>
>> v2:
>> * Rebase for name change and re-order.
>> * Drop floating point constant. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_pmu.c | 40 +++++++++++++++++++++++++++++----
>> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
>> include/uapi/drm/i915_drm.h | 9 +++++++-
>> 3 files changed, 45 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>> index cbfca4a255ab..8eefdf09a30a 100644
>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> @@ -36,7 +36,8 @@
>> #define ENGINE_SAMPLE_MASK \
>> (BIT(I915_SAMPLE_BUSY) | \
>> BIT(I915_SAMPLE_WAIT) | \
>> - BIT(I915_SAMPLE_SEMA))
>> + BIT(I915_SAMPLE_SEMA) | \
>> + BIT(I915_SAMPLE_QUEUED))
>>
>> #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>>
>> @@ -220,6 +221,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
>>
>> update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>> PERIOD, !!(val & RING_WAIT_SEMAPHORE));
>> +
>> + if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
>> + update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
>> + I915_SAMPLE_QUEUED_DIVISOR,
>> + atomic_read(&engine->request_stats.queued));
>
> engine->request_stats.foo works for me, and reads quite nicely.
>
>> +/* No brackets or quotes below please. */
>> +#define I915_SAMPLE_QUEUED_SCALE 0.01
>
>> + /* Divide counter value by divisor to get the real value. */
>> +#define I915_SAMPLE_QUEUED_DIVISOR (100)
>
> I'm just thinking of favouring the sampler arithmetic by using 128. As
> far as userspace the difference is not going to that noticeable, less if
> you chose 256.
I'll do 1024 then, but the CPU usage in the sampling thread is so low
anyway.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC 5/6] drm/i915/pmu: Add runnable counter
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
` (3 preceding siblings ...)
2018-01-22 18:43 ` [RFC 4/6] drm/i915/pmu: Add queued counter Tvrtko Ursulin
@ 2018-01-22 18:43 ` Tvrtko Ursulin
2018-01-22 18:43 ` [RFC 6/6] drm/i915/pmu: Add running counter Tvrtko Ursulin
` (3 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22 18:43 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
We add a PMU counter to expose the number of requests with resolved
dependencies waiting for a slot on the GPU to run.
This is useful to analyze the overall load of the system.
v2: Don't limit to gen8+.
v3:
* Rebase for dynamic sysfs.
* Drop currently executing requests.
v4:
* Sync with internal renaming.
* Drop floating point constant. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_pmu.c | 18 ++++++++++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
include/uapi/drm/i915_drm.h | 7 ++++++-
3 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8eefdf09a30a..f332eff6d057 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -37,7 +37,8 @@
(BIT(I915_SAMPLE_BUSY) | \
BIT(I915_SAMPLE_WAIT) | \
BIT(I915_SAMPLE_SEMA) | \
- BIT(I915_SAMPLE_QUEUED))
+ BIT(I915_SAMPLE_QUEUED) | \
+ BIT(I915_SAMPLE_RUNNABLE))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -226,6 +227,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
I915_SAMPLE_QUEUED_DIVISOR,
atomic_read(&engine->request_stats.queued));
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_RUNNABLE))
+ update_sample(&engine->pmu.sample[I915_SAMPLE_RUNNABLE],
+ I915_SAMPLE_RUNNABLE_DIVISOR,
+ engine->request_stats.runnable);
}
if (fw)
@@ -304,6 +310,7 @@ engine_event_status(struct intel_engine_cs *engine,
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
case I915_SAMPLE_QUEUED:
+ case I915_SAMPLE_RUNNABLE:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -415,7 +422,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = engine->pmu.sample[sample].cur;
}
- if (sample == I915_SAMPLE_QUEUED)
+ if (sample == I915_SAMPLE_QUEUED ||
+ sample == I915_SAMPLE_RUNNABLE)
val = div_u64(val, FREQUENCY);
} else {
switch (event->attr.config) {
@@ -768,6 +776,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
/* No brackets or quotes below please. */
#define I915_SAMPLE_QUEUED_SCALE 0.01
+#define I915_SAMPLE_RUNNABLE_SCALE 0.01
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
@@ -793,6 +802,8 @@ create_event_attributes(struct drm_i915_private *i915)
__engine_event(I915_SAMPLE_WAIT, "wait"),
__engine_event_scale(I915_SAMPLE_QUEUED, "queued",
__stringify(I915_SAMPLE_QUEUED_SCALE)),
+ __engine_event_scale(I915_SAMPLE_RUNNABLE, "runnable",
+ __stringify(I915_SAMPLE_RUNNABLE_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -805,6 +816,9 @@ create_event_attributes(struct drm_i915_private *i915)
BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
(1 / I915_SAMPLE_QUEUED_SCALE));
+ BUILD_BUG_ON(I915_SAMPLE_RUNNABLE_DIVISOR !=
+ (1 / I915_SAMPLE_RUNNABLE_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 580f07b2a5dd..a06f1fc0c150 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -381,7 +381,7 @@ struct intel_engine_cs {
*
* Our internal timer stores the current counters in this field.
*/
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_RUNNABLE + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
/**
* @busy_stats: Has enablement of engine stats tracking been
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 968bdc3269cb..05951839abe0 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -111,11 +111,13 @@ enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
I915_SAMPLE_SEMA = 2,
- I915_SAMPLE_QUEUED = 3
+ I915_SAMPLE_QUEUED = 3,
+ I915_SAMPLE_RUNNABLE = 4,
};
/* Divide counter value by divisor to get the real value. */
#define I915_SAMPLE_QUEUED_DIVISOR (100)
+#define I915_SAMPLE_RUNNABLE_DIVISOR (100)
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
@@ -140,6 +142,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_QUEUED(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+#define I915_PMU_ENGINE_RUNNABLE(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_RUNNABLE)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [RFC 6/6] drm/i915/pmu: Add running counter
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
` (4 preceding siblings ...)
2018-01-22 18:43 ` [RFC 5/6] drm/i915/pmu: Add runnable counter Tvrtko Ursulin
@ 2018-01-22 18:43 ` Tvrtko Ursulin
2018-01-22 18:52 ` [RFC v2 0/6] Queued/runnable/running engine stats Chris Wilson
` (2 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-22 18:43 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
We add a PMU counter to expose the number of requests currently executing
on the GPU.
This is useful to analyze the overall load of the system.
v2:
* Rebase.
* Drop floating point constant. (Chris Wilson)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_pmu.c | 18 ++++++++++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
include/uapi/drm/i915_drm.h | 5 +++++
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f332eff6d057..86d9b9fb6aef 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -38,7 +38,8 @@
BIT(I915_SAMPLE_WAIT) | \
BIT(I915_SAMPLE_SEMA) | \
BIT(I915_SAMPLE_QUEUED) | \
- BIT(I915_SAMPLE_RUNNABLE))
+ BIT(I915_SAMPLE_RUNNABLE) | \
+ BIT(I915_SAMPLE_RUNNING))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -232,6 +233,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
update_sample(&engine->pmu.sample[I915_SAMPLE_RUNNABLE],
I915_SAMPLE_RUNNABLE_DIVISOR,
engine->request_stats.runnable);
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_RUNNING))
+ update_sample(&engine->pmu.sample[I915_SAMPLE_RUNNING],
+ I915_SAMPLE_RUNNING_DIVISOR,
+ last_seqno - current_seqno);
}
if (fw)
@@ -311,6 +317,7 @@ engine_event_status(struct intel_engine_cs *engine,
case I915_SAMPLE_WAIT:
case I915_SAMPLE_QUEUED:
case I915_SAMPLE_RUNNABLE:
+ case I915_SAMPLE_RUNNING:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -423,7 +430,8 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
}
if (sample == I915_SAMPLE_QUEUED ||
- sample == I915_SAMPLE_RUNNABLE)
+ sample == I915_SAMPLE_RUNNABLE ||
+ sample == I915_SAMPLE_RUNNING)
val = div_u64(val, FREQUENCY);
} else {
switch (event->attr.config) {
@@ -777,6 +785,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
/* No brackets or quotes below please. */
#define I915_SAMPLE_QUEUED_SCALE 0.01
#define I915_SAMPLE_RUNNABLE_SCALE 0.01
+#define I915_SAMPLE_RUNNING_SCALE 0.01
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
@@ -804,6 +813,8 @@ create_event_attributes(struct drm_i915_private *i915)
__stringify(I915_SAMPLE_QUEUED_SCALE)),
__engine_event_scale(I915_SAMPLE_RUNNABLE, "runnable",
__stringify(I915_SAMPLE_RUNNABLE_SCALE)),
+ __engine_event_scale(I915_SAMPLE_RUNNING, "running",
+ __stringify(I915_SAMPLE_RUNNING_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -819,6 +830,9 @@ create_event_attributes(struct drm_i915_private *i915)
BUILD_BUG_ON(I915_SAMPLE_RUNNABLE_DIVISOR !=
(1 / I915_SAMPLE_RUNNABLE_SCALE));
+ BUILD_BUG_ON(I915_SAMPLE_RUNNING_DIVISOR !=
+ (1 / I915_SAMPLE_RUNNING_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a06f1fc0c150..2adc87e48dab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -381,7 +381,7 @@ struct intel_engine_cs {
*
* Our internal timer stores the current counters in this field.
*/
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_RUNNABLE + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_RUNNING + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
/**
* @busy_stats: Has enablement of engine stats tracking been
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 05951839abe0..1618da74d8d8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -113,11 +113,13 @@ enum drm_i915_pmu_engine_sample {
I915_SAMPLE_SEMA = 2,
I915_SAMPLE_QUEUED = 3,
I915_SAMPLE_RUNNABLE = 4,
+ I915_SAMPLE_RUNNING = 5,
};
/* Divide counter value by divisor to get the real value. */
#define I915_SAMPLE_QUEUED_DIVISOR (100)
#define I915_SAMPLE_RUNNABLE_DIVISOR (100)
+#define I915_SAMPLE_RUNNING_DIVISOR (100)
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
@@ -145,6 +147,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_RUNNABLE(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_RUNNABLE)
+#define I915_PMU_ENGINE_RUNNING(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_RUNNING)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [RFC v2 0/6] Queued/runnable/running engine stats
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
` (5 preceding siblings ...)
2018-01-22 18:43 ` [RFC 6/6] drm/i915/pmu: Add running counter Tvrtko Ursulin
@ 2018-01-22 18:52 ` Chris Wilson
2018-01-24 18:01 ` Tvrtko Ursulin
2018-01-22 19:21 ` ✓ Fi.CI.BAT: success for " Patchwork
2018-01-23 5:11 ` ✗ Fi.CI.IGT: failure " Patchwork
8 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2018-01-22 18:52 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
Quoting Tvrtko Ursulin (2018-01-22 18:43:52)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Per-engine queue depths are an interesting metric for analyzing the system load
> and also for users who wish to use it to load balance their submissions based
> on it.
>
> In this version I have split the metrics into three separate counters:
>
> 1. QUEUED - From execbuf time to request being runnable - runnable meaning until
> dependencies have been resolved and fences signaled.
> 2. RUNNABLE - From runnable to running on the GPU.
> 3. RUNNING - Running on the GPU.
>
> When inspected with perf stat the output looks roughly like this:
>
> # time counts unit events
> 201.160490145 0.01 i915/rcs0-queued/
> 201.160490145 19.13 i915/rcs0-runnable/
> 201.160490145 2.39 i915/rcs0-running/
>
> The reported numbers are average queue depths for the last query period.
>
> Having split out metrics should be more flexible for all users, and it is still
> possible to fetch an atomic snapshot of all using the perf groups for those
> wanting to combine them.
>
> For users wanting instantaneous numbers instead of averaged, we could potentially
> expose them using the query API Lionel is working on.
> (https://patchwork.freedesktop.org/series/36622/)
>
> For instance a query packet could look like:
>
> #define DRM_I915_QUERY_ENGINE_QUEUES 0x04
>
> struct drm_i915_query_engine_queues {
> __u8 class;
> __u8 instance;
>
> __u8 pad[2];
>
> __u32 queued;
> __u32 runnable;
> __u32 running;
> };
>
> I also have patches to expose this via intel-gpu-top, using the perf API.
Can you stick a ewma loadavg just after the hostname in intel-gpu-overlay,
pretty please? :)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC v2 0/6] Queued/runnable/running engine stats
2018-01-22 18:52 ` [RFC v2 0/6] Queued/runnable/running engine stats Chris Wilson
@ 2018-01-24 18:01 ` Tvrtko Ursulin
2018-01-24 18:07 ` Chris Wilson
0 siblings, 1 reply; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-24 18:01 UTC (permalink / raw)
To: Chris Wilson, Tvrtko Ursulin, Intel-gfx
On 22/01/2018 18:52, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-22 18:43:52)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Per-engine queue depths are an interesting metric for analyzing the system load
>> and also for users who wish to use it to load balance their submissions based
>> on it.
>>
>> In this version I have split the metrics into three separate counters:
>>
>> 1. QUEUED - From execbuf time to request being runnable - runnable meaning until
>> dependencies have been resolved and fences signaled.
>> 2. RUNNABLE - From runnable to running on the GPU.
>> 3. RUNNING - Running on the GPU.
>>
>> When inspected with perf stat the output looks roughly like this:
>>
>> # time counts unit events
>> 201.160490145 0.01 i915/rcs0-queued/
>> 201.160490145 19.13 i915/rcs0-runnable/
>> 201.160490145 2.39 i915/rcs0-running/
>>
>> The reported numbers are average queue depths for the last query period.
>>
>> Having split out metrics should be more flexible for all users, and it is still
>> possible to fetch an atomic snapshot of all using the perf groups for those
>> wanting to combine them.
>>
>> For users wanting instantaneous numbers instead of averaged, we could potentially
>> expose them using the query API Lionel is working on.
>> (https://patchwork.freedesktop.org/series/36622/)
>>
>> For instance a query packet could look like:
>>
>> #define DRM_I915_QUERY_ENGINE_QUEUES 0x04
>>
>> struct drm_i915_query_engine_queues {
>> __u8 class;
>> __u8 instance;
>>
>> __u8 pad[2];
>>
>> __u32 queued;
>> __u32 runnable;
>> __u32 running;
>> };
>>
>> I also have patches to expose this via intel-gpu-top, using the perf API.
>
> Can you stick a ewma loadavg just after the hostname in intel-gpu-overlay,
> pretty please? :)
Sure, just one period and all three counters aggregated?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC v2 0/6] Queued/runnable/running engine stats
2018-01-24 18:01 ` Tvrtko Ursulin
@ 2018-01-24 18:07 ` Chris Wilson
0 siblings, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2018-01-24 18:07 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx
Quoting Tvrtko Ursulin (2018-01-24 18:01:14)
>
> On 22/01/2018 18:52, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-01-22 18:43:52)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Per-engine queue depths are an interesting metric for analyzing the system load
> >> and also for users who wish to use it to load balance their submissions based
> >> on it.
> >>
> >> In this version I have split the metrics into three separate counters:
> >>
> >> 1. QUEUED - From execbuf time to request being runnable - runnable meaning until
> >> dependencies have been resolved and fences signaled.
> >> 2. RUNNABLE - From runnable to running on the GPU.
> >> 3. RUNNING - Running on the GPU.
> >>
> >> When inspected with perf stat the output looks roughly like this:
> >>
> >> # time counts unit events
> >> 201.160490145 0.01 i915/rcs0-queued/
> >> 201.160490145 19.13 i915/rcs0-runnable/
> >> 201.160490145 2.39 i915/rcs0-running/
> >>
> >> The reported numbers are average queue depths for the last query period.
> >>
> >> Having split out metrics should be more flexible for all users, and it is still
> >> possible to fetch an atomic snapshot of all using the perf groups for those
> >> wanting to combine them.
> >>
> >> For users wanting instantaneous numbers instead of averaged, we could potentially
> >> expose them using the query API Lionel is working on.
> >> (https://patchwork.freedesktop.org/series/36622/)
> >>
> >> For instance a query packet could look like:
> >>
> >> #define DRM_I915_QUERY_ENGINE_QUEUES 0x04
> >>
> >> struct drm_i915_query_engine_queues {
> >> __u8 class;
> >> __u8 instance;
> >>
> >> __u8 pad[2];
> >>
> >> __u32 queued;
> >> __u32 runnable;
> >> __u32 running;
> >> };
> >>
> >> I also have patches to expose this via intel-gpu-top, using the perf API.
> >
> > Can you stick a ewma loadavg just after the hostname in intel-gpu-overlay,
> > pretty please? :)
>
> Sure, just one period and all three counters aggregated?
Hmm, just runnable + running I think matches loadavg best. (For the cpu
that is the number of tasks in the runqueue.) I think having the 1s,
30s, 15m figures would be useful but they can be computed in userspace
from the single (combined) sampler.
But the problem with just runnable + running is that we don't see those
inter-engine dependencies so clearly (but it does hide inter-device waits
etc), so I don't know.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* ✓ Fi.CI.BAT: success for Queued/runnable/running engine stats
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
` (6 preceding siblings ...)
2018-01-22 18:52 ` [RFC v2 0/6] Queued/runnable/running engine stats Chris Wilson
@ 2018-01-22 19:21 ` Patchwork
2018-01-23 5:11 ` ✗ Fi.CI.IGT: failure " Patchwork
8 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-01-22 19:21 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: Queued/runnable/running engine stats
URL : https://patchwork.freedesktop.org/series/36926/
State : success
== Summary ==
Series 36926v1 Queued/runnable/running engine stats
https://patchwork.freedesktop.org/api/1.0/series/36926/revisions/1/mbox/
Test gem_mmap_gtt:
Subgroup basic-small-bo-tiledx:
fail -> PASS (fi-gdg-551) fdo#102575
Test gem_ringfill:
Subgroup basic-default-hang:
dmesg-warn -> INCOMPLETE (fi-blb-e6850) fdo#101600 +1
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
incomplete -> PASS (fi-snb-2520m) fdo#103713
fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
fdo#101600 https://bugs.freedesktop.org/show_bug.cgi?id=101600
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:420s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:422s
fi-blb-e6850 total:146 pass:114 dwarn:0 dfail:0 fail:0 skip:31
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:483s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:280s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:485s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:482s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:468s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:455s
fi-elk-e7500 total:224 pass:168 dwarn:9 dfail:1 fail:0 skip:45
fi-gdg-551 total:288 pass:180 dwarn:0 dfail:0 fail:0 skip:108 time:278s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:517s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:390s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:398s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:413s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:452s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:410s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:456s
fi-kbl-7560u total:288 pass:269 dwarn:0 dfail:0 fail:0 skip:19 time:502s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:455s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:502s
fi-pnv-d510 total:146 pass:113 dwarn:0 dfail:0 fail:0 skip:32
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:428s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:506s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:525s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:485s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:481s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:416s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:430s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:520s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:399s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:569s
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:470s
06c8efda323ac918fad0e26d81e8884574ec8b84 drm-tip: 2018y-01m-22d-17h-43m-26s UTC integration manifest
04a7a34b2eb0 drm/i915/pmu: Add running counter
72966345ddb8 drm/i915/pmu: Add runnable counter
a1536333b48a drm/i915/pmu: Add queued counter
ccc54acc0b69 drm/i915: Keep a count of requests submitted from userspace
209a5eb505fd drm/i915: Keep a count of requests waiting for a slot on GPU
b7740c97908f drm/i915/pmu: Fix enable count array size and bounds checking
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7744/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* ✗ Fi.CI.IGT: failure for Queued/runnable/running engine stats
2018-01-22 18:43 [RFC v2 0/6] Queued/runnable/running engine stats Tvrtko Ursulin
` (7 preceding siblings ...)
2018-01-22 19:21 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-01-23 5:11 ` Patchwork
8 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2018-01-23 5:11 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: Queued/runnable/running engine stats
URL : https://patchwork.freedesktop.org/series/36926/
State : failure
== Summary ==
Test kms_flip:
Subgroup busy-flip-interruptible:
pass -> FAIL (shard-apl) fdo#103257
Subgroup flip-vs-panning-vs-hang-interruptible:
dmesg-warn -> PASS (shard-snb) fdo#103821
Test gem_eio:
Subgroup in-flight-external:
pass -> INCOMPLETE (shard-snb)
fail -> INCOMPLETE (shard-hsw) fdo#104676 +1
pass -> INCOMPLETE (shard-apl)
Subgroup in-flight-internal:
pass -> INCOMPLETE (shard-snb)
pass -> INCOMPLETE (shard-apl)
Test kms_frontbuffer_tracking:
Subgroup fbc-1p-offscren-pri-shrfb-draw-blt:
pass -> FAIL (shard-snb) fdo#101623 +1
Subgroup fbc-tilingchange:
fail -> PASS (shard-apl)
Subgroup fbc-1p-shrfb-fliptrack:
pass -> DMESG-FAIL (shard-apl) fdo#103167 +2
Test gem_softpin:
Subgroup noreloc-s4:
fail -> SKIP (shard-snb) fdo#103375
Test gem_exec_suspend:
Subgroup basic-s4-devices:
pass -> INCOMPLETE (shard-snb)
Test perf:
Subgroup buffer-fill:
pass -> FAIL (shard-apl) fdo#103755
Subgroup oa-exponents:
fail -> PASS (shard-apl) fdo#102254
Test kms_cursor_legacy:
Subgroup cursor-vs-flip-legacy:
fail -> PASS (shard-apl) fdo#103355
fdo#103257 https://bugs.freedesktop.org/show_bug.cgi?id=103257
fdo#103821 https://bugs.freedesktop.org/show_bug.cgi?id=103821
fdo#104676 https://bugs.freedesktop.org/show_bug.cgi?id=104676
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#103167 https://bugs.freedesktop.org/show_bug.cgi?id=103167
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#103755 https://bugs.freedesktop.org/show_bug.cgi?id=103755
fdo#102254 https://bugs.freedesktop.org/show_bug.cgi?id=102254
fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
shard-apl total:2670 pass:1658 dwarn:1 dfail:1 fail:26 skip:982 time:12752s
shard-hsw total:2721 pass:1707 dwarn:1 dfail:0 fail:10 skip:1001 time:15055s
shard-snb total:2598 pass:1241 dwarn:1 dfail:0 fail:7 skip:1346 time:7137s
Blacklisted hosts:
shard-kbl total:2670 pass:1788 dwarn:1 dfail:0 fail:22 skip:857 time:10063s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_7744/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* [RFC 4/6] drm/i915/pmu: Add queued counter
2018-01-18 10:41 [RFC 0/6] Submitted queue depth stats Tvrtko Ursulin
@ 2018-01-18 10:41 ` Tvrtko Ursulin
0 siblings, 0 replies; 16+ messages in thread
From: Tvrtko Ursulin @ 2018-01-18 10:41 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
We add a PMU counter to expose the number of requests which are ready to
run and waiting on a free slot on the GPU.
This is useful to analyze the overall load of the system.
v2: Don't limit to gen8+.
v3:
* Rebase for dynamic sysfs.
* Drop currently executing requests.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_pmu.c | 34 +++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
include/uapi/drm/i915_drm.h | 8 +++++++-
3 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index cbfca4a255ab..aaf48e85c35e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -36,7 +36,8 @@
#define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
BIT(I915_SAMPLE_WAIT) | \
- BIT(I915_SAMPLE_SEMA))
+ BIT(I915_SAMPLE_SEMA) | \
+ BIT(I915_SAMPLE_QUEUED))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -220,6 +221,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+ update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+ 1 / I915_SAMPLE_QUEUED_SCALE,
+ engine->queued);
}
if (fw)
@@ -297,6 +303,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+ case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -407,6 +414,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
} else {
val = engine->pmu.sample[sample].cur;
}
+
+ if (sample == I915_SAMPLE_QUEUED)
+ val = div_u64(val, FREQUENCY);
} else {
switch (event->attr.config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -719,6 +729,16 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
{ \
.sample = (__sample), \
.name = (__name), \
+ .suffix = "unit", \
+ .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+ .sample = (__sample), \
+ .name = (__name), \
+ .suffix = "scale", \
+ .value = (__scale), \
}
static struct i915_ext_attribute *
@@ -762,10 +782,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+ char *suffix;
+ char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+ __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+ __stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -852,13 +876,15 @@ create_event_attributes(struct drm_i915_private *i915)
engine->instance,
engine_events[i].sample));
- str = kasprintf(GFP_KERNEL, "%s-%s.unit",
- engine->name, engine_events[i].name);
+ str = kasprintf(GFP_KERNEL, "%s-%s.%s",
+ engine->name, engine_events[i].name,
+ engine_events[i].suffix);
if (!str)
goto err;
*attr_iter++ = &pmu_iter->attr.attr;
- pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+ pmu_iter = add_pmu_attr(pmu_iter, str,
+ engine_events[i].value);
}
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 77fff2488cde..84541b91bcd8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -379,7 +379,7 @@ struct intel_engine_cs {
*
* Our internal timer stores the current counters in this field.
*/
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
/**
* @busy_stats: Has enablement of engine stats tracking been
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 536ee4febd74..83458e5b1ac7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -110,9 +110,12 @@ enum drm_i915_gem_engine_class {
enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
- I915_SAMPLE_SEMA = 2
+ I915_SAMPLE_SEMA = 2,
+ I915_SAMPLE_QUEUED = 3
};
+#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */
+
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -133,6 +136,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread