All of lore.kernel.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Insoo Woo <insoo.woo@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Jabin Wu <jabin.wu@intel.com>,
	Sourab Gupta <sourab.gupta@intel.com>
Subject: [RFC 1/8] drm/i915: Add a new PMU for handling non-OA counter data profiling requests
Date: Wed,  5 Aug 2015 11:25:37 +0530	[thread overview]
Message-ID: <1438754144-20435-2-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1438754144-20435-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

The current perf PMU driver is specific to the collection of OA counter
statistics (which may be done in a periodic or asynchronous way). Since
this driver enables us to profile (and limits us to) the render ring, we
have no means of collecting data pertaining to other rings.

To overcome this limitation, we need to have a new PMU driver which enables
data collection for other rings also (in a non-OA specific mode).
This patch adds a new perf PMU to the i915 device private structure, for
handling profiling requests for non-OA counter data. This data may
encompass timestamps, MMIO register values, etc. for the relevant ring.
The new perf PMU will serve these purposes without constraining itself to
the type of data being dumped (which may restrict the user to a specific
ring, as in the case of OA counters).

The patch introduces this PMU driver along with its associated callbacks.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c     |   2 +
 drivers/gpu/drm/i915/i915_drv.h     |  19 ++++
 drivers/gpu/drm/i915/i915_oa_perf.c | 215 ++++++++++++++++++++++++++++++++++++
 3 files changed, 236 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0553f20..4b91504 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -822,6 +822,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	 * otherwise i915_oa_context_pin_notify() will lock an un-initialized
 	 * spinlock, upsetting lockdep checks */
 	i915_oa_pmu_register(dev);
+	i915_gen_pmu_register(dev);
 
 	intel_pm_setup(dev);
 
@@ -1072,6 +1073,7 @@ int i915_driver_unload(struct drm_device *dev)
 		return ret;
 	}
 
+	i915_gen_pmu_unregister(dev);
 	i915_oa_pmu_unregister(dev);
 	intel_power_domains_fini(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d5d9156..66f9ee9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1988,6 +1988,21 @@ struct drm_i915_private {
 		int sample_info_flags;
 	} oa_pmu;
 
+	struct {
+		struct pmu pmu;
+		spinlock_t lock;
+		struct hrtimer timer;
+		struct pt_regs dummy_regs;
+		struct perf_event *exclusive_event;
+		bool event_active;
+
+		struct {
+			struct drm_i915_gem_object *obj;
+			u32 gtt_offset;
+			u8 *addr;
+		} buffer;
+	} gen_pmu;
+
 	void (*emit_profiling_data[I915_PROFILE_MAX])
 		(struct drm_i915_gem_request *req, u32 global_ctx_id, u32 tag);
 #endif
@@ -3295,10 +3310,14 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 /* i915_oa_perf.c */
 #ifdef CONFIG_PERF_EVENTS
 extern void i915_oa_pmu_register(struct drm_device *dev);
+extern void i915_gen_pmu_register(struct drm_device *dev);
 extern void i915_oa_pmu_unregister(struct drm_device *dev);
+extern void i915_gen_pmu_unregister(struct drm_device *dev);
 #else
 static inline void i915_oa_pmu_register(struct drm_device *dev) {}
+static inline void i915_gen_pmu_register(struct drm_device *dev) {}
 static inline void i915_oa_pmu_unregister(struct drm_device *dev) {}
+static inline void i915_gen_pmu_unregister(struct drm_device *dev) {}
 #endif
 
 /* i915_suspend.c */
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 48591fc..37ff0a9 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -414,6 +414,13 @@ static void forward_oa_rcs_work_fn(struct work_struct *__work)
 	forward_oa_rcs_snapshots(dev_priv);
 }
 
+static void forward_gen_pmu_snapshots(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!dev_priv->gen_pmu.buffer.addr);
+
+	/* TODO: routine for forwarding snapshots to userspace */
+}
+
 static void
 oa_rcs_buffer_destroy(struct drm_i915_private *i915)
 {
@@ -551,6 +558,34 @@ out:
 	spin_unlock(&dev_priv->oa_pmu.lock);
 }
 
+static void gen_buffer_destroy(struct drm_i915_private *i915)
+{
+	mutex_lock(&i915->dev->struct_mutex);
+	vunmap(i915->gen_pmu.buffer.addr);
+	i915_gem_object_ggtt_unpin(i915->gen_pmu.buffer.obj);
+	drm_gem_object_unreference(&i915->gen_pmu.buffer.obj->base);
+	mutex_unlock(&i915->dev->struct_mutex);
+
+	spin_lock(&i915->gen_pmu.lock);
+	i915->gen_pmu.buffer.obj = NULL;
+	i915->gen_pmu.buffer.gtt_offset = 0;
+	i915->gen_pmu.buffer.addr = NULL;
+	spin_unlock(&i915->gen_pmu.lock);
+}
+
+static void i915_gen_event_destroy(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+
+	WARN_ON(event->parent);
+
+	gen_buffer_destroy(i915);
+
+	BUG_ON(i915->gen_pmu.exclusive_event != event);
+	i915->gen_pmu.exclusive_event = NULL;
+}
+
 static int alloc_obj(struct drm_i915_private *dev_priv,
 				struct drm_i915_gem_object **obj)
 {
@@ -712,6 +747,41 @@ static int init_oa_rcs_buffer(struct perf_event *event)
 	return 0;
 }
 
+static int init_gen_pmu_buffer(struct perf_event *event)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	struct drm_i915_gem_object *bo;
+	int ret;
+
+	BUG_ON(dev_priv->gen_pmu.buffer.obj);
+
+	ret = alloc_obj(dev_priv, &bo);
+	if (ret)
+		return ret;
+
+	dev_priv->gen_pmu.buffer.obj = bo;
+	dev_priv->gen_pmu.buffer.gtt_offset =
+				i915_gem_obj_ggtt_offset(bo);
+	dev_priv->gen_pmu.buffer.addr = vmap_oa_buffer(bo);
+
+	DRM_DEBUG_DRIVER("Gen PMU Buffer initialized, vaddr = %p",
+			 dev_priv->gen_pmu.buffer.addr);
+
+	return 0;
+}
+
+static enum hrtimer_restart hrtimer_sample_gen(struct hrtimer *hrtimer)
+{
+	struct drm_i915_private *i915 =
+		container_of(hrtimer, typeof(*i915), gen_pmu.timer);
+
+	forward_gen_pmu_snapshots(i915);
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
+	return HRTIMER_RESTART;
+}
+
 static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
@@ -1224,6 +1294,106 @@ static int i915_oa_event_event_idx(struct perf_event *event)
 	return 0;
 }
 
+static int i915_gen_event_init(struct perf_event *event)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	int ret = 0;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* To avoid the complexity of having to accurately filter
+	 * data and marshal to the appropriate client
+	 * we currently only allow exclusive access */
+	if (dev_priv->gen_pmu.buffer.obj)
+		return -EBUSY;
+
+	/*
+	 * We need to check for CAP_SYS_ADMIN capability as we profile all
+	 * the running contexts
+	 */
+	if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+
+	ret = init_gen_pmu_buffer(event);
+	if (ret)
+		return ret;
+
+	BUG_ON(dev_priv->gen_pmu.exclusive_event);
+	dev_priv->gen_pmu.exclusive_event = event;
+
+	event->destroy = i915_gen_event_destroy;
+
+	return 0;
+}
+
+static void i915_gen_event_start(struct perf_event *event, int flags)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+
+	spin_lock(&dev_priv->gen_pmu.lock);
+	dev_priv->gen_pmu.event_active = true;
+	spin_unlock(&dev_priv->gen_pmu.lock);
+
+	__hrtimer_start_range_ns(&dev_priv->gen_pmu.timer, ns_to_ktime(PERIOD),
+					0, HRTIMER_MODE_REL_PINNED, 0);
+
+	event->hw.state = 0;
+}
+
+static void i915_gen_event_stop(struct perf_event *event, int flags)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+
+	spin_lock(&dev_priv->gen_pmu.lock);
+	dev_priv->gen_pmu.event_active = false;
+	spin_unlock(&dev_priv->gen_pmu.lock);
+
+	hrtimer_cancel(&dev_priv->gen_pmu.timer);
+	forward_gen_pmu_snapshots(dev_priv);
+
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static int i915_gen_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		i915_gen_event_start(event, flags);
+
+	return 0;
+}
+
+static void i915_gen_event_del(struct perf_event *event, int flags)
+{
+	i915_gen_event_stop(event, flags);
+}
+
+static void i915_gen_event_read(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+
+	/* XXX: What counter would be useful here? */
+	local64_set(&event->count, 0);
+}
+
+static int i915_gen_event_flush(struct perf_event *event)
+{
+	struct drm_i915_private *i915 =
+		container_of(event->pmu, typeof(*i915), gen_pmu.pmu);
+
+	forward_gen_pmu_snapshots(i915);
+	return 0;
+}
+
+static int i915_gen_event_event_idx(struct perf_event *event)
+{
+	return 0;
+}
+
 void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 				struct intel_context *context)
 {
@@ -1352,3 +1522,48 @@ void i915_oa_pmu_unregister(struct drm_device *dev)
 	perf_pmu_unregister(&i915->oa_pmu.pmu);
 	i915->oa_pmu.pmu.event_init = NULL;
 }
+
+void i915_gen_pmu_register(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	if (!(IS_HASWELL(dev) || IS_VALLEYVIEW(dev) || IS_BROADWELL(dev)))
+		return;
+
+	i915->gen_pmu.dummy_regs = *task_pt_regs(current);
+
+	hrtimer_init(&i915->gen_pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	i915->gen_pmu.timer.function = hrtimer_sample_gen;
+
+	spin_lock_init(&i915->gen_pmu.lock);
+
+	i915->gen_pmu.pmu.capabilities  = PERF_PMU_CAP_IS_DEVICE;
+
+	/* Effectively disallow opening an event with a specific pid
+	 * since we aren't interested in processes running on the cpu...
+	 */
+	i915->gen_pmu.pmu.task_ctx_nr   = perf_invalid_context;
+
+	i915->gen_pmu.pmu.event_init    = i915_gen_event_init;
+	i915->gen_pmu.pmu.add	       = i915_gen_event_add;
+	i915->gen_pmu.pmu.del	       = i915_gen_event_del;
+	i915->gen_pmu.pmu.start	       = i915_gen_event_start;
+	i915->gen_pmu.pmu.stop	       = i915_gen_event_stop;
+	i915->gen_pmu.pmu.read	       = i915_gen_event_read;
+	i915->gen_pmu.pmu.flush	       = i915_gen_event_flush;
+	i915->gen_pmu.pmu.event_idx     = i915_gen_event_event_idx;
+
+	if (perf_pmu_register(&i915->gen_pmu.pmu, "i915_gen", -1))
+		i915->gen_pmu.pmu.event_init = NULL;
+}
+
+void i915_gen_pmu_unregister(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+
+	if (i915->gen_pmu.pmu.event_init == NULL)
+		return;
+
+	perf_pmu_unregister(&i915->gen_pmu.pmu);
+	i915->gen_pmu.pmu.event_init = NULL;
+}
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2015-08-05  5:53 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-05  5:55 [RFC 0/8] Introduce framework for forwarding generic non-OA performance sourab.gupta
2015-08-05  5:55 ` sourab.gupta [this message]
2015-08-05  9:22   ` [RFC 1/8] drm/i915: Add a new PMU for handling non-OA counter data profiling requests Chris Wilson
2015-08-05  9:40     ` Gupta, Sourab
2015-08-05  9:38   ` Chris Wilson
2015-08-05  9:45     ` Gupta, Sourab
2015-08-05  9:49       ` Gupta, Sourab
2015-08-05 11:08         ` Chris Wilson
2015-08-05  9:56       ` Chris Wilson
2015-08-05  5:55 ` [RFC 2/8] drm/i915: Add mechanism for forwarding the timestamp data through perf sourab.gupta
2015-08-05  9:55   ` Chris Wilson
2015-08-05  5:55 ` [RFC 3/8] drm/i915: Handle event stop and destroy for GPU commands submitted sourab.gupta
2015-08-05  5:55 ` [RFC 4/8] drm/i915: Insert commands for capturing timestamps in the ring sourab.gupta
2015-08-05  9:30   ` Chris Wilson
2015-08-05  9:54     ` Gupta, Sourab
2015-08-05  5:55 ` [RFC 5/8] drm/i915: Add support for forwarding ring id in sample metadata through perf sourab.gupta
2015-08-05  9:26   ` Chris Wilson
2015-08-05  5:55 ` [RFC 6/8] drm/i915: Add support for forwarding pid in timestamp " sourab.gupta
2015-08-05  5:55 ` [RFC 7/8] drm/i915: Add support for forwarding execbuffer tags in timestamp sample metadata sourab.gupta
2015-08-05  9:17   ` Chris Wilson
2015-08-05  9:29     ` Daniel Vetter
2015-08-05 13:59       ` Robert Bragg
2015-08-05 15:25         ` Daniel Vetter
2015-08-05 16:48           ` Robert Bragg
2015-08-05  5:55 ` [RFC 8/8] drm/i915: Support for retrieving MMIO register values alongwith timestamps through perf sourab.gupta
2015-08-05 10:03   ` Chris Wilson
2015-08-05 10:18     ` Gupta, Sourab
2015-08-05 10:30       ` Chris Wilson
2015-08-05 14:22         ` Gupta, Sourab
2015-08-05 20:19   ` Robert Bragg
  -- strict thread matches above, loose matches on Subject: below --
2015-07-15  8:51 [RFC 0/8] Introduce framework for forwarding generic non-OA performance sourab.gupta
2015-07-15  8:51 ` [RFC 1/8] drm/i915: Add a new PMU for handling non-OA counter data profiling requests sourab.gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1438754144-20435-2-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=insoo.woo@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jabin.wu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.