All of lore.kernel.org
 help / color / mirror / Atom feed
From: sourab.gupta@intel.com
To: intel-gfx@lists.freedesktop.org
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>,
	Sourab Gupta <sourab.gupta@intel.com>,
	Matthew Auld <matthew.auld@intel.com>
Subject: [PATCH 6/6] drm/i915: Support for capturing MMIO register values
Date: Thu, 16 Mar 2017 11:50:11 +0530	[thread overview]
Message-ID: <1489645211-25729-7-git-send-email-sourab.gupta@intel.com> (raw)
In-Reply-To: <1489645211-25729-1-git-send-email-sourab.gupta@intel.com>

From: Sourab Gupta <sourab.gupta@intel.com>

This patch adds support for capturing MMIO register values through
i915 perf interface.
Userspace can request up to 8 MMIO register values to be dumped.
The addresses of these registers can be passed through the corresponding
property 'value' field while opening the stream.
The commands to dump the values of these MMIO registers are then
inserted into the ring along with the other commands.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  11 +++
 drivers/gpu/drm/i915/i915_perf.c | 157 ++++++++++++++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h      |  14 ++++
 3 files changed, 179 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b25ed24..e2cb29e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2194,6 +2194,14 @@ struct i915_perf_cs_sample {
 	u32 ts_offset;
 
 	/**
+	 * @mmio_offset: Offset into ``&drm_i915_private->
+	 * perf.command_stream_buf`` where the mmio reg values for this perf
+	 * sample will be collected (if the stream is configured for collection
+	 * of mmio data)
+	 */
+	u32 mmio_offset;
+
+	/**
 	 * @size: buffer size corresponding to this perf sample
 	 */
 	u32 size;
@@ -2576,6 +2584,9 @@ struct drm_i915_private {
 		wait_queue_head_t poll_wq[I915_NUM_ENGINES];
 		bool pollin[I915_NUM_ENGINES];
 
+		u32 num_mmio;
+		u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+
 		struct {
 			u32 specific_ctx_id;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 412fce2..9223e70 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -312,6 +312,7 @@ struct sample_data {
 	u64 gpu_ts;
 	u64 clk_monoraw;
 	const u8 *report;
+	const u8 *mmio;
 };
 
 /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
@@ -368,6 +369,7 @@ struct sample_data {
 #define SAMPLE_TAG		(1<<4)
 #define SAMPLE_TS		(1<<5)
 #define SAMPLE_CLK_MONO_RAW	(1<<6)
+#define SAMPLE_MMIO		(1<<7)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
@@ -625,6 +627,9 @@ static void insert_perf_sample(struct i915_perf_stream *stream,
 		sample_ts = true;
 	}
 
+	if (sample_flags & SAMPLE_MMIO)
+		sample_size += 4*dev_priv->perf.num_mmio;
+
 	spin_lock(&dev_priv->perf.sample_lock[id]);
 	if (list_empty(&dev_priv->perf.cs_samples[id])) {
 		offset = 0;
@@ -689,6 +694,10 @@ static void insert_perf_sample(struct i915_perf_stream *stream,
 		sample->ts_offset = ALIGN(sample->ts_offset, TS_ADDR_ALIGN);
 		offset = sample->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
 	}
+	if (sample_flags & SAMPLE_MMIO) {
+		sample->mmio_offset = offset;
+		offset = sample->mmio_offset + 4*dev_priv->perf.num_mmio;
+	}
 
 	list_add_tail(&sample->link, &dev_priv->perf.cs_samples[id]);
 
@@ -802,6 +811,49 @@ static int i915_engine_stream_capture_ts(struct drm_i915_gem_request *request,
 }
 
 /**
+ * i915_engine_stream_capture_mmio - Insert the commands to capture mmio
+ * data into the GPU command stream
+ * @request: request in whose context the mmio data is being collected.
+ * @offset: command stream buffer offset where the data needs to be collected
+ */
+static int i915_engine_stream_capture_mmio(struct drm_i915_gem_request *request,
+						u32 offset)
+{
+	struct drm_i915_private *dev_priv = request->i915;
+	enum intel_engine_id id = request->engine->id;
+	int i, num_mmio = dev_priv->perf.num_mmio;
+	u32 mmio_addr;
+	u32 cmd, *cs;
+
+	cs = intel_ring_begin(request, 4*num_mmio);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	mmio_addr =
+		dev_priv->perf.command_stream_buf[id].vma->node.start + offset;
+
+	if (INTEL_GEN(dev_priv) >= 8)
+		cmd = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+	else
+		cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+
+	for (i = 0; i < num_mmio; i++) {
+		/* Four dwords per register, matching the 4*num_mmio reserved
+		 * above: opcode, register offset, destination, zero/NOOP. */
+		*cs++ = cmd;
+		*cs++ = dev_priv->perf.mmio_list[i];
+		*cs++ = mmio_addr + 4*i;
+
+		if (INTEL_GEN(dev_priv) >= 8)
+			*cs++ = 0;
+		else
+			*cs++ = MI_NOOP;
+	}
+	intel_ring_advance(request, cs);
+	return 0;
+}
+
+/**
  * i915_engine_stream_capture_cs_data - Insert the commands to capture perf
  * metrics into the GPU command stream
  * @stream: An i915-perf stream opened for GPU metrics
@@ -847,6 +899,13 @@ static void i915_engine_stream_capture_cs_data(struct i915_perf_stream *stream,
 			goto err_unref;
 	}
 
+	if (sample_flags & SAMPLE_MMIO) {
+		ret = i915_engine_stream_capture_mmio(request,
+				sample->mmio_offset);
+		if (ret)
+			goto err_unref;
+	}
+
 	i915_gem_active_set(&stream->last_request, request);
 	i915_vma_move_to_active(dev_priv->perf.command_stream_buf[id].vma,
 					request, EXEC_OBJECT_WRITE);
@@ -1300,6 +1359,12 @@ static int append_sample(struct i915_perf_stream *stream,
 		buf += I915_PERF_TS_SAMPLE_SIZE;
 	}
 
+	if (sample_flags & SAMPLE_MMIO) {
+		if (copy_to_user(buf, data->mmio, 4*dev_priv->perf.num_mmio))
+			return -EFAULT;
+		buf += 4*dev_priv->perf.num_mmio;
+	}
+
 	if (sample_flags & SAMPLE_OA_REPORT) {
 		if (copy_to_user(buf, data->report, report_size))
 			return -EFAULT;
@@ -1377,6 +1442,7 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 	struct drm_i915_private *dev_priv =  stream->dev_priv;
 	u32 sample_flags = stream->sample_flags;
 	struct sample_data data = { 0 };
+	u32 mmio_list_dummy[I915_PERF_MMIO_NUM_MAX] = { 0 };
 
 	if (sample_flags & SAMPLE_OA_SOURCE_INFO)
 		data.source = I915_PERF_OA_EVENT_SOURCE_PERIODIC;
@@ -1399,6 +1465,10 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 		data.clk_monoraw = get_clk_monoraw_from_gpu_ts(stream, gpu_ts);
 	}
 
+	/* Periodic OA samples don't have mmio associated with them */
+	if (sample_flags & SAMPLE_MMIO)
+		data.mmio = (u8 *)mmio_list_dummy;
+
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
 
@@ -2012,6 +2082,10 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
 				get_clk_monoraw_from_gpu_ts(stream, gpu_ts);
 	}
 
+	if (sample_flags & SAMPLE_MMIO)
+		data.mmio = dev_priv->perf.command_stream_buf[id].vaddr +
+				node->mmio_offset;
+
 	return append_sample(stream, buf, count, offset, &data);
 }
 
@@ -3132,10 +3206,12 @@ static int i915_engine_stream_init(struct i915_perf_stream *stream,
 	bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
 						      SAMPLE_OA_SOURCE_INFO);
 	bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
-						      SAMPLE_TAG);
+						      SAMPLE_TAG |
+						      SAMPLE_MMIO);
 	bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
 							SAMPLE_TS |
-							SAMPLE_CLK_MONO_RAW);
+							SAMPLE_CLK_MONO_RAW |
+							SAMPLE_MMIO);
 	int ret;
 
 	/* To avoid the complexity of having to accurately filter
@@ -3299,7 +3375,8 @@ static int i915_engine_stream_init(struct i915_perf_stream *stream,
 	}
 
 	if (require_cs_mode && !props->cs_mode) {
-		DRM_ERROR("PID/TAG/TS sampling requires engine to be specified");
+		DRM_ERROR(
+			"PID/TAG/TS/MMIO sampling requires engine to be specified");
 		ret = -EINVAL;
 		goto err_enable;
 	}
@@ -3338,6 +3415,11 @@ static int i915_engine_stream_init(struct i915_perf_stream *stream,
 			stream->sample_size += 4;
 		}
 
+		if (props->sample_flags & SAMPLE_MMIO) {
+			stream->sample_flags |= SAMPLE_MMIO;
+			stream->sample_size += 4 * dev_priv->perf.num_mmio;
+		}
+
 		ret = alloc_command_stream_buf(stream);
 		if (ret)
 			goto err_enable;
@@ -3985,6 +4067,69 @@ static int i915_perf_release(struct inode *inode, struct file *file)
 	return ret;
 }
 
+static int check_mmio_whitelist(struct drm_i915_private *dev_priv, u32 num_mmio)
+{
+#define GEN_RANGE(l, h) GENMASK(h, l)
+	static const struct register_whitelist {
+		i915_reg_t mmio;
+		uint32_t size;
+		/* one bit per supported gen: bit N set means gen N is allowed */
+		uint32_t gen_bitmask;
+	} whitelist[] = {
+		{ GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) },
+		{ GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) },
+	};
+	const u32 gen_bit = 1 << INTEL_INFO(dev_priv)->gen;
+	int idx, w;
+
+	for (idx = 0; idx < num_mmio; idx++) {
+		const u32 reg = dev_priv->perf.mmio_list[idx];
+
+		/* Coarse sanity check: a zero register offset is rejected */
+		if (!reg)
+			return -EINVAL;
+
+		for (w = 0; w < ARRAY_SIZE(whitelist); w++) {
+			if (i915_mmio_reg_offset(whitelist[w].mmio) == reg &&
+			    (whitelist[w].gen_bitmask & gen_bit))
+				break;
+		}
+		if (w == ARRAY_SIZE(whitelist))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int copy_mmio_list(struct drm_i915_private *dev_priv,
+				void __user *mmio)
+{
+	void __user *mmio_list = ((u8 __user *)mmio + 4);
+	u32 num_mmio;
+	int ret;
+
+	if (!mmio)
+		return -EINVAL;
+	/* User layout: u32 count at offset 0, u32 entries from offset 4 */
+	ret = get_user(num_mmio, (u32 __user *)mmio);
+	if (ret)
+		return ret;
+
+	if (num_mmio > I915_PERF_MMIO_NUM_MAX)
+		return -EINVAL;
+	/* memset takes a byte count: clear the whole array, not 8 bytes */
+	memset(dev_priv->perf.mmio_list, 0, sizeof(dev_priv->perf.mmio_list));
+	if (copy_from_user(dev_priv->perf.mmio_list, mmio_list, 4*num_mmio))
+		return -EFAULT;
+
+	ret = check_mmio_whitelist(dev_priv, num_mmio);
+	if (ret)
+		return ret;
+
+	dev_priv->perf.num_mmio = num_mmio;
+
+	return 0;
+}
+
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
  * @dev_priv: i915 device instance
@@ -4139,6 +4284,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW:
 			props->sample_flags |= SAMPLE_CLK_MONO_RAW;
 			break;
+		case DRM_I915_PERF_PROP_SAMPLE_MMIO:
+			ret = copy_mmio_list(dev_priv, (u64 __user *)value);
+			if (ret)
+				return ret;
+			props->sample_flags |= SAMPLE_MMIO;
+			break;
 		default:
 			MISSING_CASE(id);
 			DRM_DEBUG("Unknown i915 perf property ID\n");
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index fbe91d9..bb4ed4a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1324,6 +1324,12 @@ enum drm_i915_perf_oa_event_source {
 	I915_PERF_OA_EVENT_SOURCE_MAX	/* non-ABI */
 };
 
+#define I915_PERF_MMIO_NUM_MAX	8	/* max registers per mmio list */
+struct drm_i915_perf_mmio_list {
+	__u32 num_mmio;		/* number of valid entries in mmio_list */
+	__u32 mmio_list[I915_PERF_MMIO_NUM_MAX];	/* register offsets */
+};
+
 enum drm_i915_perf_property_id {
 	/**
 	 * Open the stream for a specific context handle (as used with
@@ -1402,6 +1408,13 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW,
 
+	/**
+	 * This property requests inclusion of mmio register values in the perf
+	 * sample data. The value of this property specifies the address of a
+	 * user struct drm_i915_perf_mmio_list holding the register addresses.
+	 */
+	DRM_I915_PERF_PROP_SAMPLE_MMIO,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1473,6 +1486,7 @@ enum drm_i915_perf_record_type {
 	 *     { u32 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
 	 *     { u64 gpu_ts; } && DRM_I915_PERF_PROP_SAMPLE_TS
 	 *     { u64 clk_mono; } && DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC
+	 *     { u32 mmio[]; } && DRM_I915_PERF_PROP_SAMPLE_MMIO
 	 *     { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
 	 * };
 	 */
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2017-03-16  6:18 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-16  6:20 [PATCH 0/6] Collect command stream based GPU metrics for all engines using i915 perf sourab.gupta
2017-03-16  6:20 ` [PATCH 1/6] drm/i915: Extend i915 perf framework for collecting timestamps on all gpu engines sourab.gupta
2017-03-16  7:49   ` Chris Wilson
2017-03-16  8:33     ` sourab gupta
2017-03-16  6:20 ` [PATCH 2/6] drm/i915: Extract raw GPU timestamps from OA reports to forward in perf samples sourab.gupta
2017-03-16  6:20 ` [PATCH 3/6] drm/i915: Support opening multiple concurrent perf streams sourab.gupta
2017-03-16  6:20 ` [PATCH 4/6] time: Expose current clocksource in use by timekeeping framework sourab.gupta
2017-03-16  8:10   ` Thomas Gleixner
2017-03-16  6:20 ` [PATCH 5/6] drm/i915: Mechanism to forward clock monotonic raw time in perf samples sourab.gupta
2017-03-16  6:20 ` sourab.gupta [this message]
2017-03-16  7:54   ` [PATCH 6/6] drm/i915: Support for capturing MMIO register values Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1489645211-25729-7-git-send-email-sourab.gupta@intel.com \
    --to=sourab.gupta@intel.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.