From: Xiaolin Zhang <xiaolin.zhang@intel.com>
To: intel-gfx@lists.freedesktop.org
Cc: "Yuan, Hang" <hang.yuan@intel.com>,
	"Gong, Zhipeng" <zhipeng.gong@intel.com>, Zhiyuan Lv <zhiyuan.lv@intel.com>,
	"Jiang, Fei" <fei.jiang@intel.com>, intel-gvt-dev@lists.freedesktop.org,
	"He, Min" <min.he@intel.com>
Subject: [PATCH v2 3/5] drm/i915: context submission pvmmio optimization
Date: Fri, 19 Oct 2018 15:27:12 +0800	[thread overview]
Message-ID: <1539934034-31343-4-git-send-email-xiaolin.zhang@intel.com> (raw)
In-Reply-To: <1539934034-31343-1-git-send-email-xiaolin.zhang@intel.com>

This is a performance optimization to reduce the number of MMIO traps
from 4 to 1 during ELSP port writing (context submission).

On context submission, the ELSP descriptor dwords are cached in the
elsp_data[4] array in the shared page; only the final port 0 write goes
to the real ELSP register and is trapped to GVT, which then performs the
actual context submission.

The PVMMIO_ELSP_SUBMIT capability bit controls this level of pvmmio
optimization.
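
In pseudo-C, the guest side of this path boils down to the following
(condensed from the intel_lrc.c hunk below; shared_page, elsp_data[] and
shared_page_lock are introduced earlier in this series; desc[0]/desc[1]
are just illustrative names for the port 0/port 1 context descriptors):

	/*
	 * Cache the first three ELSP dwords in the shared page; only the
	 * final dword (low half of the port 0 descriptor) is written to
	 * the real ELSP register, so only that single write traps.
	 */
	spin_lock(&dev_priv->vgpu.shared_page_lock);
	elsp_data = dev_priv->vgpu.shared_page->elsp_data;
	elsp_data[0] = upper_32_bits(desc[1]);	/* port 1, high dword */
	elsp_data[1] = lower_32_bits(desc[1]);	/* port 1, low dword  */
	elsp_data[2] = upper_32_bits(desc[0]);	/* port 0, high dword */
	writel(lower_32_bits(desc[0]), elsp);	/* the one trapped MMIO write */
	spin_unlock(&dev_priv->vgpu.shared_page_lock);

When the trap arrives, GVT reads the three cached dwords from the shared
page and performs the full four-dword ELSP submission on the guest's
behalf, turning four trapped writes into one.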

v0: RFC
v1: rebase
v2: added pv ops for pv context submission. To maximize code reuse,
introduced 2 new ops (submit_ports & preempt_context) in the engine
structure instead of 1 op (set_default_submission). Implemented pv
versions of submit_ports and preempt_context.

Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: He, Min <min.he@intel.com>
Cc: Jiang, Fei <fei.jiang@intel.com>
Cc: Gong, Zhipeng <zhipeng.gong@intel.com>
Cc: Yuan, Hang <hang.yuan@intel.com>
Cc: Zhiyuan Lv <zhiyuan.lv@intel.com>
Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
---
 drivers/gpu/drm/i915/i915_vgpu.c        |  2 +
 drivers/gpu/drm/i915/intel_lrc.c        | 88 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++
 3 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index cb409d5..9870ea6 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -66,6 +66,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 
 	BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
+	dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT;
+
 	magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
 	if (magic != VGT_MAGIC)
 		return;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 22b57b8..9e6ccf9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -460,6 +460,60 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
+static void execlists_submit_ports_pv(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+	struct execlist_port *port = execlists->port;
+	u32 __iomem *elsp =
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u32 *elsp_data;
+	unsigned int n;
+	u32 descs[4];
+	int i = 0;
+
+	/*
+	 * ELSQ note: the submit queue is not cleared after being submitted
+	 * to the HW so we need to make sure we always clean it up. This is
+	 * currently ensured by the fact that we always write the same number
+	 * of elsq entries, keep this in mind before changing the loop below.
+	 */
+	for (n = execlists_num_ports(execlists); n--; ) {
+		struct i915_request *rq;
+		unsigned int count;
+		u64 desc;
+
+		rq = port_unpack(&port[n], &count);
+		if (rq) {
+			GEM_BUG_ON(count > !n);
+			if (!count++)
+				execlists_context_schedule_in(rq);
+			port_set(&port[n], port_pack(rq, count));
+			desc = execlists_update_context(rq);
+		} else {
+			GEM_BUG_ON(!n);
+			desc = 0;
+		}
+		GEM_BUG_ON(i >= 4);
+		descs[i] = upper_32_bits(desc);
+		descs[i + 1] = lower_32_bits(desc);
+		i += 2;
+	}
+
+	spin_lock(&engine->i915->vgpu.shared_page_lock);
+	elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+	*elsp_data = descs[0];
+	*(elsp_data + 1) = descs[1];
+	*(elsp_data + 2) = descs[2];
+	writel(descs[3], elsp);
+	spin_unlock(&engine->i915->vgpu.shared_page_lock);
+
+	/* we need to manually load the submit queue */
+	if (execlists->ctrl_reg)
+		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+}
+
 static bool ctx_single_port_submission(const struct intel_context *ce)
 {
 	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
@@ -497,7 +551,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 
 	GEM_BUG_ON(execlists->preempt_complete_status !=
 		   upper_32_bits(ce->lrc_desc));
-
 	/*
 	 * Switch to our empty preempt context so
 	 * the state of the GPU is known (idle).
@@ -516,6 +569,27 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void inject_preempt_context_pv(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+	struct intel_context *ce =
+		to_intel_context(engine->i915->preempt_context, engine);
+	u32 __iomem *elsp =
+		engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+	u32 *elsp_data;
+
+	GEM_BUG_ON(execlists->preempt_complete_status !=
+		   upper_32_bits(ce->lrc_desc));
+
+	spin_lock(&engine->i915->vgpu.shared_page_lock);
+	elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+	*elsp_data = 0;
+	*(elsp_data + 1) = 0;
+	*(elsp_data + 2) = upper_32_bits(ce->lrc_desc);
+	writel(lower_32_bits(ce->lrc_desc), elsp);
+	spin_unlock(&engine->i915->vgpu.shared_page_lock);
+}
+
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
 	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
@@ -583,7 +657,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			return;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			engine->preempt_context(engine);
 			return;
 		}
 
@@ -705,7 +779,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 	if (submit) {
 		port_assign(port, last);
-		execlists_submit_ports(engine);
+		engine->submit_ports(engine);
 	}
 
 	/* We must always keep the beast fed if we have work piled up */
@@ -2134,6 +2208,14 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 
 	engine->reset.prepare = execlists_reset_prepare;
 
+	engine->preempt_context = inject_preempt_context;
+	engine->submit_ports = execlists_submit_ports;
+
+	if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
+		engine->preempt_context = inject_preempt_context_pv;
+		engine->submit_ports = execlists_submit_ports_pv;
+	}
+
 	engine->park = NULL;
 	engine->unpark = NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a..e9895bf 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -523,6 +523,9 @@ struct intel_engine_cs {
 	void		(*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void		(*cleanup)(struct intel_engine_cs *engine);
 
+	void		(*preempt_context)(struct intel_engine_cs *engine);
+	void		(*submit_ports)(struct intel_engine_cs *engine);
+
 	/* GEN8 signal/wait table - never trust comments!
 	 *	  signal to	signal to    signal to   signal to      signal to
 	 *	    RCS		   VCS          BCS        VECS		 VCS2
-- 
2.7.4

Thread overview: 17+ messages
2018-10-19  7:27 [PATCH v2 0/5] i915 pvmmio to improve GVTg performance Xiaolin Zhang
2018-10-19  7:27 ` [PATCH v2 1/5] drm/i915: introduced pv capability for vgpu Xiaolin Zhang
2018-10-31  9:18   ` Zhang, Xiaolin
2018-10-19  7:27 ` [PATCH v2 2/5] drm/i915: get ready of memory for pvmmio Xiaolin Zhang
2018-10-31  9:18   ` Zhang, Xiaolin
2018-10-19  7:27 ` Xiaolin Zhang [this message]
2018-10-31  9:18   ` [PATCH v2 3/5] drm/i915: context submission pvmmio optimization Zhang, Xiaolin
2018-10-19  7:27 ` [PATCH v2 4/5] drm/i915: master irq " Xiaolin Zhang
2018-10-31  9:18   ` Zhang, Xiaolin
2018-10-19  7:27 ` [PATCH v2 5/5] drm/i915: ppgtt update " Xiaolin Zhang
2018-10-31  9:19   ` Zhang, Xiaolin
2018-10-22 10:00 ` ✗ Fi.CI.CHECKPATCH: warning for i915 pvmmio to improve GVTg performance Patchwork
2018-10-22 10:03 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-10-22 10:25 ` ✓ Fi.CI.BAT: success " Patchwork
2018-10-22 12:42 ` ✓ Fi.CI.IGT: " Patchwork
2018-10-25  1:53 ` [PATCH v2 0/5] " Zhang, Xiaolin
2018-10-31 12:13 ` ✗ Fi.CI.BAT: failure for i915 pvmmio to improve GVTg performance (rev6) Patchwork
