All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Matthew Auld <matthew.auld@intel.com>
Subject: [PATCH 11/38] drm/i915: Always allocate an object/vma for the HWSP
Date: Fri, 18 Jan 2019 14:00:42 +0000	[thread overview]
Message-ID: <20190118140109.25261-12-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20190118140109.25261-1-chris@chris-wilson.co.uk>

Currently we allocate an object and vma only when we are using a GGTT
virtual HWSP (hardware status page), and use a plain struct page for a
physical HWSP. For convenience later on with global timelines, it will be
useful to always have the status page tracked by a struct i915_vma. Make
it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c       | 109 ++++++++++---------
 drivers/gpu/drm/i915/intel_guc_submission.c  |   5 +
 drivers/gpu/drm/i915/intel_lrc.c             |  11 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  20 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  23 +---
 drivers/gpu/drm/i915/selftests/mock_engine.c |   2 +-
 6 files changed, 90 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index fc52737751e7..4b4b7358c482 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
+	struct i915_vma *vma;
+
 	/* Prevent writes into HWSP after returning the page to the system */
 	intel_engine_set_hwsp_writemask(engine, ~0u);
 
-	if (HWS_NEEDS_PHYSICAL(engine->i915)) {
-		void *addr = fetch_and_zero(&engine->status_page.page_addr);
+	vma = fetch_and_zero(&engine->status_page.vma);
+	if (!vma)
+		return;
 
-		__free_page(virt_to_page(addr));
-	}
+	if (!HWS_NEEDS_PHYSICAL(engine->i915))
+		i915_vma_unpin(vma);
+
+	i915_gem_object_unpin_map(vma->obj);
+	__i915_gem_object_release_unless_active(vma->obj);
+}
+
+static int pin_ggtt_status_page(struct intel_engine_cs *engine,
+				struct i915_vma *vma)
+{
+	unsigned int flags;
+
+	flags = PIN_GLOBAL;
+	if (!HAS_LLC(engine->i915))
+		/*
+		 * On g33, we cannot place HWS above 256MiB, so
+		 * restrict its pinning to the low mappable arena.
+		 * Though this restriction is not documented for
+		 * gen4, gen5, or byt, they also behave similarly
+		 * and hang if the HWS is placed at the top of the
+		 * GTT. To generalise, it appears that all !llc
+		 * platforms have issues with us placing the HWS
+		 * above the mappable region (even though we never
+		 * actually map it).
+		 */
+		flags |= PIN_MAPPABLE;
+	else
+		flags |= PIN_HIGH;
 
-	i915_vma_unpin_and_release(&engine->status_page.vma,
-				   I915_VMA_RELEASE_MAP);
+	return i915_vma_pin(vma, 0, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
-	unsigned int flags;
 	void *vaddr;
 	int ret;
 
+	/*
+	 * Though the HWS register does support 36bit addresses, historically
+	 * we have had hangs and corruption reported due to wild writes if
+	 * the HWS is placed above 4G. We only allow objects to be allocated
+	 * in GFP_DMA32 for i965, and no earlier physical address users had
+	 * access to more than 4G.
+	 */
 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
 		goto err;
 	}
 
-	flags = PIN_GLOBAL;
-	if (!HAS_LLC(engine->i915))
-		/* On g33, we cannot place HWS above 256MiB, so
-		 * restrict its pinning to the low mappable arena.
-		 * Though this restriction is not documented for
-		 * gen4, gen5, or byt, they also behave similarly
-		 * and hang if the HWS is placed at the top of the
-		 * GTT. To generalise, it appears that all !llc
-		 * platforms have issues with us placing the HWS
-		 * above the mappable region (even though we never
-		 * actually map it).
-		 */
-		flags |= PIN_MAPPABLE;
-	else
-		flags |= PIN_HIGH;
-	ret = i915_vma_pin(vma, 0, 0, flags);
-	if (ret)
-		goto err;
-
 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto err_unpin;
+		goto err;
 	}
 
+	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
 	engine->status_page.vma = vma;
-	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
-	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
+
+	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
+		ret = pin_ggtt_status_page(engine, vma);
+		if (ret)
+			goto err_unpin;
+	}
+
 	return 0;
 
 err_unpin:
-	i915_vma_unpin(vma);
+	i915_gem_object_unpin_map(obj);
 err:
 	i915_gem_object_put(obj);
 	return ret;
 }
 
-static int init_phys_status_page(struct intel_engine_cs *engine)
-{
-	struct page *page;
-
-	/*
-	 * Though the HWS register does support 36bit addresses, historically
-	 * we have had hangs and corruption reported due to wild writes if
-	 * the HWS is placed above 4G.
-	 */
-	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
-	if (!page)
-		return -ENOMEM;
-
-	engine->status_page.page_addr = page_address(page);
-
-	return 0;
-}
-
 static void __intel_context_unpin(struct i915_gem_context *ctx,
 				  struct intel_engine_cs *engine)
 {
@@ -650,10 +653,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		goto err_unpin_preempt;
 
-	if (HWS_NEEDS_PHYSICAL(i915))
-		ret = init_phys_status_page(engine);
-	else
-		ret = init_status_page(engine);
+	ret = init_status_page(engine);
 	if (ret)
 		goto err_breadcrumbs;
 
@@ -1318,7 +1318,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 	}
 
 	if (HAS_EXECLISTS(dev_priv)) {
-		const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+		const u32 *hws =
+			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 		unsigned int idx;
 		u8 read, write;
 
@@ -1501,7 +1502,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock_irqrestore(&b->rb_lock, flags);
 
 	drm_printf(m, "HWSP:\n");
-	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
+	hexdump(m, engine->status_page.addr, PAGE_SIZE);
 
 	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 7217c7e3ee8d..b044162a41d3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -81,6 +81,11 @@
  *
  */
 
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+	return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ebb693c93046..b2d4abe7b601 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -172,6 +172,11 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+	return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR;
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
 	return rb_entry(rb, struct i915_priolist, node);
@@ -1691,7 +1696,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
 		   _MASKED_BIT_DISABLE(STOP_RING));
 
 	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-		   engine->status_page.ggtt_offset);
+		   i915_ggtt_offset(engine->status_page.vma));
 	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
 }
 
@@ -2238,10 +2243,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	}
 
 	execlists->csb_status =
-		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+		&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
-		&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
 	reset_csb_pointers(execlists);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 662907e1a286..d72012b42f20 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,6 +43,11 @@
  */
 #define LEGACY_REQUEST_SIZE 200
 
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+	return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR;
+}
+
 static unsigned int __intel_ring_space(unsigned int head,
 				       unsigned int tail,
 				       unsigned int size)
@@ -499,12 +504,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
 	I915_WRITE(HWS_PGA, addr);
 }
 
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+static struct page *status_page(struct intel_engine_cs *engine)
 {
-	struct page *page = virt_to_page(engine->status_page.page_addr);
-	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
+	struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
 
-	set_hws_pga(engine, phys);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+	set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
 	set_hwstam(engine, ~0u);
 }
 
@@ -571,7 +581,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
 
 static void ring_setup_status_page(struct intel_engine_cs *engine)
 {
-	set_hwsp(engine, engine->status_page.ggtt_offset);
+	set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
 	set_hwstam(engine, ~0u);
 
 	flush_cs_tlb(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9ecae7de3b0a..d1a82610e0c1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -32,8 +32,7 @@ struct i915_sched_attr;
 
 struct intel_hw_status_page {
 	struct i915_vma *vma;
-	u32 *page_addr;
-	u32 ggtt_offset;
+	u32 *addr;
 };
 
 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -679,7 +678,7 @@ static inline u32
 intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
 	/* Ensure that the compiler doesn't optimize away the load. */
-	return READ_ONCE(engine->status_page.page_addr[reg]);
+	return READ_ONCE(engine->status_page.addr[reg]);
 }
 
 static inline void
@@ -692,12 +691,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 	 */
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		mb();
-		clflush(&engine->status_page.page_addr[reg]);
-		engine->status_page.page_addr[reg] = value;
-		clflush(&engine->status_page.page_addr[reg]);
+		clflush(&engine->status_page.addr[reg]);
+		engine->status_page.addr[reg] = value;
+		clflush(&engine->status_page.addr[reg]);
 		mb();
 	} else {
-		WRITE_ONCE(engine->status_page.page_addr[reg], value);
+		WRITE_ONCE(engine->status_page.addr[reg], value);
 	}
 }
 
@@ -885,16 +884,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
-	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
-}
-
-static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
-{
-	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
-}
-
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 8b8d51af7d6a..968a7e139a67 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -201,7 +201,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.i915 = i915;
 	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
 	engine->base.id = id;
-	engine->base.status_page.page_addr = (void *)(engine + 1);
+	engine->base.status_page.addr = (void *)(engine + 1);
 
 	engine->base.context_pin = mock_context_pin;
 	engine->base.request_alloc = mock_request_alloc;
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-01-18 14:02 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-18 14:00 Keeping Tvrtko busy Chris Wilson
2019-01-18 14:00 ` [PATCH 01/38] drm/i915/execlists: Store the highest priority context Chris Wilson
2019-01-18 14:00 ` [PATCH 02/38] drm/i915: Make all GPU resets atomic Chris Wilson
2019-01-18 14:22   ` Mika Kuoppala
2019-01-18 14:00 ` [PATCH 03/38] drm/i915/guc: Disable global reset Chris Wilson
2019-01-18 14:00 ` [PATCH 04/38] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
2019-01-18 14:00 ` [PATCH 05/38] drm/i915/selftests: Trim struct_mutex duration for set-wedged selftest Chris Wilson
2019-01-18 14:29   ` Mika Kuoppala
2019-01-18 14:00 ` [PATCH 06/38] drm/i915: Issue engine resets onto idle engines Chris Wilson
2019-01-18 14:00 ` [PATCH 07/38] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2019-01-18 16:03   ` Tvrtko Ursulin
2019-01-18 16:06     ` Chris Wilson
2019-01-22 14:19     ` Chris Wilson
2019-01-25 10:46       ` Tvrtko Ursulin
2019-01-25 13:38         ` Chris Wilson
2019-01-25 13:46           ` Chris Wilson
2019-01-25 14:08             ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 08/38] drm/i915: Pull VM lists under the VM mutex Chris Wilson
2019-01-18 16:04   ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 09/38] drm/i915: Move vma lookup to its own lock Chris Wilson
2019-01-18 16:14   ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 10/38] drm/i915/selftests: Allocate mock ring/timeline per context Chris Wilson
2019-01-18 16:26   ` Tvrtko Ursulin
2019-01-18 14:00 ` Chris Wilson [this message]
2019-01-18 14:00 ` [PATCH 12/38] drm/i915: Move list of timelines under its own lock Chris Wilson
2019-01-18 16:28   ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 13/38] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
2019-01-18 14:00 ` [PATCH 14/38] drm/i915: Enlarge vma->pin_count Chris Wilson
2019-01-18 14:00 ` [PATCH 15/38] drm/i915: Allocate a status page for each timeline Chris Wilson
2019-01-21 11:18   ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 16/38] drm/i915: Share per-timeline HWSP using a slab suballocator Chris Wilson
2019-01-18 14:00 ` [PATCH 17/38] drm/i915: Keep all partially allocated HWSP on a freelist Chris Wilson
2019-01-18 14:00 ` [PATCH 18/38] drm/i915: Track the context's seqno in its own timeline HWSP Chris Wilson
2019-01-18 14:00 ` [PATCH 19/38] drm/i915: Identify active requests Chris Wilson
2019-01-18 14:00 ` [PATCH 20/38] drm/i915: Remove the intel_engine_notify tracepoint Chris Wilson
2019-01-18 14:00 ` [PATCH 21/38] drm/i915: Replace global breadcrumbs with per-context interrupt tracking Chris Wilson
2019-01-18 14:00 ` [PATCH 22/38] drm/i915: Drop fake breadcrumb irq Chris Wilson
2019-01-18 14:00 ` [PATCH 23/38] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno Chris Wilson
2019-01-18 14:00 ` [PATCH 24/38] drm/i915: Avoid presumption of execution ordering for kernel context switching Chris Wilson
2019-01-18 14:00 ` [PATCH 25/38] drm/i915/pmu: Always sample an active ringbuffer Chris Wilson
2019-01-22  9:20   ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 26/38] drm/i915: Remove the global per-engine execution timeline Chris Wilson
2019-01-18 14:00 ` [PATCH 27/38] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2019-01-22  9:31   ` Tvrtko Ursulin
2019-01-22 10:47     ` Chris Wilson
2019-01-22 11:05       ` Tvrtko Ursulin
2019-01-18 14:00 ` [PATCH 28/38] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
2019-01-23 11:30   ` Tvrtko Ursulin
2019-01-23 11:51     ` Chris Wilson
2019-01-23 12:03       ` Tvrtko Ursulin
2019-01-24 15:58     ` [PATCH v3] " Chris Wilson
2019-01-18 14:01 ` [PATCH 29/38] drm/i915: Expose user control over the ppGTT associated with a context Chris Wilson
2019-01-23 12:00   ` Tvrtko Ursulin
2019-01-23 12:15     ` Chris Wilson
2019-01-18 14:01 ` [PATCH 30/38] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
2019-01-18 14:01 ` [PATCH 31/38] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2019-01-24 17:35   ` Tvrtko Ursulin
2019-01-18 14:01 ` [PATCH 32/38] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2019-01-18 14:01 ` [PATCH 33/38] drm/i915: Remove last traces of exec-id (GEM_BUSY) Chris Wilson
2019-01-18 14:01 ` [PATCH 34/38] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2019-01-18 14:01 ` [PATCH 35/38] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-01-18 14:01 ` [PATCH 36/38] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2019-01-18 14:01 ` [PATCH 37/38] drm/i915: Store the BIT(engine->id) as the engine's mask Chris Wilson
2019-01-18 14:01 ` [PATCH 38/38] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-01-18 14:17 ` ✗ Fi.CI.BAT: failure for series starting with [01/38] drm/i915/execlists: Store the highest priority context Patchwork
2019-01-24 16:28 ` ✗ Fi.CI.BAT: failure for series starting with [01/38] drm/i915/execlists: Store the highest priority context (rev2) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190118140109.25261-12-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.