All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michel Thierry <michel.thierry@intel.com>
To: intel-gfx@lists.freedesktop.org
Cc: Akash Goel <akash.goel@intel.com>
Subject: [PATCH v2 08/18] drm/i915/gen8: Add 4 level switching infrastructure and lrc support
Date: Wed, 10 Jun 2015 17:46:45 +0100	[thread overview]
Message-ID: <1433954816-13787-9-git-send-email-michel.thierry@intel.com> (raw)
In-Reply-To: <1433954816-13787-1-git-send-email-michel.thierry@intel.com>

In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
the base address of the PML4, while the other PDP registers are ignored.

In LRC, the addressing mode must be specified in every context descriptor.

v2: PML4 update in legacy context switch is left for historic reasons;
the preferred mode of operation is with lrc context-based submission.

v3: s/gen8_map_page_directory/gen8_setup_page_directory and
s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
Also, clflush will be needed for bxt. (Akash)

v4: Squashed lrc-specific code and use a macro to set PML4 register.

v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
PDP update in bb_start is only for legacy 32b mode.

Cc: Akash Goel <akash.goel@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 54 ++++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 ++
 drivers/gpu/drm/i915/i915_reg.h     |  5 +++-
 drivers/gpu/drm/i915/intel_lrc.c    | 58 +++++++++++++++++++++++++++----------
 4 files changed, 96 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index da1a964..cbc6aaf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -211,6 +211,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 	return pde;
 }
 
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
 				 bool valid, u32 unused)
@@ -590,6 +593,35 @@ static void free_pdp(struct drm_device *dev,
 	}
 }
 
+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+			  struct i915_page_directory_pointer *pdp,
+			  struct i915_page_directory *pd,
+			  int index)
+{
+	gen8_ppgtt_pdpe_t *page_directorypo;
+
+	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		return;
+
+	page_directorypo = kmap_px(pdp);
+	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+	kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+				  struct i915_pml4 *pml4,
+				  struct i915_page_directory_pointer *pdp,
+				  int index)
+{
+	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
+	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+	kunmap_px(ppgtt, pagemap);
+}
+
 #define SCRATCH_PAGE_MAGIC 0xffff00ffffff00ffULL
 
 static int alloc_scratch_page(struct i915_address_space *vm)
@@ -754,8 +786,8 @@ static int gen8_write_pdp(struct intel_engine_cs *ring,
 	return 0;
 }
 
-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *ring)
+static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
+				 struct intel_engine_cs *ring)
 {
 	int i, ret;
 
@@ -770,6 +802,12 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
+static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
+			      struct intel_engine_cs *ring)
+{
+	return gen8_write_pdp(ring, 0, px_dma(&ppgtt->pml4));
+}
+
 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 				   uint64_t start,
 				   uint64_t length,
@@ -1180,6 +1218,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 		__set_bit(pdpe, pdp->used_pdpes);
 		gen8_map_pagetable_range(ppgtt, pd, start, length);
+		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
 	}
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
@@ -1249,6 +1288,8 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
 		if (ret)
 			goto err_out;
+
+		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
 	}
 
 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1284,7 +1325,7 @@ static bool hw_wont_flush_pdp_tlbs(struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_device *dev = ppgtt->base.dev;
 
-	if (GEN8_CTX_ADDRESSING_MODE != LEGACY_32B_CONTEXT)
+	if (GEN8_CTX_ADDRESSING_MODE(dev) != LEGACY_32B_CONTEXT)
 		return false;
 
 	if (IS_GEN8(dev) || IS_GEN9(dev))
@@ -1341,8 +1382,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 
-	ppgtt->switch_mm = gen8_mm_switch;
-
 	ret = setup_scratch(&ppgtt->base);
 	if (ret)
 		return ret;
@@ -1353,12 +1392,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			goto clear_scratch;
 
 		ppgtt->base.total = 1ULL << 48;
+		ppgtt->switch_mm = gen8_48b_mm_switch;
 	} else {
 		ret = __pdp_init(false, &ppgtt->pdp);
 		if (ret)
 			goto clear_scratch;
 
 		ppgtt->base.total = 1ULL << 32;
+		ppgtt->switch_mm = gen8_legacy_mm_switch;
 		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
 							      0, 0,
 							      GEN8_PML4E_SHIFT);
@@ -1565,8 +1606,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
 	int j;
 
 	for_each_ring(ring, dev_priv, j) {
+		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
 		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b038b86..5b04211 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -39,6 +39,8 @@ struct drm_i915_file_private;
 typedef uint32_t gen6_pte_t;
 typedef uint64_t gen8_pte_t;
 typedef uint64_t gen8_pde_t;
+typedef uint64_t gen8_ppgtt_pdpe_t;
+typedef uint64_t gen8_ppgtt_pml4e_t;
 
 #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 334324b..7f03a09 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1642,6 +1642,7 @@ enum skl_disp_power_wells {
 #define   GFX_REPLAY_MODE		(1<<11)
 #define   GFX_PSMI_GRANULARITY		(1<<10)
 #define   GFX_PPGTT_ENABLE		(1<<9)
+#define   GEN8_GFX_PPGTT_48B		(1<<7)
 
 #define VLV_DISPLAY_BASE 0x180000
 #define VLV_MIPI_BASE VLV_DISPLAY_BASE
@@ -2792,7 +2793,9 @@ enum {
 };
 
 #define GEN8_CTX_ADDRESSING_MODE_SHIFT	3
-#define GEN8_CTX_ADDRESSING_MODE	LEGACY_32B_CONTEXT
+#define GEN8_CTX_ADDRESSING_MODE(dev)	(USES_FULL_48BIT_PPGTT(dev) ?\
+						LEGACY_64B_CONTEXT :\
+						LEGACY_32B_CONTEXT)
 
 /*
  * Overlay regs
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 51c0e06..55ba5a1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -189,6 +189,11 @@
 	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
 }
 
+#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
+	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
+	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
+}
+
 enum {
 	FAULT_AND_HANG = 0,
 	FAULT_AND_HALT, /* Debug only */
@@ -258,7 +263,7 @@ static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
 	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
 
 	desc = GEN8_CTX_VALID;
-	desc |= GEN8_CTX_ADDRESSING_MODE << GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
 	if (IS_GEN8(ctx_obj->base.dev))
 		desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
@@ -329,10 +334,16 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
 	reg_state[CTX_RING_TAIL+1] = tail;
 	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
-	/* True PPGTT with dynamic page allocation: update PDP registers and
-	 * point the unallocated PDPs to the scratch page
-	 */
-	if (ppgtt) {
+	if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		/* True 64b PPGTT (48bit canonical)
+		 * PDP0_DESCRIPTOR contains the base address to PML4 and
+		 * other PDP Descriptors are ignored
+		 */
+		ASSIGN_CTX_PML4(ppgtt, reg_state);
+	} else if (ppgtt) {
+		/* True 32b PPGTT with dynamic page allocation: update PDP
+		 * registers and point the unallocated PDPs to the scratch page
+		 */
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
@@ -1156,12 +1167,16 @@ static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
 	 * Ideally, we should set Force PD Restore in ctx descriptor,
 	 * but we can't. Force Restore would be a second option, but
 	 * it is unsafe in case of lite-restore (because the ctx is
-	 * not idle). */
+	 * not idle). PML4 is allocated during ppgtt init so this is
+	 * not needed in 48-bit.*/
 	if (ctx->ppgtt &&
 	    (intel_ring_flag(ring) & ctx->ppgtt->pd_dirty_rings)) {
-		ret = intel_logical_ring_emit_pdps(ring, ctx);
-		if (ret)
-			return ret;
+		if (GEN8_CTX_ADDRESSING_MODE(ring->dev) == LEGACY_32B_CONTEXT){
+			ret = intel_logical_ring_emit_pdps(ring, ctx);
+
+			if (ret)
+				return ret;
+		}
 
 		ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
 	}
@@ -1805,13 +1820,24 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
 
-	/* With dynamic page allocation, PDPs may not be allocated at this point,
-	 * Point the unallocated PDPs to the scratch page
-	 */
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
-	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		/* 64b PPGTT (48bit canonical)
+		 * PDP0_DESCRIPTOR contains the base address to PML4 and
+		 * other PDP Descriptors are ignored.
+		 */
+		ASSIGN_CTX_PML4(ppgtt, reg_state);
+	} else {
+		/* 32b PPGTT
+		 * PDP*_DESCRIPTOR contains the base address of space supported.
+		 * With dynamic page allocation, PDPs may not be allocated at
+		 * this point. Point the unallocated PDPs to the scratch page
+		 */
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
+	}
+
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
-- 
2.4.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-06-10 16:46 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-10 16:46 [PATCH v2 00/18] 48-bit PPGTT Michel Thierry
2015-06-10 16:46 ` [PATCH v2 01/18] drm/i915/lrc: Update PDPx registers with lri commands Michel Thierry
2015-06-11 18:04   ` Mika Kuoppala
2015-06-22  9:18     ` Michel Thierry
2015-06-26 12:46   ` [PATCH v3] " Michel Thierry
2015-06-26 14:45     ` Mika Kuoppala
2015-06-10 16:46 ` [PATCH v2 02/18] drm/i915/gtt: Switch gen8_free_page_tables params Michel Thierry
2015-06-11 18:05   ` Mika Kuoppala
2015-06-26 16:38     ` Daniel Vetter
2015-06-10 16:46 ` [PATCH v2 03/18] drm/i915: Remove unnecessary gen8_clamp_pd Michel Thierry
2015-06-10 16:46 ` [PATCH v2 04/18] drm/i915/gen8: Make pdp allocation more dynamic Michel Thierry
2015-06-10 16:46 ` [PATCH v2 05/18] drm/i915/gen8: Abstract PDP usage Michel Thierry
2015-06-10 16:46 ` [PATCH v2 06/18] drm/i915/gen8: Add dynamic page trace events Michel Thierry
2015-06-10 16:46 ` [PATCH v2 07/18] drm/i915/gen8: implement alloc/free for 4lvl Michel Thierry
2015-06-10 16:46 ` Michel Thierry [this message]
2015-06-10 16:46 ` [PATCH v2 09/18] drm/i915/gen8: Generalize PTE writing for GEN8 PPGTT Michel Thierry
2015-06-10 16:46 ` [PATCH v2 10/18] drm/i915/gen8: Pass sg_iter through pte inserts Michel Thierry
2015-06-10 16:46 ` [PATCH v2 11/18] drm/i915/gen8: Add 4 level support in insert_entries and clear_range Michel Thierry
2015-06-10 16:46 ` [PATCH v2 12/18] drm/i915/gen8: Initialize PDPs Michel Thierry
2015-06-10 16:46 ` [PATCH v2 13/18] drm/i915: Expand error state's address width to 64b Michel Thierry
2015-06-10 16:46 ` [PATCH v2 14/18] drm/i915/gen8: Add ppgtt info and debug_dump Michel Thierry
2015-06-10 16:46 ` [PATCH v2 15/18] drm/i915: object size needs to be u64 Michel Thierry
2015-06-10 16:46 ` [PATCH v2 16/18] drm/i915: Check against correct user_size limit in 48b ppgtt mode Michel Thierry
2015-06-10 17:57   ` Chris Wilson
2015-06-10 16:46 ` [PATCH v2 17/18] drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset Michel Thierry
2015-06-10 18:09   ` Chris Wilson
2015-06-17 12:49     ` Daniel Vetter
2015-06-17 12:53       ` Chris Wilson
2015-06-17 15:03         ` Daniel Vetter
2015-06-17 17:37           ` Chris Wilson
2015-06-18  6:45             ` Daniel Vetter
2015-06-18  7:03               ` Chris Wilson
2015-06-18  7:11                 ` Daniel Vetter
2015-06-18  7:34                   ` Chris Wilson
2015-06-23 12:21   ` [PATCH v3] " Michel Thierry
2015-06-23 13:22     ` Chris Wilson
2015-06-10 16:46 ` [PATCH v2 18/18] drm/i915/gen8: Flip the 48b switch Michel Thierry
2015-06-10 16:46 ` [PATCH v2] tests/gem_ppgtt: Check Wa32bitOffsets workarounds Michel Thierry
2015-07-01 15:27 ` [PATCH v3 00/17] 48-bit PPGTT Michel Thierry
2015-07-01 15:27   ` [PATCH v3 01/17] drm/i915: Remove unnecessary gen8_clamp_pd Michel Thierry
2015-07-01 15:27   ` [PATCH v3 02/17] drm/i915/gen8: Make pdp allocation more dynamic Michel Thierry
2015-07-07 12:36     ` Goel, Akash
2015-07-07 12:56       ` Michel Thierry
2015-07-01 15:27   ` [PATCH v3 03/17] drm/i915/gen8: Abstract PDP usage Michel Thierry
2015-07-07 12:43     ` Goel, Akash
2015-07-07 13:35       ` Michel Thierry
2015-07-01 15:27   ` [PATCH v3 04/17] drm/i915/gen8: Add dynamic page trace events Michel Thierry
2015-07-01 15:27   ` [PATCH v3 05/17] drm/i915/gen8: implement alloc/free for 4lvl Michel Thierry
2015-07-07 12:48     ` Goel, Akash
2015-07-07 13:40       ` Michel Thierry
2015-07-01 15:27   ` [PATCH v3 06/17] drm/i915/gen8: Add 4 level switching infrastructure and lrc support Michel Thierry
2015-07-01 15:27   ` [PATCH v3 07/17] drm/i915/gen8: Generalize PTE writing for GEN8 PPGTT Michel Thierry
2015-07-01 15:27   ` [PATCH v3 08/17] drm/i915/gen8: Pass sg_iter through pte inserts Michel Thierry
2015-07-01 15:27   ` [PATCH v3 09/17] drm/i915/gen8: Add 4 level support in insert_entries and clear_range Michel Thierry
2015-07-07 12:51     ` Goel, Akash
2015-07-07 13:42       ` Michel Thierry
2015-07-01 15:27   ` [PATCH v3 10/17] drm/i915/gen8: Initialize PDPs Michel Thierry
2015-07-01 15:27   ` [PATCH v3 11/17] drm/i915: Expand error state's address width to 64b Michel Thierry
2015-07-07 12:53     ` Goel, Akash
2015-07-07 13:50       ` Michel Thierry
2015-07-01 15:27   ` [PATCH v3 12/17] drm/i915/gen8: Add ppgtt info and debug_dump Michel Thierry
2015-07-07 12:56     ` Goel, Akash
2015-07-07 13:51       ` Michel Thierry
2015-07-01 15:27   ` [PATCH v3 13/17] drm/i915: object size needs to be u64 Michel Thierry
2015-07-01 15:27   ` [PATCH v3 14/17] drm/i915: batch_obj vm offset must " Michel Thierry
2015-07-01 16:07     ` John Harrison
2015-07-01 15:27   ` [PATCH v3 15/17] drm/i915/userptr: Kill user_size limit check Michel Thierry
2015-07-01 15:31     ` Chris Wilson
2015-07-01 15:27   ` [PATCH v3 16/17] drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset Michel Thierry
2015-07-01 15:43     ` Chris Wilson
2015-07-01 15:54       ` Michel Thierry
2015-07-01 16:02     ` [PATCH v5] " Michel Thierry
2015-07-01 15:27   ` [PATCH v3 17/17] drm/i915/gen8: Flip the 48b switch Michel Thierry
2015-07-01 15:38   ` [PATCH v3 00/17] 48-bit PPGTT Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1433954816-13787-9-git-send-email-michel.thierry@intel.com \
    --to=michel.thierry@intel.com \
    --cc=akash.goel@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.