All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 11/11] drm/i915/gtt: Tidy up ppgtt insertion for gen8
Date: Sun,  7 Jul 2019 22:00:24 +0100	[thread overview]
Message-ID: <20190707210024.26192-12-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk>

Apply the new radix shift helpers to extract the multi-level indices
cleanly when inserting pte into the gtt tree.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  92 ++--------------------
 2 files changed, 49 insertions(+), 158 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 83f03f8f4d2c..e2c3c4288edb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1128,47 +1128,28 @@ static inline struct sgt_dma {
 	return (struct sgt_dma) { sg, addr, addr + sg->length };
 }
 
-struct gen8_insert_pte {
-	u16 pml4e;
-	u16 pdpe;
-	u16 pde;
-	u16 pte;
-};
-
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
-{
-	return (struct gen8_insert_pte) {
-		 gen8_pml4e_index(start),
-		 gen8_pdpe_index(start),
-		 gen8_pde_index(start),
-		 gen8_pte_index(start),
-	};
-}
-
-static __always_inline bool
+static __always_inline u64
 gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 			      struct i915_page_directory *pdp,
 			      struct sgt_dma *iter,
-			      struct gen8_insert_pte *idx,
+			      u64 idx,
 			      enum i915_cache_level cache_level,
 			      u32 flags)
 {
 	struct i915_page_directory *pd;
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
-	bool ret;
 
-	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-	pd = i915_pd_entry(pdp, idx->pdpe);
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 	do {
-		vaddr[idx->pte] = pte_encode | iter->dma;
+		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
 
 		iter->dma += I915_GTT_PAGE_SIZE;
 		if (iter->dma >= iter->max) {
 			iter->sg = __sg_next(iter->sg);
 			if (!iter->sg) {
-				ret = false;
+				idx = 0;
 				break;
 			}
 
@@ -1176,30 +1157,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 			iter->max = iter->dma + iter->sg->length;
 		}
 
-		if (++idx->pte == GEN8_PTES) {
-			idx->pte = 0;
-
-			if (++idx->pde == I915_PDES) {
-				idx->pde = 0;
-
+		if (gen8_pd_index(++idx, 0) == 0) {
+			if (gen8_pd_index(idx, 1) == 0) {
 				/* Limited by sg length for 3lvl */
-				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
-					idx->pdpe = 0;
-					ret = true;
+				if (gen8_pd_index(idx, 2) == 0)
 					break;
-				}
 
-				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-				pd = pdp->entry[idx->pdpe];
+				pd = pdp->entry[gen8_pd_index(idx, 2)];
 			}
 
 			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 		}
 	} while (1);
 	kunmap_atomic(vaddr);
 
-	return ret;
+	return idx;
 }
 
 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
@@ -1209,9 +1182,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
-	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
+	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
+				      vma->node.start >> GEN8_PTE_SHIFT,
 				      cache_level, flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -1228,39 +1201,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 	dma_addr_t rem = iter->sg->length;
 
 	do {
-		struct gen8_insert_pte idx = gen8_insert_pte(start);
 		struct i915_page_directory *pdp =
-			i915_pdp_entry(pml4, idx.pml4e);
-		struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
-		unsigned int page_size;
-		bool maybe_64K = false;
+			i915_pd_entry(pml4, __gen8_pte_index(start, 3));
+		struct i915_page_directory *pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
 		gen8_pte_t encode = pte_encode;
+		unsigned int maybe_64K = -1;
+		unsigned int page_size;
 		gen8_pte_t *vaddr;
-		u16 index, max;
+		u16 index;
 
 		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
-		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
-			index = idx.pde;
-			max = I915_PDES;
-			page_size = I915_GTT_PAGE_SIZE_2M;
-
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
 			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
 
 			vaddr = kmap_atomic_px(pd);
 		} else {
-			struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
+			struct i915_page_table *pt =
+				i915_pt_entry(pd, __gen8_pte_index(start, 1));
 
-			index = idx.pte;
-			max = GEN8_PTES;
+			index = __gen8_pte_index(start, 0);
 			page_size = I915_GTT_PAGE_SIZE;
 
 			if (!index &&
 			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
 			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-			     rem >= (max - index) * I915_GTT_PAGE_SIZE))
-				maybe_64K = true;
+			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+				maybe_64K = __gen8_pte_index(start, 1);
 
 			vaddr = kmap_atomic_px(pt);
 		}
@@ -1281,16 +1253,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				iter->dma = sg_dma_address(iter->sg);
 				iter->max = iter->dma + rem;
 
-				if (maybe_64K && index < max &&
+				if (maybe_64K != -1 && index < I915_PDES &&
 				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-				       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
-					maybe_64K = false;
+				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+					maybe_64K = -1;
 
 				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
 					break;
 			}
-		} while (rem >= page_size && index < max);
+		} while (rem >= page_size && index < I915_PDES);
 
 		kunmap_atomic(vaddr);
 
@@ -1300,14 +1272,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 		 * it and have reached the end of the sg table and we have
 		 * enough padding.
 		 */
-		if (maybe_64K &&
-		    (index == max ||
+		if (maybe_64K != -1 &&
+		    (index == I915_PDES ||
 		     (i915_vm_has_scratch_64K(vma->vm) &&
 		      !iter->sg && IS_ALIGNED(vma->node.start +
 					      vma->node.size,
 					      I915_GTT_PAGE_SIZE_2M)))) {
 			vaddr = kmap_atomic_px(pd);
-			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
 			kunmap_atomic(vaddr);
 			page_size = I915_GTT_PAGE_SIZE_64K;
 
@@ -1324,8 +1296,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				u16 i;
 
 				encode = vma->vm->scratch[0].encode;
-				vaddr = kmap_atomic_px(i915_pt_entry(pd,
-								     idx.pde));
+				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
 
 				for (i = 1; i < index; i += 16)
 					memset64(vaddr + i, encode, 15);
@@ -1351,13 +1322,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
 					       flags);
 	} else {
-		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
 
-		while (gen8_ppgtt_insert_pte_entries(ppgtt,
-						     i915_pdp_entry(pml4, idx.pml4e++),
-						     &iter, &idx, cache_level,
-						     flags))
-			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+		while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
+							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
+							    &iter, idx, cache_level,
+							    flags)))
+			;
 
 		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d0128f52184b..4bcf61e48570 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -115,30 +115,19 @@ typedef u64 gen8_pte_t;
 #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
-/* GEN8 32b style address is defined as a 3 level page table:
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
  * 31:30 | 29:21 | 20:12 |  11:0
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
- */
-#define GEN8_3LVL_PDPES			4
-#define GEN8_PDE_SHIFT			21
-#define GEN8_PDE_MASK			0x1ff
-#define GEN8_PTE_SHIFT			12
-#define GEN8_PTE_MASK			0x1ff
-#define GEN8_PTES			I915_PTES(sizeof(gen8_pte_t))
-
-/* GEN8 48b style address is defined as a 4 level page table:
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
  * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
  * PML4E | PDPE  |  PDE  |  PTE  | offset
  */
-#define GEN8_PML4ES_PER_PML4		512
-#define GEN8_PML4E_SHIFT		39
-#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
-#define GEN8_PDPE_SHIFT			30
-/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
- * tables */
-#define GEN8_PDPE_MASK			0x1ff
+#define GEN8_3LVL_PDPES			4
+#define GEN8_PTE_SHIFT			12
 
 #define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE			0 /* WB LLC */
@@ -522,15 +511,6 @@ static inline u32 gen6_pde_index(u32 addr)
 	return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
 
-static inline unsigned int
-i915_pdpes_per_pdp(const struct i915_address_space *vm)
-{
-	if (i915_vm_is_4lvl(vm))
-		return GEN8_PML4ES_PER_PML4;
-
-	return GEN8_3LVL_PDPES;
-}
-
 static inline struct i915_page_table *
 i915_pt_entry(const struct i915_page_directory * const pd,
 	      const unsigned short n)
@@ -545,66 +525,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
 	return pdp->entry[n];
 }
 
-static inline struct i915_page_directory *
-i915_pdp_entry(const struct i915_page_directory * const pml4,
-	       const unsigned short n)
-{
-	return pml4->entry[n];
-}
-
-/* Equivalent to the gen6 version, For each pde iterates over every pde
- * between from start until start + length. On gen8+ it simply iterates
- * over every page directory entry in a page directory.
- */
-#define gen8_for_each_pde(pt, pd, start, length, iter)			\
-	for (iter = gen8_pde_index(start);				\
-	     length > 0 && iter < I915_PDES &&				\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
-	for (iter = gen8_pdpe_index(start);				\
-	     length > 0 && iter < i915_pdpes_per_pdp(vm) &&		\
-		     (pd = i915_pd_entry(pdp, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
-	for (iter = gen8_pml4e_index(start);				\
-	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
-		     (pdp = i915_pdp_entry(pml4, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-static inline u32 gen8_pte_index(u64 address)
-{
-	return i915_pte_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pde_index(u64 address)
-{
-	return i915_pde_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pdpe_index(u64 address)
-{
-	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
-}
-
-static inline u32 gen8_pml4e_index(u64 address)
-{
-	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
-}
-
-static inline u64 gen8_pte_count(u64 address, u64 length)
-{
-	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
-}
-
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2019-07-07 21:02 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
2019-07-07 21:00 ` [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch Chris Wilson
2019-07-09 12:24   ` Mika Kuoppala
2019-07-09 12:29     ` Chris Wilson
2019-07-09 12:41       ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory Chris Wilson
2019-07-09 14:43   ` Mika Kuoppala
2019-07-09 14:46     ` Chris Wilson
2019-07-07 21:00 ` [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc Chris Wilson
2019-07-09 14:59   ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth Chris Wilson
2019-07-10  8:17   ` Mika Kuoppala
2019-07-10  8:25     ` Chris Wilson
2019-07-10 14:25       ` Mika Kuoppala
2019-07-10 14:35         ` Chris Wilson
2019-07-10 14:50           ` Mika Kuoppala
2019-07-10 15:03             ` Chris Wilson
2019-07-10 15:11               ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
2019-07-09 15:21   ` Chris Wilson
2019-07-10  9:24   ` Mika Kuoppala
2019-07-10  9:28     ` Chris Wilson
2019-07-10 13:49   ` Mika Kuoppala
2019-07-10 13:55     ` Chris Wilson
2019-07-10 14:55     ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array Chris Wilson
2019-07-10 14:18   ` Mika Kuoppala
2019-07-10 14:28     ` Chris Wilson
2019-07-10 14:53       ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries Chris Wilson
2019-07-10 16:21   ` Mika Kuoppala
2019-07-10 17:28     ` Chris Wilson
2019-07-07 21:00 ` [PATCH 08/11] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
2019-07-07 21:00 ` [PATCH 09/11] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
2019-07-07 21:00 ` [PATCH 10/11] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
2019-07-07 21:00 ` Chris Wilson [this message]
2019-07-07 21:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch Patchwork
2019-07-07 21:46 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-07-07 22:00 ` ✓ Fi.CI.BAT: success " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190707210024.26192-12-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.