* Refactor GTT recursion to be ... recursion
@ 2019-07-07 21:00 Chris Wilson
  2019-07-07 21:00 ` [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch Chris Wilson
                   ` (13 more replies)
  0 siblings, 14 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

After Mika tidied up the structs such that we use the same routines
everywhere on gen8, take the next small step so that the redundant
routines are reduced by simple recursion.
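
To sketch the direction in isolation (illustrative C only; the names
below are not the i915 code): once every level shares the same layout,
the per-level clear/alloc variants can collapse into a single routine
that recurses on the remaining depth.

  #define NENTRIES 512

  struct dir {
          struct dir *entry[NENTRIES];
  };

  /* One walker covers pml4/pdp/pd alike; lvl counts the levels below. */
  static void clear_level(struct dir *d, int lvl)
  {
          int i;

          for (i = 0; i < NENTRIES; i++) {
                  if (!d->entry[i])
                          continue;
                  if (lvl > 1)            /* still a directory: descend */
                          clear_level(d->entry[i], lvl - 1);
                  d->entry[i] = NULL;     /* lvl == 1: leaf page table */
          }
  }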
-Chris



* [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-09 12:24   ` Mika Kuoppala
  2019-07-07 21:00 ` [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory Chris Wilson
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

We only use the dma pages for scratch, and so do not need to allocate
the extra storage for the shadow page directory.
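
For reference, a simplified view of the two structs (trimmed from
i915_gem_gtt.h) that shows the saving: scratch only ever needs the
backing dma page, never the 512-slot shadow array.

  struct i915_page_dma {                /* all that scratch needs */
          struct page *page;
          dma_addr_t daddr;
  };

  struct i915_page_directory {          /* what alloc_pd() returns */
          struct i915_page_dma base;
          atomic_t used;
          spinlock_t lock;
          void *entry[512];             /* ~4KiB of shadow entries */
  };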

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
 2 files changed, 85 insertions(+), 113 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 236c964dd761..937236913e70 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
 
 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 
-#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
-#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
+#define fill_px(px, v) fill_page_dma(px_base(px), (v))
+#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
 
-static void fill_page_dma(struct i915_address_space *vm,
-			  struct i915_page_dma *p,
-			  const u64 val)
+static void fill_page_dma(struct i915_page_dma *p, const u64 val)
 {
-	u64 * const vaddr = kmap_atomic(p->page);
-
-	memset64(vaddr, val, PAGE_SIZE / sizeof(val));
-
-	kunmap_atomic(vaddr);
+	kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
 }
 
-static void fill_page_dma_32(struct i915_address_space *vm,
-			     struct i915_page_dma *p,
-			     const u32 v)
+static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
 {
-	fill_page_dma(vm, p, (u64)v << 32 | v);
+	fill_page_dma(p, (u64)v << 32 | v);
 }
 
 static int
@@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
 	__free_pages(p->page, order);
 }
 
+static void free_scratch(struct i915_address_space *vm)
+{
+	if (!vm->scratch_page.daddr) /* set to 0 on clones */
+		return;
+
+	if (vm->scratch_pdp.daddr)
+		cleanup_page_dma(vm, &vm->scratch_pdp);
+	if (vm->scratch_pd.daddr)
+		cleanup_page_dma(vm, &vm->scratch_pd);
+	if (vm->scratch_pt.daddr)
+		cleanup_page_dma(vm, &vm->scratch_pt);
+
+	cleanup_scratch_page(vm);
+}
+
 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 {
 	struct i915_page_table *pt;
@@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 	kfree(pt);
 }
 
-static void gen8_initialize_pt(struct i915_address_space *vm,
-			       struct i915_page_table *pt)
-{
-	fill_px(vm, pt, vm->scratch_pte);
-}
-
-static void gen6_initialize_pt(struct i915_address_space *vm,
-			       struct i915_page_table *pt)
-{
-	fill32_px(vm, pt, vm->scratch_pte);
-}
-
 static struct i915_page_directory *__alloc_pd(void)
 {
 	struct i915_page_directory *pd;
@@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm,
 	kfree(pd);
 }
 
-#define init_pd(vm, pd, to) {					\
-	fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
-	memset_p((pd)->entry, (to), 512);				\
+static void init_pd(struct i915_page_directory *pd,
+		    struct i915_page_dma *scratch)
+{
+	fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
+	memset_p(pd->entry, scratch, 512);
 }
 
 static inline void
@@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		GEM_BUG_ON(pt == vm->scratch_pt);
+		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
 
 		atomic_inc(&pt->used);
 		gen8_ppgtt_clear_pt(vm, pt, start, length);
-		if (release_pd_entry(pd, pde, &pt->used,
-				     px_base(vm->scratch_pt)))
+		if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
 			free_pt(vm, pt);
 	}
 }
@@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		GEM_BUG_ON(pd == vm->scratch_pd);
+		GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
 
 		atomic_inc(&pd->used);
 		gen8_ppgtt_clear_pd(vm, pd, start, length);
-		if (release_pd_entry(pdp, pdpe, &pd->used,
-				     px_base(vm->scratch_pd)))
+		if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
 			free_pd(vm, pd);
 	}
 }
@@ -921,12 +916,11 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		GEM_BUG_ON(pdp == vm->scratch_pdp);
+		GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
 
 		atomic_inc(&pdp->used);
 		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
-		if (release_pd_entry(pml4, pml4e, &pdp->used,
-				     px_base(vm->scratch_pdp)))
+		if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
 			free_pd(vm, pdp);
 	}
 }
@@ -1181,7 +1175,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < I915_PDES; i++) {
-		if (pd->entry[i] != vm->scratch_pt)
+		if (pd->entry[i] != &vm->scratch_pt)
 			free_pt(vm, pd->entry[i]);
 	}
 }
@@ -1218,37 +1212,35 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 				I915_CACHE_LLC,
 				vm->has_read_only);
 
-	vm->scratch_pt = alloc_pt(vm);
-	if (IS_ERR(vm->scratch_pt)) {
-		ret = PTR_ERR(vm->scratch_pt);
+	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
+		ret = -ENOMEM;
 		goto free_scratch_page;
 	}
+	fill_page_dma(&vm->scratch_pt, vm->scratch_pte);
 
-	vm->scratch_pd = alloc_pd(vm);
-	if (IS_ERR(vm->scratch_pd)) {
-		ret = PTR_ERR(vm->scratch_pd);
+	if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) {
+		ret = -ENOMEM;
 		goto free_pt;
 	}
+	fill_page_dma(&vm->scratch_pd,
+		      gen8_pde_encode(vm->scratch_pt.daddr, I915_CACHE_LLC));
 
 	if (i915_vm_is_4lvl(vm)) {
-		vm->scratch_pdp = alloc_pd(vm);
-		if (IS_ERR(vm->scratch_pdp)) {
-			ret = PTR_ERR(vm->scratch_pdp);
+		if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
+			ret = -ENOMEM;
 			goto free_pd;
 		}
+		fill_page_dma(&vm->scratch_pdp,
+			      gen8_pde_encode(vm->scratch_pd.daddr,
+					      I915_CACHE_LLC));
 	}
 
-	gen8_initialize_pt(vm, vm->scratch_pt);
-	init_pd(vm, vm->scratch_pd, vm->scratch_pt);
-	if (i915_vm_is_4lvl(vm))
-		init_pd(vm, vm->scratch_pdp, vm->scratch_pd);
-
 	return 0;
 
 free_pd:
-	free_pd(vm, vm->scratch_pd);
+	cleanup_page_dma(vm, &vm->scratch_pd);
 free_pt:
-	free_pt(vm, vm->scratch_pt);
+	cleanup_page_dma(vm, &vm->scratch_pt);
 free_scratch_page:
 	cleanup_scratch_page(vm);
 
@@ -1292,18 +1284,6 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 	return 0;
 }
 
-static void gen8_free_scratch(struct i915_address_space *vm)
-{
-	if (!vm->scratch_page.daddr)
-		return;
-
-	if (i915_vm_is_4lvl(vm))
-		free_pd(vm, vm->scratch_pdp);
-	free_pd(vm, vm->scratch_pd);
-	free_pt(vm, vm->scratch_pt);
-	cleanup_scratch_page(vm);
-}
-
 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 				    struct i915_page_directory *pdp)
 {
@@ -1311,7 +1291,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < pdpes; i++) {
-		if (pdp->entry[i] == vm->scratch_pd)
+		if (pdp->entry[i] == &vm->scratch_pd)
 			continue;
 
 		gen8_free_page_tables(vm, pdp->entry[i]);
@@ -1329,7 +1309,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
 	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
 		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
 
-		if (pdp == ppgtt->vm.scratch_pdp)
+		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
 			continue;
 
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
@@ -1351,7 +1331,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	else
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
 
-	gen8_free_scratch(vm);
+	free_scratch(vm);
 }
 
 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
@@ -1367,7 +1347,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		const int count = gen8_pte_count(start, length);
 
-		if (pt == vm->scratch_pt) {
+		if (px_base(pt) == &vm->scratch_pt) {
 			spin_unlock(&pd->lock);
 
 			pt = fetch_and_zero(&alloc);
@@ -1379,10 +1359,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 			}
 
 			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
-				gen8_initialize_pt(vm, pt);
+				fill_px(pt, vm->scratch_pte);
 
 			spin_lock(&pd->lock);
-			if (pd->entry[pde] == vm->scratch_pt) {
+			if (pd->entry[pde] == &vm->scratch_pt) {
 				set_pd_entry(pd, pde, pt);
 			} else {
 				alloc = pt;
@@ -1414,7 +1394,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 
 	spin_lock(&pdp->lock);
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (pd == vm->scratch_pd) {
+		if (px_base(pd) == &vm->scratch_pd) {
 			spin_unlock(&pdp->lock);
 
 			pd = fetch_and_zero(&alloc);
@@ -1425,10 +1405,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 				goto unwind;
 			}
 
-			init_pd(vm, pd, vm->scratch_pt);
+			init_pd(pd, &vm->scratch_pt);
 
 			spin_lock(&pdp->lock);
-			if (pdp->entry[pdpe] == vm->scratch_pd) {
+			if (pdp->entry[pdpe] == &vm->scratch_pd) {
 				set_pd_entry(pdp, pdpe, pd);
 			} else {
 				alloc = pd;
@@ -1449,7 +1429,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 	goto out;
 
 unwind_pd:
-	if (release_pd_entry(pdp, pdpe, &pd->used, px_base(vm->scratch_pd)))
+	if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
 		free_pd(vm, pd);
 unwind:
 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
@@ -1478,7 +1458,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 
 	spin_lock(&pml4->lock);
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (pdp == vm->scratch_pdp) {
+		if (px_base(pdp) == &vm->scratch_pdp) {
 			spin_unlock(&pml4->lock);
 
 			pdp = fetch_and_zero(&alloc);
@@ -1489,10 +1469,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 				goto unwind;
 			}
 
-			init_pd(vm, pdp, vm->scratch_pd);
+			init_pd(pdp, &vm->scratch_pd);
 
 			spin_lock(&pml4->lock);
-			if (pml4->entry[pml4e] == vm->scratch_pdp) {
+			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
 				set_pd_entry(pml4, pml4e, pdp);
 			} else {
 				alloc = pdp;
@@ -1513,7 +1493,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 	goto out;
 
 unwind_pdp:
-	if (release_pd_entry(pml4, pml4e, &pdp->used, px_base(vm->scratch_pdp)))
+	if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
 		free_pd(vm, pdp);
 unwind:
 	gen8_ppgtt_clear_4lvl(vm, from, start - from);
@@ -1537,7 +1517,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 		if (IS_ERR(pd))
 			goto unwind;
 
-		init_pd(vm, pd, vm->scratch_pt);
+		init_pd(pd, &vm->scratch_pt);
 		set_pd_entry(pdp, pdpe, pd);
 	}
 
@@ -1568,10 +1548,10 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
 
 static void init_pd_n(struct i915_address_space *vm,
 		      struct i915_page_directory *pd,
-		      struct i915_page_directory *to,
+		      struct i915_page_dma *to,
 		      const unsigned int entries)
 {
-	const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC);
+	const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
 	u64 * const vaddr = kmap_atomic(pd->base.page);
 
 	memset64(vaddr, daddr, entries);
@@ -1588,7 +1568,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 	if (i915_vm_is_4lvl(vm)) {
 		pd = alloc_pd(vm);
 		if (!IS_ERR(pd))
-			init_pd(vm, pd, vm->scratch_pdp);
+			init_pd(pd, &vm->scratch_pdp);
 
 		return pd;
 	}
@@ -1605,7 +1585,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES);
+	init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES);
 
 	return pd;
 }
@@ -1678,7 +1658,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 err_free_pd:
 	free_pd(&ppgtt->vm, ppgtt->pd);
 err_free_scratch:
-	gen8_free_scratch(&ppgtt->vm);
+	free_scratch(&ppgtt->vm);
 err_free:
 	kfree(ppgtt);
 	return ERR_PTR(err);
@@ -1763,7 +1743,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		const unsigned int count = min(num_entries, GEN6_PTES - pte);
 		gen6_pte_t *vaddr;
 
-		GEM_BUG_ON(pt == vm->scratch_pt);
+		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
 
 		num_entries -= count;
 
@@ -1800,7 +1780,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sgt_dma iter = sgt_dma(vma);
 	gen6_pte_t *vaddr;
 
-	GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt);
+	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt);
 
 	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
 	do {
@@ -1845,7 +1825,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	gen6_for_each_pde(pt, pd, start, length, pde) {
 		const unsigned int count = gen6_pte_count(start, length);
 
-		if (pt == vm->scratch_pt) {
+		if (px_base(pt) == &vm->scratch_pt) {
 			spin_unlock(&pd->lock);
 
 			pt = fetch_and_zero(&alloc);
@@ -1856,10 +1836,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 				goto unwind_out;
 			}
 
-			gen6_initialize_pt(vm, pt);
+			fill32_px(pt, vm->scratch_pte);
 
 			spin_lock(&pd->lock);
-			if (pd->entry[pde] == vm->scratch_pt) {
+			if (pd->entry[pde] == &vm->scratch_pt) {
 				pd->entry[pde] = pt;
 				if (i915_vma_is_bound(ppgtt->vma,
 						      I915_VMA_GLOBAL_BIND)) {
@@ -1908,26 +1888,18 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
 					 I915_CACHE_NONE,
 					 PTE_READ_ONLY);
 
-	vm->scratch_pt = alloc_pt(vm);
-	if (IS_ERR(vm->scratch_pt)) {
+	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
 		cleanup_scratch_page(vm);
-		return PTR_ERR(vm->scratch_pt);
+		return -ENOMEM;
 	}
-
-	gen6_initialize_pt(vm, vm->scratch_pt);
+	fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte);
 
 	gen6_for_all_pdes(unused, pd, pde)
-		pd->entry[pde] = vm->scratch_pt;
+		pd->entry[pde] = &vm->scratch_pt;
 
 	return 0;
 }
 
-static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
-{
-	free_pt(vm, vm->scratch_pt);
-	cleanup_scratch_page(vm);
-}
-
 static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 {
 	struct i915_page_directory * const pd = ppgtt->base.pd;
@@ -1935,7 +1907,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 	u32 pde;
 
 	gen6_for_all_pdes(pt, pd, pde)
-		if (pt != ppgtt->base.vm.scratch_pt)
+		if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
 			free_pt(&ppgtt->base.vm, pt);
 }
 
@@ -1950,7 +1922,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	gen6_ppgtt_free_pd(ppgtt);
-	gen6_ppgtt_free_scratch(vm);
+	free_scratch(vm);
 	kfree(ppgtt->base.pd);
 }
 
@@ -1993,7 +1965,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
 {
 	struct gen6_ppgtt *ppgtt = vma->private;
 	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
+	struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt;
 	struct i915_page_table *pt;
 	unsigned int pde;
 
@@ -2002,11 +1974,11 @@ static void pd_vma_unbind(struct i915_vma *vma)
 
 	/* Free all no longer used page tables */
 	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
-		if (atomic_read(&pt->used) || pt == scratch_pt)
+		if (px_base(pt) == scratch || atomic_read(&pt->used))
 			continue;
 
 		free_pt(&ppgtt->base.vm, pt);
-		pd->entry[pde] = scratch_pt;
+		pd->entry[pde] = scratch;
 	}
 
 	ppgtt->scan_for_unused_pt = false;
@@ -2148,7 +2120,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 	return &ppgtt->base;
 
 err_scratch:
-	gen6_ppgtt_free_scratch(&ppgtt->base.vm);
+	free_scratch(&ppgtt->base.vm);
 err_pd:
 	kfree(ppgtt->base.pd);
 err_free:
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 57a68ef4eda7..860850411a1b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -304,9 +304,9 @@ struct i915_address_space {
 	u64 scratch_pte;
 	int scratch_order;
 	struct i915_page_dma scratch_page;
-	struct i915_page_table *scratch_pt;
-	struct i915_page_directory *scratch_pd;
-	struct i915_page_directory *scratch_pdp; /* GEN8+ & 48b PPGTT */
+	struct i915_page_dma scratch_pt;
+	struct i915_page_dma scratch_pd;
+	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
 
 	/**
 	 * List of vma currently bound.
-- 
2.20.1


* [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
  2019-07-07 21:00 ` [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-09 14:43   ` Mika Kuoppala
  2019-07-07 21:00 ` [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc Chris Wilson
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

The page directory extends the page table with the shadow entries. Make
the page directory struct embed the page table for easier code reuse.
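
The enabler is the type-dispatching px_base()/px_pt() macros below. As a
standalone sketch of the __builtin_choose_expr pattern they build on
(simplified types; not the kernel macros themselves):

  struct base { int x; };
  struct table { struct base base; };
  struct directory { struct table pt; };  /* embedded, not pointed to */

  /* Resolve either a table or a directory to its embedded base. */
  #define to_base(px) \
          __builtin_choose_expr( \
                  __builtin_types_compatible_p(typeof(px), struct table *), \
                  &((struct table *)(px))->base, \
                  &((struct directory *)(px))->pt.base)

Only the chosen arm supplies the value (the casts keep the other arm
compilable), so a single macro accepts either pointer type at compile
time; that is what lets the common helpers take any level of the shadow
tree.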

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 100 ++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  31 +++++--
 3 files changed, 70 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index b33cfc56f623..9163b5238082 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1503,7 +1503,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
 
 	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = ppgtt->pd->base.ggtt_offset << 10;
+	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
 
 	intel_ring_advance(rq, cs);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 937236913e70..1fa93f56792e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -107,6 +107,8 @@
  *
  */
 
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
 static int
 i915_get_ggtt_vma_pages(struct i915_vma *vma);
 
@@ -712,28 +714,17 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 	return pt;
 }
 
-static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
-{
-	cleanup_page_dma(vm, &pt->base);
-	kfree(pt);
-}
-
 static struct i915_page_directory *__alloc_pd(void)
 {
 	struct i915_page_directory *pd;
 
 	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
-
 	if (unlikely(!pd))
 		return NULL;
 
-	memset(&pd->base, 0, sizeof(pd->base));
-	atomic_set(&pd->used, 0);
+	atomic_set(px_used(pd), 0);
 	spin_lock_init(&pd->lock);
 
-	/* for safety */
-	pd->entry[0] = NULL;
-
 	return pd;
 }
 
@@ -745,7 +736,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
-	if (unlikely(setup_page_dma(vm, &pd->base))) {
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
 		kfree(pd);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -753,13 +744,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 	return pd;
 }
 
-static void free_pd(struct i915_address_space *vm,
-		    struct i915_page_directory *pd)
+static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
 {
-	cleanup_page_dma(vm, &pd->base);
+	cleanup_page_dma(vm, pd);
 	kfree(pd);
 }
 
+#define free_px(vm, px) free_pd(vm, px_base(px))
+
 static void init_pd(struct i915_page_directory *pd,
 		    struct i915_page_dma *scratch)
 {
@@ -784,9 +776,9 @@ __set_pd_entry(struct i915_page_directory * const pd,
 	       struct i915_page_dma * const to,
 	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
-	GEM_BUG_ON(atomic_read(&pd->used) > 512);
+	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
 
-	atomic_inc(&pd->used);
+	atomic_inc(px_used(pd));
 	pd->entry[pde] = to;
 	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
 }
@@ -797,11 +789,11 @@ __clear_pd_entry(struct i915_page_directory * const pd,
 		 struct i915_page_dma * const to,
 		 u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
-	GEM_BUG_ON(atomic_read(&pd->used) == 0);
+	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
 
 	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
 	pd->entry[pde] = to;
-	atomic_dec(&pd->used);
+	atomic_dec(px_used(pd));
 }
 
 #define set_pd_entry(pd, pde, to) \
@@ -813,13 +805,13 @@ __clear_pd_entry(struct i915_page_directory * const pd,
 static bool
 release_pd_entry(struct i915_page_directory * const pd,
 		 const unsigned short pde,
-		 atomic_t *counter,
+		 struct i915_page_table * const pt,
 		 struct i915_page_dma * const scratch)
 {
 	bool free = false;
 
 	spin_lock(&pd->lock);
-	if (atomic_dec_and_test(counter)) {
+	if (atomic_dec_and_test(&pt->used)) {
 		clear_pd_entry(pd, pde, scratch);
 		free = true;
 	}
@@ -870,8 +862,8 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 
 		atomic_inc(&pt->used);
 		gen8_ppgtt_clear_pt(vm, pt, start, length);
-		if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
-			free_pt(vm, pt);
+		if (release_pd_entry(pd, pde, pt, &vm->scratch_pt))
+			free_px(vm, pt);
 	}
 }
 
@@ -888,10 +880,10 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
 
-		atomic_inc(&pd->used);
+		atomic_inc(px_used(pd));
 		gen8_ppgtt_clear_pd(vm, pd, start, length);
-		if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
-			free_pd(vm, pd);
+		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
+			free_px(vm, pd);
 	}
 }
 
@@ -918,10 +910,10 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
 
-		atomic_inc(&pdp->used);
+		atomic_inc(px_used(pdp));
 		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
-		if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
-			free_pd(vm, pdp);
+		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
+			free_px(vm, pdp);
 	}
 }
 
@@ -1176,7 +1168,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 
 	for (i = 0; i < I915_PDES; i++) {
 		if (pd->entry[i] != &vm->scratch_pt)
-			free_pt(vm, pd->entry[i]);
+			free_pd(vm, pd->entry[i]);
 	}
 }
 
@@ -1255,9 +1247,9 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 	int i;
 
 	if (create)
-		atomic_inc(&ppgtt->pd->used); /* never remove */
+		atomic_inc(px_used(ppgtt->pd)); /* never remove */
 	else
-		atomic_dec(&ppgtt->pd->used);
+		atomic_dec(px_used(ppgtt->pd));
 
 	if (i915_vm_is_4lvl(vm)) {
 		const u64 daddr = px_dma(ppgtt->pd);
@@ -1298,7 +1290,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 		free_pd(vm, pdp->entry[i]);
 	}
 
-	free_pd(vm, pdp);
+	free_px(vm, pdp);
 }
 
 static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
@@ -1315,7 +1307,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
 	}
 
-	free_pd(&ppgtt->vm, pml4);
+	free_px(&ppgtt->vm, pml4);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1379,7 +1371,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
 out:
 	if (alloc)
-		free_pt(vm, alloc);
+		free_px(vm, alloc);
 	return ret;
 }
 
@@ -1415,7 +1407,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 				pd = pdp->entry[pdpe];
 			}
 		}
-		atomic_inc(&pd->used);
+		atomic_inc(px_used(pd));
 		spin_unlock(&pdp->lock);
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
@@ -1423,19 +1415,19 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 			goto unwind_pd;
 
 		spin_lock(&pdp->lock);
-		atomic_dec(&pd->used);
+		atomic_dec(px_used(pd));
 	}
 	spin_unlock(&pdp->lock);
 	goto out;
 
 unwind_pd:
-	if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
-		free_pd(vm, pd);
+	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
+		free_px(vm, pd);
 unwind:
 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
 out:
 	if (alloc)
-		free_pd(vm, alloc);
+		free_px(vm, alloc);
 	return ret;
 }
 
@@ -1479,7 +1471,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 				pdp = pml4->entry[pml4e];
 			}
 		}
-		atomic_inc(&pdp->used);
+		atomic_inc(px_used(pdp));
 		spin_unlock(&pml4->lock);
 
 		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
@@ -1487,19 +1479,19 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 			goto unwind_pdp;
 
 		spin_lock(&pml4->lock);
-		atomic_dec(&pdp->used);
+		atomic_dec(px_used(pdp));
 	}
 	spin_unlock(&pml4->lock);
 	goto out;
 
 unwind_pdp:
-	if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
-		free_pd(vm, pdp);
+	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
+		free_px(vm, pdp);
 unwind:
 	gen8_ppgtt_clear_4lvl(vm, from, start - from);
 out:
 	if (alloc)
-		free_pd(vm, alloc);
+		free_px(vm, alloc);
 	return ret;
 }
 
@@ -1525,7 +1517,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 
 unwind:
 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
-	atomic_set(&pdp->used, 0);
+	atomic_set(px_used(pdp), 0);
 	return -ENOMEM;
 }
 
@@ -1552,7 +1544,7 @@ static void init_pd_n(struct i915_address_space *vm,
 		      const unsigned int entries)
 {
 	const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
-	u64 * const vaddr = kmap_atomic(pd->base.page);
+	u64 * const vaddr = kmap_atomic_px(pd);
 
 	memset64(vaddr, daddr, entries);
 	kunmap_atomic(vaddr);
@@ -1580,7 +1572,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 
 	pd->entry[GEN8_3LVL_PDPES] = NULL;
 
-	if (unlikely(setup_page_dma(vm, &pd->base))) {
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
 		kfree(pd);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1656,7 +1648,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	return ppgtt;
 
 err_free_pd:
-	free_pd(&ppgtt->vm, ppgtt->pd);
+	free_px(&ppgtt->vm, ppgtt->pd);
 err_free_scratch:
 	free_scratch(&ppgtt->vm);
 err_free:
@@ -1867,7 +1859,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	gen6_ppgtt_clear_range(vm, from, start - from);
 out:
 	if (alloc)
-		free_pt(vm, alloc);
+		free_px(vm, alloc);
 	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
 	return ret;
 }
@@ -1908,7 +1900,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 
 	gen6_for_all_pdes(pt, pd, pde)
 		if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
-			free_pt(&ppgtt->base.vm, pt);
+			free_px(&ppgtt->base.vm, pt);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1949,7 +1941,7 @@ static int pd_vma_bind(struct i915_vma *vma,
 	struct i915_page_table *pt;
 	unsigned int pde;
 
-	ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
 	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
 
 	gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
@@ -1977,7 +1969,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
 		if (px_base(pt) == scratch || atomic_read(&pt->used))
 			continue;
 
-		free_pt(&ppgtt->base.vm, pt);
+		free_px(&ppgtt->base.vm, pt);
 		pd->entry[pde] = scratch;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 860850411a1b..48bb8c5125e3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -240,21 +240,37 @@ struct i915_page_dma {
 	};
 };
 
-#define px_base(px) (&(px)->base)
-#define px_dma(px) (px_base(px)->daddr)
-
 struct i915_page_table {
 	struct i915_page_dma base;
 	atomic_t used;
 };
 
 struct i915_page_directory {
-	struct i915_page_dma base;
-	atomic_t used;
+	struct i915_page_table pt;
 	spinlock_t lock;
 	void *entry[512];
 };
 
+#define __px_choose_expr(x, type, expr, other) \
+	__builtin_choose_expr( \
+	__builtin_types_compatible_p(typeof(x), type) || \
+	__builtin_types_compatible_p(typeof(x), const type), \
+	({ type __x = (type)(x); expr; }), \
+	other)
+
+#define px_base(px) \
+	__px_choose_expr(px, struct i915_page_dma *, __x, \
+	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
+	(void)0)))
+#define px_dma(px) (px_base(px)->daddr)
+
+#define px_pt(px) \
+	__px_choose_expr(px, struct i915_page_table *, __x, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
+	(void)0))
+#define px_used(px) (&px_pt(px)->used)
+
 struct i915_vma_ops {
 	/* Map an object into an address space with the given cache flags. */
 	int (*bind_vma)(struct i915_vma *vma,
@@ -589,10 +605,9 @@ static inline u64 gen8_pte_count(u64 address, u64 length)
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {
-	struct i915_page_directory *pd;
+	struct i915_page_dma *pt = ppgtt->pd->entry[n];
 
-	pd = i915_pdp_entry(ppgtt->pd, n);
-	return px_dma(pd);
+	return px_dma(pt);
 }
 
 static inline struct i915_ggtt *
-- 
2.20.1


* [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
  2019-07-07 21:00 ` [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch Chris Wilson
  2019-07-07 21:00 ` [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-09 14:59   ` Mika Kuoppala
  2019-07-07 21:00 ` [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth Chris Wilson
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

In preparation for refactoring the free/clear/alloc, first move the code
around so that we can avoid forward declarations in the next set of
patches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 673 ++++++++++++++--------------
 1 file changed, 337 insertions(+), 336 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1fa93f56792e..da4db76ce054 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -831,6 +831,104 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
 	ppgtt->pd_dirty_engines = ALL_ENGINES;
 }
 
+static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
+{
+	struct i915_address_space *vm = &ppgtt->vm;
+	struct drm_i915_private *dev_priv = vm->i915;
+	enum vgt_g2v_type msg;
+	int i;
+
+	if (create)
+		atomic_inc(px_used(ppgtt->pd)); /* never remove */
+	else
+		atomic_dec(px_used(ppgtt->pd));
+
+	if (i915_vm_is_4lvl(vm)) {
+		const u64 daddr = px_dma(ppgtt->pd);
+
+		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
+	} else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
+			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
+		}
+
+		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
+	}
+
+	I915_WRITE(vgtif_reg(g2v_notify), msg);
+
+	return 0;
+}
+
+static void gen8_free_page_tables(struct i915_address_space *vm,
+				  struct i915_page_directory *pd)
+{
+	int i;
+
+	for (i = 0; i < I915_PDES; i++) {
+		if (pd->entry[i] != &vm->scratch_pt)
+			free_pd(vm, pd->entry[i]);
+	}
+}
+
+static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
+				    struct i915_page_directory *pdp)
+{
+	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
+	int i;
+
+	for (i = 0; i < pdpes; i++) {
+		if (pdp->entry[i] == &vm->scratch_pd)
+			continue;
+
+		gen8_free_page_tables(vm, pdp->entry[i]);
+		free_pd(vm, pdp->entry[i]);
+	}
+
+	free_px(vm, pdp);
+}
+
+static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
+{
+	struct i915_page_directory * const pml4 = ppgtt->pd;
+	int i;
+
+	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
+		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
+
+		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
+			continue;
+
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
+	}
+
+	free_px(&ppgtt->vm, pml4);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct drm_i915_private *i915 = vm->i915;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+	if (intel_vgpu_active(i915))
+		gen8_ppgtt_notify_vgt(ppgtt, false);
+
+	if (i915_vm_is_4lvl(vm))
+		gen8_ppgtt_cleanup_4lvl(ppgtt);
+	else
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
+
+	free_scratch(vm);
+}
+
 /* Removes entries from a single page table, releasing it if it's empty.
  * Caller can use the return value to update higher-level entries.
  */
@@ -917,95 +1015,265 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	}
 }
 
-static inline struct sgt_dma {
-	struct scatterlist *sg;
-	dma_addr_t dma, max;
-} sgt_dma(struct i915_vma *vma) {
-	struct scatterlist *sg = vma->pages->sgl;
-	dma_addr_t addr = sg_dma_address(sg);
-	return (struct sgt_dma) { sg, addr, addr + sg->length };
-}
-
-struct gen8_insert_pte {
-	u16 pml4e;
-	u16 pdpe;
-	u16 pde;
-	u16 pte;
-};
 
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
+static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
+			       struct i915_page_directory *pd,
+			       u64 start, u64 length)
 {
-	return (struct gen8_insert_pte) {
-		 gen8_pml4e_index(start),
-		 gen8_pdpe_index(start),
-		 gen8_pde_index(start),
-		 gen8_pte_index(start),
-	};
-}
+	struct i915_page_table *pt, *alloc = NULL;
+	u64 from = start;
+	unsigned int pde;
+	int ret = 0;
 
-static __always_inline bool
-gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
-			      struct i915_page_directory *pdp,
-			      struct sgt_dma *iter,
-			      struct gen8_insert_pte *idx,
-			      enum i915_cache_level cache_level,
-			      u32 flags)
-{
-	struct i915_page_directory *pd;
-	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
-	gen8_pte_t *vaddr;
-	bool ret;
+	spin_lock(&pd->lock);
+	gen8_for_each_pde(pt, pd, start, length, pde) {
+		const int count = gen8_pte_count(start, length);
 
-	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-	pd = i915_pd_entry(pdp, idx->pdpe);
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
-	do {
-		vaddr[idx->pte] = pte_encode | iter->dma;
+		if (px_base(pt) == &vm->scratch_pt) {
+			spin_unlock(&pd->lock);
 
-		iter->dma += I915_GTT_PAGE_SIZE;
-		if (iter->dma >= iter->max) {
-			iter->sg = __sg_next(iter->sg);
-			if (!iter->sg) {
-				ret = false;
-				break;
+			pt = fetch_and_zero(&alloc);
+			if (!pt)
+				pt = alloc_pt(vm);
+			if (IS_ERR(pt)) {
+				ret = PTR_ERR(pt);
+				goto unwind;
 			}
 
-			iter->dma = sg_dma_address(iter->sg);
-			iter->max = iter->dma + iter->sg->length;
+			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
+				fill_px(pt, vm->scratch_pte);
+
+			spin_lock(&pd->lock);
+			if (pd->entry[pde] == &vm->scratch_pt) {
+				set_pd_entry(pd, pde, pt);
+			} else {
+				alloc = pt;
+				pt = pd->entry[pde];
+			}
 		}
 
-		if (++idx->pte == GEN8_PTES) {
-			idx->pte = 0;
+		atomic_add(count, &pt->used);
+	}
+	spin_unlock(&pd->lock);
+	goto out;
 
-			if (++idx->pde == I915_PDES) {
-				idx->pde = 0;
+unwind:
+	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
+out:
+	if (alloc)
+		free_px(vm, alloc);
+	return ret;
+}
 
-				/* Limited by sg length for 3lvl */
-				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
-					idx->pdpe = 0;
-					ret = true;
-					break;
-				}
+static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
+				struct i915_page_directory *pdp,
+				u64 start, u64 length)
+{
+	struct i915_page_directory *pd, *alloc = NULL;
+	u64 from = start;
+	unsigned int pdpe;
+	int ret = 0;
 
-				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-				pd = pdp->entry[idx->pdpe];
+	spin_lock(&pdp->lock);
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+		if (px_base(pd) == &vm->scratch_pd) {
+			spin_unlock(&pdp->lock);
+
+			pd = fetch_and_zero(&alloc);
+			if (!pd)
+				pd = alloc_pd(vm);
+			if (IS_ERR(pd)) {
+				ret = PTR_ERR(pd);
+				goto unwind;
 			}
 
-			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+			init_pd(pd, &vm->scratch_pt);
+
+			spin_lock(&pdp->lock);
+			if (pdp->entry[pdpe] == &vm->scratch_pd) {
+				set_pd_entry(pdp, pdpe, pd);
+			} else {
+				alloc = pd;
+				pd = pdp->entry[pdpe];
+			}
 		}
-	} while (1);
-	kunmap_atomic(vaddr);
+		atomic_inc(px_used(pd));
+		spin_unlock(&pdp->lock);
+
+		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
+		if (unlikely(ret))
+			goto unwind_pd;
+
+		spin_lock(&pdp->lock);
+		atomic_dec(px_used(pd));
+	}
+	spin_unlock(&pdp->lock);
+	goto out;
 
+unwind_pd:
+	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
+		free_px(vm, pd);
+unwind:
+	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
+out:
+	if (alloc)
+		free_px(vm, alloc);
 	return ret;
 }
 
-static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
-				   struct i915_vma *vma,
-				   enum i915_cache_level cache_level,
-				   u32 flags)
+static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
+				 u64 start, u64 length)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	return gen8_ppgtt_alloc_pdp(vm,
+				    i915_vm_to_ppgtt(vm)->pd, start, length);
+}
+
+static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
+				 u64 start, u64 length)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_page_directory * const pml4 = ppgtt->pd;
+	struct i915_page_directory *pdp, *alloc = NULL;
+	u64 from = start;
+	int ret = 0;
+	u32 pml4e;
+
+	spin_lock(&pml4->lock);
+	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+		if (px_base(pdp) == &vm->scratch_pdp) {
+			spin_unlock(&pml4->lock);
+
+			pdp = fetch_and_zero(&alloc);
+			if (!pdp)
+				pdp = alloc_pd(vm);
+			if (IS_ERR(pdp)) {
+				ret = PTR_ERR(pdp);
+				goto unwind;
+			}
+
+			init_pd(pdp, &vm->scratch_pd);
+
+			spin_lock(&pml4->lock);
+			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
+				set_pd_entry(pml4, pml4e, pdp);
+			} else {
+				alloc = pdp;
+				pdp = pml4->entry[pml4e];
+			}
+		}
+		atomic_inc(px_used(pdp));
+		spin_unlock(&pml4->lock);
+
+		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
+		if (unlikely(ret))
+			goto unwind_pdp;
+
+		spin_lock(&pml4->lock);
+		atomic_dec(px_used(pdp));
+	}
+	spin_unlock(&pml4->lock);
+	goto out;
+
+unwind_pdp:
+	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
+		free_px(vm, pdp);
+unwind:
+	gen8_ppgtt_clear_4lvl(vm, from, start - from);
+out:
+	if (alloc)
+		free_px(vm, alloc);
+	return ret;
+}
+
+static inline struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+} sgt_dma(struct i915_vma *vma) {
+	struct scatterlist *sg = vma->pages->sgl;
+	dma_addr_t addr = sg_dma_address(sg);
+	return (struct sgt_dma) { sg, addr, addr + sg->length };
+}
+
+struct gen8_insert_pte {
+	u16 pml4e;
+	u16 pdpe;
+	u16 pde;
+	u16 pte;
+};
+
+static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
+{
+	return (struct gen8_insert_pte) {
+		 gen8_pml4e_index(start),
+		 gen8_pdpe_index(start),
+		 gen8_pde_index(start),
+		 gen8_pte_index(start),
+	};
+}
+
+static __always_inline bool
+gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
+			      struct i915_page_directory *pdp,
+			      struct sgt_dma *iter,
+			      struct gen8_insert_pte *idx,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
+{
+	struct i915_page_directory *pd;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+	gen8_pte_t *vaddr;
+	bool ret;
+
+	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
+	pd = i915_pd_entry(pdp, idx->pdpe);
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+	do {
+		vaddr[idx->pte] = pte_encode | iter->dma;
+
+		iter->dma += I915_GTT_PAGE_SIZE;
+		if (iter->dma >= iter->max) {
+			iter->sg = __sg_next(iter->sg);
+			if (!iter->sg) {
+				ret = false;
+				break;
+			}
+
+			iter->dma = sg_dma_address(iter->sg);
+			iter->max = iter->dma + iter->sg->length;
+		}
+
+		if (++idx->pte == GEN8_PTES) {
+			idx->pte = 0;
+
+			if (++idx->pde == I915_PDES) {
+				idx->pde = 0;
+
+				/* Limited by sg length for 3lvl */
+				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
+					idx->pdpe = 0;
+					ret = true;
+					break;
+				}
+
+				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
+				pd = pdp->entry[idx->pdpe];
+			}
+
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+		}
+	} while (1);
+	kunmap_atomic(vaddr);
+
+	return ret;
+}
+
+static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
+				   struct i915_vma *vma,
+				   enum i915_cache_level cache_level,
+				   u32 flags)
+{
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
 	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
@@ -1161,17 +1429,6 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 	}
 }
 
-static void gen8_free_page_tables(struct i915_address_space *vm,
-				  struct i915_page_directory *pd)
-{
-	int i;
-
-	for (i = 0; i < I915_PDES; i++) {
-		if (pd->entry[i] != &vm->scratch_pt)
-			free_pd(vm, pd->entry[i]);
-	}
-}
-
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
 	int ret;
@@ -1239,262 +1496,6 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	return ret;
 }
 
-static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
-{
-	struct i915_address_space *vm = &ppgtt->vm;
-	struct drm_i915_private *dev_priv = vm->i915;
-	enum vgt_g2v_type msg;
-	int i;
-
-	if (create)
-		atomic_inc(px_used(ppgtt->pd)); /* never remove */
-	else
-		atomic_dec(px_used(ppgtt->pd));
-
-	if (i915_vm_is_4lvl(vm)) {
-		const u64 daddr = px_dma(ppgtt->pd);
-
-		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
-		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
-
-		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
-				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
-	} else {
-		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
-			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
-
-			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
-			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
-		}
-
-		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
-				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
-	}
-
-	I915_WRITE(vgtif_reg(g2v_notify), msg);
-
-	return 0;
-}
-
-static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
-				    struct i915_page_directory *pdp)
-{
-	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
-	int i;
-
-	for (i = 0; i < pdpes; i++) {
-		if (pdp->entry[i] == &vm->scratch_pd)
-			continue;
-
-		gen8_free_page_tables(vm, pdp->entry[i]);
-		free_pd(vm, pdp->entry[i]);
-	}
-
-	free_px(vm, pdp);
-}
-
-static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
-{
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	int i;
-
-	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
-		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
-
-		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
-			continue;
-
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
-	}
-
-	free_px(&ppgtt->vm, pml4);
-}
-
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
-{
-	struct drm_i915_private *i915 = vm->i915;
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (intel_vgpu_active(i915))
-		gen8_ppgtt_notify_vgt(ppgtt, false);
-
-	if (i915_vm_is_4lvl(vm))
-		gen8_ppgtt_cleanup_4lvl(ppgtt);
-	else
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
-
-	free_scratch(vm);
-}
-
-static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
-			       struct i915_page_directory *pd,
-			       u64 start, u64 length)
-{
-	struct i915_page_table *pt, *alloc = NULL;
-	u64 from = start;
-	unsigned int pde;
-	int ret = 0;
-
-	spin_lock(&pd->lock);
-	gen8_for_each_pde(pt, pd, start, length, pde) {
-		const int count = gen8_pte_count(start, length);
-
-		if (px_base(pt) == &vm->scratch_pt) {
-			spin_unlock(&pd->lock);
-
-			pt = fetch_and_zero(&alloc);
-			if (!pt)
-				pt = alloc_pt(vm);
-			if (IS_ERR(pt)) {
-				ret = PTR_ERR(pt);
-				goto unwind;
-			}
-
-			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
-				fill_px(pt, vm->scratch_pte);
-
-			spin_lock(&pd->lock);
-			if (pd->entry[pde] == &vm->scratch_pt) {
-				set_pd_entry(pd, pde, pt);
-			} else {
-				alloc = pt;
-				pt = pd->entry[pde];
-			}
-		}
-
-		atomic_add(count, &pt->used);
-	}
-	spin_unlock(&pd->lock);
-	goto out;
-
-unwind:
-	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
-static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
-				struct i915_page_directory *pdp,
-				u64 start, u64 length)
-{
-	struct i915_page_directory *pd, *alloc = NULL;
-	u64 from = start;
-	unsigned int pdpe;
-	int ret = 0;
-
-	spin_lock(&pdp->lock);
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (px_base(pd) == &vm->scratch_pd) {
-			spin_unlock(&pdp->lock);
-
-			pd = fetch_and_zero(&alloc);
-			if (!pd)
-				pd = alloc_pd(vm);
-			if (IS_ERR(pd)) {
-				ret = PTR_ERR(pd);
-				goto unwind;
-			}
-
-			init_pd(pd, &vm->scratch_pt);
-
-			spin_lock(&pdp->lock);
-			if (pdp->entry[pdpe] == &vm->scratch_pd) {
-				set_pd_entry(pdp, pdpe, pd);
-			} else {
-				alloc = pd;
-				pd = pdp->entry[pdpe];
-			}
-		}
-		atomic_inc(px_used(pd));
-		spin_unlock(&pdp->lock);
-
-		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
-		if (unlikely(ret))
-			goto unwind_pd;
-
-		spin_lock(&pdp->lock);
-		atomic_dec(px_used(pd));
-	}
-	spin_unlock(&pdp->lock);
-	goto out;
-
-unwind_pd:
-	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
-		free_px(vm, pd);
-unwind:
-	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
-static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
-				 u64 start, u64 length)
-{
-	return gen8_ppgtt_alloc_pdp(vm,
-				    i915_vm_to_ppgtt(vm)->pd, start, length);
-}
-
-static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
-				 u64 start, u64 length)
-{
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	struct i915_page_directory *pdp, *alloc = NULL;
-	u64 from = start;
-	int ret = 0;
-	u32 pml4e;
-
-	spin_lock(&pml4->lock);
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (px_base(pdp) == &vm->scratch_pdp) {
-			spin_unlock(&pml4->lock);
-
-			pdp = fetch_and_zero(&alloc);
-			if (!pdp)
-				pdp = alloc_pd(vm);
-			if (IS_ERR(pdp)) {
-				ret = PTR_ERR(pdp);
-				goto unwind;
-			}
-
-			init_pd(pdp, &vm->scratch_pd);
-
-			spin_lock(&pml4->lock);
-			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
-				set_pd_entry(pml4, pml4e, pdp);
-			} else {
-				alloc = pdp;
-				pdp = pml4->entry[pml4e];
-			}
-		}
-		atomic_inc(px_used(pdp));
-		spin_unlock(&pml4->lock);
-
-		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
-		if (unlikely(ret))
-			goto unwind_pdp;
-
-		spin_lock(&pml4->lock);
-		atomic_dec(px_used(pdp));
-	}
-	spin_unlock(&pml4->lock);
-	goto out;
-
-unwind_pdp:
-	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
-		free_px(vm, pdp);
-unwind:
-	gen8_ppgtt_clear_4lvl(vm, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 {
 	struct i915_address_space *vm = &ppgtt->vm;
-- 
2.20.1


* [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (2 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-10  8:17   ` Mika Kuoppala
  2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

Record the page-table depth of each address space (vm->top); this will be
useful to consolidate the recursive code in later patches.
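
As a toy illustration of the payoff (hypothetical code, not from this
series): once the depth is recorded, a single walker can serve the 2-, 3-
and 4-level layouts alike.

  struct dir { struct dir *entry[4]; };

  /* Count populated leaf tables at any depth. */
  static unsigned int count_leaves(const struct dir *d, int lvl)
  {
          unsigned int i, n = 0;

          for (i = 0; i < 4; i++) {
                  if (!d->entry[i])
                          continue;
                  n += lvl > 1 ? count_leaves(d->entry[i], lvl - 1) : 1;
          }
          return n;
  }

  /* A 4lvl gen8 caller would pass vm->top == 3; gen6 passes vm->top == 1. */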

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
 drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index da4db76ce054..2fc60e8acd9a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
 	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
 	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
 	ppgtt->vm.vma_ops.clear_pages = clear_pages;
+
+	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
 }
 
 static void init_pd_n(struct i915_address_space *vm,
@@ -2086,6 +2088,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 		return ERR_PTR(-ENOMEM);
 
 	ppgtt_init(&ppgtt->base, &i915->gt);
+	ppgtt->base.vm.top = 1;
 
 	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 48bb8c5125e3..119b6d33b266 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -323,6 +323,7 @@ struct i915_address_space {
 	struct i915_page_dma scratch_pt;
 	struct i915_page_dma scratch_pd;
 	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
+	int top;
 
 	/**
 	 * List of vma currently bound.
-- 
2.20.1


* [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (3 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-09 15:21   ` Chris Wilson
                     ` (2 more replies)
  2019-07-07 21:00 ` [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array Chris Wilson
                   ` (8 subsequent siblings)
  13 siblings, 3 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

The radix levels of each page directory are easily determined, so replace
the numerous hardcoded constants with precomputed derived constants.
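
For concreteness, with GEN8_PTE_SHIFT == 12 and I915_PDES == 512 (so
ilog2(I915_PDES) == 9), the derived shifts reproduce the usual
9-bits-per-level split of the 48b address:

  __gen8_pte_shift(0) == 12 + 0 * 9 == 12   (pte   index, bits 12-20)
  __gen8_pte_shift(1) == 12 + 1 * 9 == 21   (pde   index, bits 21-29)
  __gen8_pte_shift(2) == 12 + 2 * 9 == 30   (pdpe  index, bits 30-38)
  __gen8_pte_shift(3) == 12 + 3 * 9 == 39   (pml4e index, bits 39-47)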

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2fc60e8acd9a..271305705c1c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 	return 0;
 }
 
+/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
+#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES))
+#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
+#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
+#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
+
+static inline unsigned int
+gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx)
+{
+	const int shift = gen8_pd_shift(lvl);
+	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+	GEM_BUG_ON(addr >= end);
+	end += ~mask >> gen8_pd_shift(1);
+
+	*idx = i915_pde_index(addr, shift);
+	if ((addr ^ end) & mask)
+		return I915_PDES - *idx;
+	else
+		return i915_pde_index(end, shift) - *idx;
+}
+
+static inline bool gen8_pd_subsumes(u64 addr, u64 end, int lvl)
+{
+	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+	GEM_BUG_ON(addr >= end);
+	return (addr ^ end) & mask && (addr & ~mask) == 0;
+}
+
+static inline unsigned int gen8_pt_count(u64 addr, u64 end)
+{
+	GEM_BUG_ON(addr >= end);
+	if ((addr ^ end) & ~I915_PDE_MASK)
+		return I915_PDES - (addr & I915_PDE_MASK);
+	else
+		return end - addr;
+}
+
 static void gen8_free_page_tables(struct i915_address_space *vm,
 				  struct i915_page_directory *pd)
 {
-- 
2.20.1


* [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (4 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-10 14:18   ` Mika Kuoppala
  2019-07-07 21:00 ` [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries Chris Wilson
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC
  To: intel-gfx

Each level has its own scratch. Make the levels more obvious by forgoing
the fancy similar names and replacing them with a number: 0 is the
bottommost level, the physical page used for actual data; 1+ are the page
directories.
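
For reading the diff, the mapping from the old names to the array is:

  vm->scratch[0]  was scratch_page  (the data page unused PTEs point at)
  vm->scratch[1]  was scratch_pt    (filled with PTEs of scratch[0])
  vm->scratch[2]  was scratch_pd    (filled with PDEs of scratch[1])
  vm->scratch[3]  was scratch_pdp   (4lvl only; PDEs of scratch[2])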

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 206 ++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  14 +-
 2 files changed, 99 insertions(+), 121 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 271305705c1c..b7882f06214a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -634,7 +634,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
 
 	do {
-		int order = get_order(size);
+		unsigned int order = get_order(size);
 		struct page *page;
 		dma_addr_t addr;
 
@@ -653,8 +653,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 		if (unlikely(!IS_ALIGNED(addr, size)))
 			goto unmap_page;
 
-		vm->scratch_page.page = page;
-		vm->scratch_page.daddr = addr;
+		vm->scratch[0].base.page = page;
+		vm->scratch[0].base.daddr = addr;
 		vm->scratch_order = order;
 		return 0;
 
@@ -673,8 +673,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 
 static void cleanup_scratch_page(struct i915_address_space *vm)
 {
-	struct i915_page_dma *p = &vm->scratch_page;
-	int order = vm->scratch_order;
+	struct i915_page_dma *p = px_base(&vm->scratch[0]);
+	unsigned int order = vm->scratch_order;
 
 	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
 		       PCI_DMA_BIDIRECTIONAL);
@@ -683,15 +683,16 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
 
 static void free_scratch(struct i915_address_space *vm)
 {
-	if (!vm->scratch_page.daddr) /* set to 0 on clones */
+	int i;
+
+	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
 		return;
 
-	if (vm->scratch_pdp.daddr)
-		cleanup_page_dma(vm, &vm->scratch_pdp);
-	if (vm->scratch_pd.daddr)
-		cleanup_page_dma(vm, &vm->scratch_pd);
-	if (vm->scratch_pt.daddr)
-		cleanup_page_dma(vm, &vm->scratch_pt);
+	for (i = 1; i <= vm->top; i++) {
+		if (!px_dma(&vm->scratch[i]))
+			break;
+		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
+	}
 
 	cleanup_scratch_page(vm);
 }
@@ -753,9 +754,9 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
 #define free_px(vm, px) free_pd(vm, px_base(px))
 
 static void init_pd(struct i915_page_directory *pd,
-		    struct i915_page_dma *scratch)
+		    struct i915_page_scratch *scratch)
 {
-	fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
+	fill_px(pd, scratch->encode);
 	memset_p(pd->entry, scratch, 512);
 }
 
@@ -783,30 +784,26 @@ __set_pd_entry(struct i915_page_directory * const pd,
 	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
 }
 
+#define set_pd_entry(pd, pde, to) \
+	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
+
 static inline void
-__clear_pd_entry(struct i915_page_directory * const pd,
-		 const unsigned short pde,
-		 struct i915_page_dma * const to,
-		 u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
+clear_pd_entry(struct i915_page_directory * const pd,
+	       const unsigned short pde,
+	       struct i915_page_scratch * const scratch)
 {
 	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
 
-	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
-	pd->entry[pde] = to;
+	write_dma_entry(px_base(pd), pde, scratch->encode);
+	pd->entry[pde] = scratch;
 	atomic_dec(px_used(pd));
 }
 
-#define set_pd_entry(pd, pde, to) \
-	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
-
-#define clear_pd_entry(pd, pde, to) \
-	__clear_pd_entry((pd), (pde), (to), gen8_pde_encode)
-
 static bool
 release_pd_entry(struct i915_page_directory * const pd,
 		 const unsigned short pde,
 		 struct i915_page_table * const pt,
-		 struct i915_page_dma * const scratch)
+		 struct i915_page_scratch * const scratch)
 {
 	bool free = false;
 
@@ -913,7 +910,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < I915_PDES; i++) {
-		if (pd->entry[i] != &vm->scratch_pt)
+		if (pd->entry[i] != &vm->scratch[1])
 			free_pd(vm, pd->entry[i]);
 	}
 }
@@ -925,7 +922,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < pdpes; i++) {
-		if (pdp->entry[i] == &vm->scratch_pd)
+		if (pdp->entry[i] == &vm->scratch[2])
 			continue;
 
 		gen8_free_page_tables(vm, pdp->entry[i]);
@@ -943,7 +940,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
 	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
 		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
 
-		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
+		if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3]))
 			continue;
 
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
@@ -979,7 +976,9 @@ static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
 	gen8_pte_t *vaddr;
 
 	vaddr = kmap_atomic_px(pt);
-	memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
+	memset64(vaddr + gen8_pte_index(start),
+		 vm->scratch[0].encode,
+		 num_entries);
 	kunmap_atomic(vaddr);
 
 	GEM_BUG_ON(num_entries > atomic_read(&pt->used));
@@ -995,11 +994,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
+		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
 
 		atomic_inc(&pt->used);
 		gen8_ppgtt_clear_pt(vm, pt, start, length);
-		if (release_pd_entry(pd, pde, pt, &vm->scratch_pt))
+		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
 			free_px(vm, pt);
 	}
 }
@@ -1015,11 +1014,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
+		GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2]));
 
 		atomic_inc(px_used(pd));
 		gen8_ppgtt_clear_pd(vm, pd, start, length);
-		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
+		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
 			free_px(vm, pd);
 	}
 }
@@ -1045,16 +1044,15 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
+		GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3]));
 
 		atomic_inc(px_used(pdp));
 		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
-		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
+		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
 			free_px(vm, pdp);
 	}
 }
 
-
 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 			       struct i915_page_directory *pd,
 			       u64 start, u64 length)
@@ -1068,7 +1066,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		const int count = gen8_pte_count(start, length);
 
-		if (px_base(pt) == &vm->scratch_pt) {
+		if (px_base(pt) == px_base(&vm->scratch[1])) {
 			spin_unlock(&pd->lock);
 
 			pt = fetch_and_zero(&alloc);
@@ -1080,10 +1078,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 			}
 
 			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
-				fill_px(pt, vm->scratch_pte);
+				fill_px(pt, vm->scratch[0].encode);
 
 			spin_lock(&pd->lock);
-			if (pd->entry[pde] == &vm->scratch_pt) {
+			if (pd->entry[pde] == &vm->scratch[1]) {
 				set_pd_entry(pd, pde, pt);
 			} else {
 				alloc = pt;
@@ -1115,7 +1113,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 
 	spin_lock(&pdp->lock);
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (px_base(pd) == &vm->scratch_pd) {
+		if (px_base(pd) == px_base(&vm->scratch[2])) {
 			spin_unlock(&pdp->lock);
 
 			pd = fetch_and_zero(&alloc);
@@ -1126,10 +1124,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 				goto unwind;
 			}
 
-			init_pd(pd, &vm->scratch_pt);
+			init_pd(pd, &vm->scratch[1]);
 
 			spin_lock(&pdp->lock);
-			if (pdp->entry[pdpe] == &vm->scratch_pd) {
+			if (pdp->entry[pdpe] == &vm->scratch[2]) {
 				set_pd_entry(pdp, pdpe, pd);
 			} else {
 				alloc = pd;
@@ -1150,7 +1148,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 	goto out;
 
 unwind_pd:
-	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
+	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
 		free_px(vm, pd);
 unwind:
 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
@@ -1179,7 +1177,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 
 	spin_lock(&pml4->lock);
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (px_base(pdp) == &vm->scratch_pdp) {
+		if (px_base(pdp) == px_base(&vm->scratch[3])) {
 			spin_unlock(&pml4->lock);
 
 			pdp = fetch_and_zero(&alloc);
@@ -1190,10 +1188,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 				goto unwind;
 			}
 
-			init_pd(pdp, &vm->scratch_pd);
+			init_pd(pdp, &vm->scratch[2]);
 
 			spin_lock(&pml4->lock);
-			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
+			if (pml4->entry[pml4e] == &vm->scratch[3]) {
 				set_pd_entry(pml4, pml4e, pdp);
 			} else {
 				alloc = pdp;
@@ -1214,7 +1212,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 	goto out;
 
 unwind_pdp:
-	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
+	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
 		free_px(vm, pdp);
 unwind:
 	gen8_ppgtt_clear_4lvl(vm, from, start - from);
@@ -1428,7 +1426,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
 				u16 i;
 
-				encode = vma->vm->scratch_pte;
+				encode = vma->vm->scratch[0].encode;
 				vaddr = kmap_atomic_px(i915_pt_entry(pd,
 								     idx.pde));
 
@@ -1471,6 +1469,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
 	int ret;
+	int i;
 
 	/*
 	 * If everybody agrees to not to write into the scratch page,
@@ -1484,10 +1483,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 		GEM_BUG_ON(!clone->has_read_only);
 
 		vm->scratch_order = clone->scratch_order;
-		vm->scratch_pte = clone->scratch_pte;
-		vm->scratch_pt  = clone->scratch_pt;
-		vm->scratch_pd  = clone->scratch_pd;
-		vm->scratch_pdp = clone->scratch_pdp;
+		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
 		return 0;
 	}
 
@@ -1495,44 +1492,25 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	if (ret)
 		return ret;
 
-	vm->scratch_pte =
-		gen8_pte_encode(vm->scratch_page.daddr,
-				I915_CACHE_LLC,
-				vm->has_read_only);
+	vm->scratch[0].encode =
+		gen8_pte_encode(px_dma(&vm->scratch[0]),
+				I915_CACHE_LLC, vm->has_read_only);
 
-	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
-		ret = -ENOMEM;
-		goto free_scratch_page;
-	}
-	fill_page_dma(&vm->scratch_pt, vm->scratch_pte);
+	for (i = 1; i <= vm->top; i++) {
+		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+			goto free_scratch;
 
-	if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) {
-		ret = -ENOMEM;
-		goto free_pt;
-	}
-	fill_page_dma(&vm->scratch_pd,
-		      gen8_pde_encode(vm->scratch_pd.daddr, I915_CACHE_LLC));
-
-	if (i915_vm_is_4lvl(vm)) {
-		if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
-			ret = -ENOMEM;
-			goto free_pd;
-		}
-		fill_page_dma(&vm->scratch_pdp,
-			      gen8_pde_encode(vm->scratch_pdp.daddr,
-					      I915_CACHE_LLC));
+		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
+		vm->scratch[i].encode =
+			gen8_pde_encode(px_dma(&vm->scratch[i]),
+					I915_CACHE_LLC);
 	}
 
 	return 0;
 
-free_pd:
-	cleanup_page_dma(vm, &vm->scratch_pd);
-free_pt:
-	cleanup_page_dma(vm, &vm->scratch_pt);
-free_scratch_page:
-	cleanup_scratch_page(vm);
-
-	return ret;
+free_scratch:
+	free_scratch(vm);
+	return -ENOMEM;
 }
 
 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
@@ -1549,7 +1527,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 		if (IS_ERR(pd))
 			goto unwind;
 
-		init_pd(pd, &vm->scratch_pt);
+		init_pd(pd, &vm->scratch[1]);
 		set_pd_entry(pdp, pdpe, pd);
 	}
 
@@ -1582,16 +1560,15 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
 
 static void init_pd_n(struct i915_address_space *vm,
 		      struct i915_page_directory *pd,
-		      struct i915_page_dma *to,
+		      struct i915_page_scratch *scratch,
 		      const unsigned int entries)
 {
-	const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
 	u64 * const vaddr = kmap_atomic_px(pd);
 
-	memset64(vaddr, daddr, entries);
+	memset64(vaddr, scratch->encode, entries);
 	kunmap_atomic(vaddr);
 
-	memset_p(pd->entry, to, entries);
+	memset_p(pd->entry, scratch, entries);
 }
 
 static struct i915_page_directory *
@@ -1602,7 +1579,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 	if (i915_vm_is_4lvl(vm)) {
 		pd = alloc_pd(vm);
 		if (!IS_ERR(pd))
-			init_pd(pd, &vm->scratch_pdp);
+			init_pd(pd, &vm->scratch[3]);
 
 		return pd;
 	}
@@ -1619,7 +1596,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES);
+	init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES);
 
 	return pd;
 }
@@ -1766,7 +1743,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 {
 	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
 	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
-	const gen6_pte_t scratch_pte = vm->scratch_pte;
+	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
 	unsigned int pde = first_entry / GEN6_PTES;
 	unsigned int pte = first_entry % GEN6_PTES;
 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
@@ -1777,7 +1754,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		const unsigned int count = min(num_entries, GEN6_PTES - pte);
 		gen6_pte_t *vaddr;
 
-		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
+		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
 
 		num_entries -= count;
 
@@ -1814,7 +1791,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sgt_dma iter = sgt_dma(vma);
 	gen6_pte_t *vaddr;
 
-	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt);
+	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
 
 	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
 	do {
@@ -1859,7 +1836,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	gen6_for_each_pde(pt, pd, start, length, pde) {
 		const unsigned int count = gen6_pte_count(start, length);
 
-		if (px_base(pt) == &vm->scratch_pt) {
+		if (px_base(pt) == px_base(&vm->scratch[1])) {
 			spin_unlock(&pd->lock);
 
 			pt = fetch_and_zero(&alloc);
@@ -1870,10 +1847,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 				goto unwind_out;
 			}
 
-			fill32_px(pt, vm->scratch_pte);
+			fill32_px(pt, vm->scratch[0].encode);
 
 			spin_lock(&pd->lock);
-			if (pd->entry[pde] == &vm->scratch_pt) {
+			if (pd->entry[pde] == &vm->scratch[1]) {
 				pd->entry[pde] = pt;
 				if (i915_vma_is_bound(ppgtt->vma,
 						      I915_VMA_GLOBAL_BIND)) {
@@ -1910,26 +1887,23 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
 {
 	struct i915_address_space * const vm = &ppgtt->base.vm;
 	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_table *unused;
-	u32 pde;
 	int ret;
 
 	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
 	if (ret)
 		return ret;
 
-	vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-					 I915_CACHE_NONE,
-					 PTE_READ_ONLY);
+	vm->scratch[0].encode =
+		vm->pte_encode(px_dma(&vm->scratch[0]),
+			       I915_CACHE_NONE, PTE_READ_ONLY);
 
-	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
+	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
 		cleanup_scratch_page(vm);
 		return -ENOMEM;
 	}
-	fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte);
 
-	gen6_for_all_pdes(unused, pd, pde)
-		pd->entry[pde] = &vm->scratch_pt;
+	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
+	memset_p(pd->entry, &vm->scratch[1], I915_PDES);
 
 	return 0;
 }
@@ -1937,11 +1911,13 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
 static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 {
 	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_dma * const scratch =
+		px_base(&ppgtt->base.vm.scratch[1]);
 	struct i915_page_table *pt;
 	u32 pde;
 
 	gen6_for_all_pdes(pt, pd, pde)
-		if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
+		if (px_base(pt) != scratch)
 			free_px(&ppgtt->base.vm, pt);
 }
 
@@ -1999,7 +1975,8 @@ static void pd_vma_unbind(struct i915_vma *vma)
 {
 	struct gen6_ppgtt *ppgtt = vma->private;
 	struct i915_page_directory * const pd = ppgtt->base.pd;
-	struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt;
+	struct i915_page_dma * const scratch =
+		px_base(&ppgtt->base.vm.scratch[1]);
 	struct i915_page_table *pt;
 	unsigned int pde;
 
@@ -2405,7 +2382,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
 	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen8_pte_t scratch_pte = vm->scratch_pte;
+	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
 	gen8_pte_t __iomem *gtt_base =
 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
@@ -2530,8 +2507,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = vm->scratch_pte;
-
+	scratch_pte = vm->scratch[0].encode;
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 }
@@ -3005,8 +2981,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 		return ret;
 	}
 
-	ggtt->vm.scratch_pte =
-		ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
+	ggtt->vm.scratch[0].encode =
+		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
 				    I915_CACHE_NONE, 0);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 119b6d33b266..669b204d4c13 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -240,6 +240,11 @@ struct i915_page_dma {
 	};
 };
 
+struct i915_page_scratch {
+	struct i915_page_dma base;
+	u64 encode;
+};
+
 struct i915_page_table {
 	struct i915_page_dma base;
 	atomic_t used;
@@ -260,9 +265,10 @@ struct i915_page_directory {
 
 #define px_base(px) \
 	__px_choose_expr(px, struct i915_page_dma *, __x, \
+	__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
 	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
 	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
-	(void)0)))
+	(void)0))))
 #define px_dma(px) (px_base(px)->daddr)
 
 #define px_pt(px) \
@@ -317,12 +323,8 @@ struct i915_address_space {
 #define VM_CLASS_GGTT 0
 #define VM_CLASS_PPGTT 1
 
-	u64 scratch_pte;
+	struct i915_page_scratch scratch[4];
 	int scratch_order;
-	struct i915_page_dma scratch_page;
-	struct i915_page_dma scratch_pt;
-	struct i915_page_dma scratch_pd;
-	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
 	int top;
 
 	/**
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (5 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-10 16:21   ` Mika Kuoppala
  2019-07-07 21:00 ` [PATCH 08/11] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC (permalink / raw)
  To: intel-gfx

We can simplify our gtt walking code by comparing against NULL for
scratch entries as opposed to looking up the distinct per-level scratch
pointer.

The only caveat is to remember to protect external parties by mapping
the NULL back to the scratch top pd.
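
A minimal sketch of the resulting convention (the return statement
mirrors the i915_page_dir_dma_addr() hunk below):

	/* Internally, an unallocated shadow entry is simply NULL ... */
	if (!pd->entry[idx])
		pt = alloc_pt(vm);

	/* ... while external consumers get the scratch fallback: */
	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));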

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 124 +++++++++-------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
 2 files changed, 41 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b7882f06214a..a99b89502a90 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -596,18 +596,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
 
 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 
-#define fill_px(px, v) fill_page_dma(px_base(px), (v))
-#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
-
-static void fill_page_dma(struct i915_page_dma *p, const u64 val)
+static void
+fill_page_dma(struct i915_page_dma *p, const u64 val, unsigned int count)
 {
-	kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
+	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
 }
 
-static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
-{
-	fill_page_dma(p, (u64)v << 32 | v);
-}
+#define fill_px(px, v) fill_page_dma(px_base(px), (v), I915_PDES)
+#define fill32_px(px, v) do { \
+	u64 vv = lower_32_bits(v); \
+	fill_px(px, vv << 32 | vv); \
+} while (0)
 
 static int
 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
@@ -711,7 +710,6 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 	}
 
 	atomic_set(&pt->used, 0);
-
 	return pt;
 }
 
@@ -719,13 +717,11 @@ static struct i915_page_directory *__alloc_pd(void)
 {
 	struct i915_page_directory *pd;
 
-	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
+	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
 	if (unlikely(!pd))
 		return NULL;
 
-	atomic_set(px_used(pd), 0);
 	spin_lock_init(&pd->lock);
-
 	return pd;
 }
 
@@ -753,63 +749,56 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
 
 #define free_px(vm, px) free_pd(vm, px_base(px))
 
-static void init_pd(struct i915_page_directory *pd,
-		    struct i915_page_scratch *scratch)
-{
-	fill_px(pd, scratch->encode);
-	memset_p(pd->entry, scratch, 512);
-}
-
 static inline void
 write_dma_entry(struct i915_page_dma * const pdma,
-		const unsigned short pde,
+		const unsigned short idx,
 		const u64 encoded_entry)
 {
 	u64 * const vaddr = kmap_atomic(pdma->page);
 
-	vaddr[pde] = encoded_entry;
+	vaddr[idx] = encoded_entry;
 	kunmap_atomic(vaddr);
 }
 
 static inline void
 __set_pd_entry(struct i915_page_directory * const pd,
-	       const unsigned short pde,
+	       const unsigned short idx,
 	       struct i915_page_dma * const to,
 	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
 	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
 
 	atomic_inc(px_used(pd));
-	pd->entry[pde] = to;
-	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
+	pd->entry[idx] = to;
+	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
 }
 
-#define set_pd_entry(pd, pde, to) \
-	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
+#define set_pd_entry(pd, idx, to) \
+	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
 
 static inline void
 clear_pd_entry(struct i915_page_directory * const pd,
-	       const unsigned short pde,
-	       struct i915_page_scratch * const scratch)
+	       const unsigned short idx,
+	       const struct i915_page_scratch * const scratch)
 {
 	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
 
-	write_dma_entry(px_base(pd), pde, scratch->encode);
-	pd->entry[pde] = scratch;
+	write_dma_entry(px_base(pd), idx, scratch->encode);
+	pd->entry[idx] = NULL;
 	atomic_dec(px_used(pd));
 }
 
 static bool
 release_pd_entry(struct i915_page_directory * const pd,
-		 const unsigned short pde,
+		 const unsigned short idx,
 		 struct i915_page_table * const pt,
-		 struct i915_page_scratch * const scratch)
+		 const struct i915_page_scratch * const scratch)
 {
 	bool free = false;
 
 	spin_lock(&pd->lock);
 	if (atomic_dec_and_test(&pt->used)) {
-		clear_pd_entry(pd, pde, scratch);
+		clear_pd_entry(pd, idx, scratch);
 		free = true;
 	}
 	spin_unlock(&pd->lock);
@@ -910,7 +899,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < I915_PDES; i++) {
-		if (pd->entry[i] != &vm->scratch[1])
+		if (pd->entry[i])
 			free_pd(vm, pd->entry[i]);
 	}
 }
@@ -922,7 +911,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < pdpes; i++) {
-		if (pdp->entry[i] == &vm->scratch[2])
+		if (!pdp->entry[i])
 			continue;
 
 		gen8_free_page_tables(vm, pdp->entry[i]);
@@ -940,7 +929,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
 	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
 		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
 
-		if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3]))
+		if (!pdp)
 			continue;
 
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
@@ -994,8 +983,6 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
-
 		atomic_inc(&pt->used);
 		gen8_ppgtt_clear_pt(vm, pt, start, length);
 		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
@@ -1014,8 +1001,6 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2]));
-
 		atomic_inc(px_used(pd));
 		gen8_ppgtt_clear_pd(vm, pd, start, length);
 		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
@@ -1044,8 +1029,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3]));
-
 		atomic_inc(px_used(pdp));
 		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
 		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
@@ -1066,7 +1049,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		const int count = gen8_pte_count(start, length);
 
-		if (px_base(pt) == px_base(&vm->scratch[1])) {
+		if (!pt) {
 			spin_unlock(&pd->lock);
 
 			pt = fetch_and_zero(&alloc);
@@ -1081,7 +1064,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 				fill_px(pt, vm->scratch[0].encode);
 
 			spin_lock(&pd->lock);
-			if (pd->entry[pde] == &vm->scratch[1]) {
+			if (!pd->entry[pde]) {
 				set_pd_entry(pd, pde, pt);
 			} else {
 				alloc = pt;
@@ -1113,7 +1096,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 
 	spin_lock(&pdp->lock);
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (px_base(pd) == px_base(&vm->scratch[2])) {
+		if (!pd) {
 			spin_unlock(&pdp->lock);
 
 			pd = fetch_and_zero(&alloc);
@@ -1124,10 +1107,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 				goto unwind;
 			}
 
-			init_pd(pd, &vm->scratch[1]);
+			fill_px(pd, vm->scratch[1].encode);
 
 			spin_lock(&pdp->lock);
-			if (pdp->entry[pdpe] == &vm->scratch[2]) {
+			if (!pdp->entry[pdpe]) {
 				set_pd_entry(pdp, pdpe, pd);
 			} else {
 				alloc = pd;
@@ -1177,7 +1160,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 
 	spin_lock(&pml4->lock);
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (px_base(pdp) == px_base(&vm->scratch[3])) {
+		if (!pdp) {
 			spin_unlock(&pml4->lock);
 
 			pdp = fetch_and_zero(&alloc);
@@ -1188,10 +1171,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 				goto unwind;
 			}
 
-			init_pd(pdp, &vm->scratch[2]);
+			fill_px(pdp, vm->scratch[2].encode);
 
 			spin_lock(&pml4->lock);
-			if (pml4->entry[pml4e] == &vm->scratch[3]) {
+			if (!pml4->entry[pml4e]) {
 				set_pd_entry(pml4, pml4e, pdp);
 			} else {
 				alloc = pdp;
@@ -1527,7 +1510,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 		if (IS_ERR(pd))
 			goto unwind;
 
-		init_pd(pd, &vm->scratch[1]);
+		fill_px(pd, vm->scratch[1].encode);
 		set_pd_entry(pdp, pdpe, pd);
 	}
 
@@ -1558,46 +1541,19 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
 	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
 }
 
-static void init_pd_n(struct i915_address_space *vm,
-		      struct i915_page_directory *pd,
-		      struct i915_page_scratch *scratch,
-		      const unsigned int entries)
-{
-	u64 * const vaddr = kmap_atomic_px(pd);
-
-	memset64(vaddr, scratch->encode, entries);
-	kunmap_atomic(vaddr);
-
-	memset_p(pd->entry, scratch, entries);
-}
-
 static struct i915_page_directory *
 gen8_alloc_top_pd(struct i915_address_space *vm)
 {
+	const unsigned int count = vm->total >> __gen8_pte_shift(vm->top);
 	struct i915_page_directory *pd;
 
-	if (i915_vm_is_4lvl(vm)) {
-		pd = alloc_pd(vm);
-		if (!IS_ERR(pd))
-			init_pd(pd, &vm->scratch[3]);
+	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
 
+	pd = alloc_pd(vm);
+	if (IS_ERR(pd))
 		return pd;
-	}
-
-	/* 3lvl */
-	pd = __alloc_pd();
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
-	pd->entry[GEN8_3LVL_PDPES] = NULL;
-
-	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES);
 
+	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
 	return pd;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 669b204d4c13..2341944b9b17 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -610,7 +610,7 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {
 	struct i915_page_dma *pt = ppgtt->pd->entry[n];
 
-	return px_dma(pt);
+	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
 }
 
 static inline struct i915_ggtt *
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 08/11] drm/i915/gtt: Recursive cleanup for gen8
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (6 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-07 21:00 ` [PATCH 09/11] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC (permalink / raw)
  To: intel-gfx

With an explicit level, we can refactor the separate cleanup functions
as a simple recursive function. We take the opportunity to pass down
the size of each level so that we can cope with the different sizes of
the top level and avoid over-allocating for a 32/36-bit vm.
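
The saving for the smaller address spaces comes from sizing the top
level by its real entry count rather than a fixed 512; a sketch of the
arithmetic for a 32b vm:

	/* vm->total >> __gen8_pte_shift(vm->top) == GEN8_3LVL_PDPES (4)
	 * for a 32b vm, so only 4 entry slots are allocated: */
	count = vm->total >> __gen8_pte_shift(vm->top);
	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));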

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 93 ++++++++++-------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +-
 2 files changed, 33 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a99b89502a90..0625b07a1132 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -713,11 +713,11 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 	return pt;
 }
 
-static struct i915_page_directory *__alloc_pd(void)
+static struct i915_page_directory *__alloc_pd(size_t sz)
 {
 	struct i915_page_directory *pd;
 
-	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
+	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
 	if (unlikely(!pd))
 		return NULL;
 
@@ -729,7 +729,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
 
-	pd = __alloc_pd();
+	pd = __alloc_pd(sizeof(*pd));
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
@@ -766,7 +766,7 @@ __set_pd_entry(struct i915_page_directory * const pd,
 	       struct i915_page_dma * const to,
 	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
-	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
+	GEM_BUG_ON(atomic_read(px_used(pd)) > ARRAY_SIZE(pd->entry));
 
 	atomic_inc(px_used(pd));
 	pd->entry[idx] = to;
@@ -893,64 +893,34 @@ static inline unsigned int gen8_pt_count(u64 addr, u64 end)
 		return end - addr;
 }
 
-static void gen8_free_page_tables(struct i915_address_space *vm,
-				  struct i915_page_directory *pd)
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+				 struct i915_page_directory *pd,
+				 int count, int lvl)
 {
-	int i;
-
-	for (i = 0; i < I915_PDES; i++) {
-		if (pd->entry[i])
-			free_pd(vm, pd->entry[i]);
-	}
-}
-
-static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
-				    struct i915_page_directory *pdp)
-{
-	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
-	int i;
-
-	for (i = 0; i < pdpes; i++) {
-		if (!pdp->entry[i])
-			continue;
-
-		gen8_free_page_tables(vm, pdp->entry[i]);
-		free_pd(vm, pdp->entry[i]);
-	}
-
-	free_px(vm, pdp);
-}
-
-static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
-{
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	int i;
-
-	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
-		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
+	if (lvl) {
+		void **pde = pd->entry;
 
-		if (!pdp)
-			continue;
+		do {
+			if (!*pde)
+				continue;
 
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
+			__gen8_ppgtt_cleanup(vm, *pde, I915_PDES, lvl - 1);
+		} while (pde++, --count);
 	}
 
-	free_px(&ppgtt->vm, pml4);
+	free_px(vm, pd);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
-	struct drm_i915_private *i915 = vm->i915;
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
-	if (intel_vgpu_active(i915))
+	if (intel_vgpu_active(vm->i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
-	if (i915_vm_is_4lvl(vm))
-		gen8_ppgtt_cleanup_4lvl(ppgtt);
-	else
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
-
+	__gen8_ppgtt_cleanup(vm, ppgtt->pd,
+			     vm->total >> __gen8_pte_shift(vm->top),
+			     vm->top);
 	free_scratch(vm);
 }
 
@@ -1502,24 +1472,18 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 	struct i915_page_directory *pdp = ppgtt->pd;
 	struct i915_page_directory *pd;
 	u64 start = 0, length = ppgtt->vm.total;
-	u64 from = start;
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		pd = alloc_pd(vm);
 		if (IS_ERR(pd))
-			goto unwind;
+			return PTR_ERR(pd);
 
 		fill_px(pd, vm->scratch[1].encode);
 		set_pd_entry(pdp, pdpe, pd);
 	}
 
 	return 0;
-
-unwind:
-	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
-	atomic_set(px_used(pdp), 0);
-	return -ENOMEM;
 }
 
 static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
@@ -1549,9 +1513,14 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 
 	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
 
-	pd = alloc_pd(vm);
-	if (IS_ERR(pd))
-		return pd;
+	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
+
+	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
 	return pd;
@@ -1623,7 +1592,9 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	return ppgtt;
 
 err_free_pd:
-	free_px(&ppgtt->vm, ppgtt->pd);
+	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+			     ppgtt->vm.total >> __gen8_pte_shift(ppgtt->vm.top),
+			     ppgtt->vm.top);
 err_free_scratch:
 	free_scratch(&ppgtt->vm);
 err_free:
@@ -2069,7 +2040,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
 
-	ppgtt->base.pd = __alloc_pd();
+	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
 	if (!ppgtt->base.pd) {
 		err = -ENOMEM;
 		goto err_free;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 2341944b9b17..d0128f52184b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -253,7 +253,7 @@ struct i915_page_table {
 struct i915_page_directory {
 	struct i915_page_table pt;
 	spinlock_t lock;
-	void *entry[512];
+	void *entry[I915_PDES];
 };
 
 #define __px_choose_expr(x, type, expr, other) \
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 09/11] drm/i915/gtt: Recursive ppgtt clear for gen8
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (7 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 08/11] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-07 21:00 ` [PATCH 10/11] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC (permalink / raw)
  To: intel-gfx

With an explicit level, we can refactor the separate clear functions
as a simple recursive function. The additional knowledge of the level
allows us to spot when we can free an entire subtree at once.
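
As a worked example (assuming a fully populated 4-level vm), clearing
the first 1G converts to the pte range [0, 1 << 18); at lvl 1 that
range subsumes an entire page directory, so the fast path below
unhooks the PD and frees it and its 512 page tables in one call,
without writing a single scratch PTE:

	if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && /* all used? */
	    gen8_pd_subsumes(start, end, lvl)) {
		clear_pd_entry(pd, idx, scratch);   /* unhook the subtree */
		__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
		start += (u64)I915_PDES << gen8_pd_shift(lvl);
		continue;
	}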

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig.debug  |  15 +++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 154 ++++++++++++++++------------
 2 files changed, 105 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 8d922bb4d953..ed8c787058a5 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM
 
 	  If in doubt, say "N".
 
+config DRM_I915_TRACE_GTT
+	bool "Insert extra ftrace output from the GTT internals"
+	depends on DRM_I915_DEBUG_GEM
+	select TRACING
+	default n
+	help
+	  Enable additional and verbose debugging output that will spam
+	  ordinary tests, but may be vital for post-mortem debugging when
+	  used with /proc/sys/kernel/ftrace_dump_on_oops
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
+
 config DRM_I915_SW_FENCE_DEBUG_OBJECTS
         bool "Enable additional driver debugging for fence objects"
         depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0625b07a1132..e2d6169a8a66 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -46,6 +46,12 @@
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
 /**
  * DOC: Global GTT views
  *
@@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd,
 {
 	bool free = false;
 
+	if (atomic_add_unless(&pt->used, -1, 1))
+		return false;
+
 	spin_lock(&pd->lock);
 	if (atomic_dec_and_test(&pt->used)) {
 		clear_pd_entry(pd, idx, scratch);
@@ -924,86 +933,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	free_scratch(vm);
 }
 
-/* Removes entries from a single page table, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries.
- */
-static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
-				struct i915_page_table *pt,
-				u64 start, u64 length)
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 start, const u64 end, int lvl)
 {
-	const unsigned int num_entries = gen8_pte_count(start, length);
-	gen8_pte_t *vaddr;
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	unsigned int idx, len;
 
-	vaddr = kmap_atomic_px(pt);
-	memset64(vaddr + gen8_pte_index(start),
-		 vm->scratch[0].encode,
-		 num_entries);
-	kunmap_atomic(vaddr);
+	len = gen8_pd_range(start, end, lvl--, &idx);
+	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+	    __func__, vm, lvl + 1, start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
 
-	GEM_BUG_ON(num_entries > atomic_read(&pt->used));
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
+
+		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+		    gen8_pd_subsumes(start, end, lvl)) {
+			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+			    __func__, vm, lvl + 1, idx, start, end);
+			clear_pd_entry(pd, idx, scratch);
+			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+			start += (u64)I915_PDES << gen8_pd_shift(lvl);
+			continue;
+		}
 
-	atomic_sub(num_entries, &pt->used);
-}
+		if (lvl) {
+			start = __gen8_ppgtt_clear(vm, as_pd(pt),
+						   start, end, lvl);
+		} else {
+			unsigned int count;
+			u64 *vaddr;
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
-				struct i915_page_directory *pd,
-				u64 start, u64 length)
-{
-	struct i915_page_table *pt;
-	u32 pde;
+			count = gen8_pt_count(start, end);
+			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n",
+			    __func__, vm, lvl, start, end,
+			    gen8_pd_index(start, 0), count,
+			    atomic_read(&pt->used));
+			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
 
-	gen8_for_each_pde(pt, pd, start, length, pde) {
-		atomic_inc(&pt->used);
-		gen8_ppgtt_clear_pt(vm, pt, start, length);
-		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
+			vaddr = kmap_atomic_px(pt);
+			memset64(vaddr + gen8_pd_index(start, 0),
+				 vm->scratch[0].encode,
+				 count);
+			kunmap_atomic(vaddr);
+
+			atomic_sub(count, &pt->used);
+			start += count;
+		}
+
+		if (release_pd_entry(pd, idx, pt, scratch))
 			free_px(vm, pt);
-	}
+	} while (idx++, --len);
+
+	return start;
 }
 
-/* Removes entries from a single page dir pointer, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
-				 struct i915_page_directory * const pdp,
-				 u64 start, u64 length)
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+			     u64 start, u64 length)
 {
-	struct i915_page_directory *pd;
-	unsigned int pdpe;
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
 
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		atomic_inc(px_used(pd));
-		gen8_ppgtt_clear_pd(vm, pd, start, length);
-		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
-			free_px(vm, pd);
-	}
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
+
+	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+			   start, start + length, vm->top);
 }
 
-static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
-				  u64 start, u64 length)
+static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+				struct i915_page_directory *pd,
+				u64 start, u64 length)
 {
-	gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+
+	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
 }
 
-/* Removes entries from a single pml4.
- * This is the top-level structure in 4-level page tables used on gen8+.
- * Empty entries are always scratch pml4e.
- */
-static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
-				  u64 start, u64 length)
+static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+				 struct i915_page_directory * const pdp,
+				 u64 start, u64 length)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	struct i915_page_directory *pdp;
-	unsigned int pml4e;
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
 
-	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
 
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		atomic_inc(px_used(pdp));
-		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
-		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
-			free_px(vm, pdp);
-	}
+	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
 }
 
 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
@@ -1168,7 +1192,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
 		free_px(vm, pdp);
 unwind:
-	gen8_ppgtt_clear_4lvl(vm, from, start - from);
+	gen8_ppgtt_clear(vm, from, start - from);
 out:
 	if (alloc)
 		free_px(vm, alloc);
@@ -1481,6 +1505,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 
 		fill_px(pd, vm->scratch[1].encode);
 		set_pd_entry(pdp, pdpe, pd);
+		atomic_inc(px_used(pd)); /* keep pinned */
 	}
 
 	return 0;
@@ -1523,6 +1548,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 	}
 
 	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+	atomic_inc(px_used(pd)); /* mark as pinned */
 	return pd;
 }
 
@@ -1571,7 +1597,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
 		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
 		if (intel_vgpu_active(i915)) {
 			err = gen8_preallocate_top_level_pdp(ppgtt);
@@ -1581,9 +1606,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 
 		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
 	}
 
+	ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
 	if (intel_vgpu_active(i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 10/11] drm/i915/gtt: Recursive ppgtt alloc for gen8
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (8 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 09/11] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-07 21:00 ` [PATCH 11/11] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC (permalink / raw)
  To: intel-gfx

Refactor the separate allocation routines into a single recursive
function.
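
One detail worth noting, visible in the new entry point below: start is
passed down by pointer so the walker reports how far it got, which
collapses the error unwind into a single clear of the partial range:

	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
				 &start, start + length, vm->top);
	if (unlikely(err))
		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
				   from, start, vm->top);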

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 272 ++++++++++------------------
 1 file changed, 97 insertions(+), 175 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e2d6169a8a66..83f03f8f4d2c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1004,199 +1004,119 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
 			   start, start + length, vm->top);
 }
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
-				struct i915_page_directory *pd,
-				u64 start, u64 length)
-{
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-
-	__gen8_ppgtt_clear(vm, pd, start, start + length, 1);
-}
-
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
-				 struct i915_page_directory * const pdp,
-				 u64 start, u64 length)
-{
-	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
-	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
-
-	start >>= GEN8_PTE_SHIFT;
-	length >>= GEN8_PTE_SHIFT;
-
-	__gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
-}
-
-static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
-			       struct i915_page_directory *pd,
-			       u64 start, u64 length)
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+			      struct i915_page_directory * const pd,
+			      u64 * const start, u64 end, int lvl)
 {
-	struct i915_page_table *pt, *alloc = NULL;
-	u64 from = start;
-	unsigned int pde;
+	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+	struct i915_page_table *alloc = NULL;
+	unsigned int idx, len;
 	int ret = 0;
 
+	len = gen8_pd_range(*start, end, lvl--, &idx);
+	DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
+	    __func__, vm, lvl + 1, *start, end,
+	    idx, len, atomic_read(px_used(pd)));
+	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
+
 	spin_lock(&pd->lock);
-	gen8_for_each_pde(pt, pd, start, length, pde) {
-		const int count = gen8_pte_count(start, length);
+	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+	do {
+		struct i915_page_table *pt = pd->entry[idx];
 
 		if (!pt) {
 			spin_unlock(&pd->lock);
 
-			pt = fetch_and_zero(&alloc);
-			if (!pt)
-				pt = alloc_pt(vm);
-			if (IS_ERR(pt)) {
-				ret = PTR_ERR(pt);
-				goto unwind;
-			}
+			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+			    __func__, vm, lvl + 1, idx);
 
-			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
-				fill_px(pt, vm->scratch[0].encode);
+			pt = fetch_and_zero(&alloc);
+			if (lvl) {
+				if (!pt) {
+					pt = &alloc_pd(vm)->pt;
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						break;
+					}
+				}
 
-			spin_lock(&pd->lock);
-			if (!pd->entry[pde]) {
-				set_pd_entry(pd, pde, pt);
+				fill_px(pt, vm->scratch[lvl].encode);
 			} else {
-				alloc = pt;
-				pt = pd->entry[pde];
-			}
-		}
-
-		atomic_add(count, &pt->used);
-	}
-	spin_unlock(&pd->lock);
-	goto out;
-
-unwind:
-	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
-}
-
-static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
-				struct i915_page_directory *pdp,
-				u64 start, u64 length)
-{
-	struct i915_page_directory *pd, *alloc = NULL;
-	u64 from = start;
-	unsigned int pdpe;
-	int ret = 0;
+				if (!pt) {
+					pt = alloc_pt(vm);
+					if (IS_ERR(pt)) {
+						ret = PTR_ERR(pt);
+						break;
+					}
+				}
 
-	spin_lock(&pdp->lock);
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (!pd) {
-			spin_unlock(&pdp->lock);
-
-			pd = fetch_and_zero(&alloc);
-			if (!pd)
-				pd = alloc_pd(vm);
-			if (IS_ERR(pd)) {
-				ret = PTR_ERR(pd);
-				goto unwind;
+				if (intel_vgpu_active(vm->i915) ||
+				    gen8_pt_count(*start, end) < I915_PDES)
+					fill_px(pt, vm->scratch[lvl].encode);
 			}
 
-			fill_px(pd, vm->scratch[1].encode);
+			spin_lock(&pd->lock);
+			if (likely(!pd->entry[idx]))
+				set_pd_entry(pd, idx, pt);
+			else
+				alloc = pt, pt = pd->entry[idx];
+		}
 
-			spin_lock(&pdp->lock);
-			if (!pdp->entry[pdpe]) {
-				set_pd_entry(pdp, pdpe, pd);
-			} else {
-				alloc = pd;
-				pd = pdp->entry[pdpe];
+		if (lvl) {
+			atomic_inc(&pt->used);
+			spin_unlock(&pd->lock);
+
+			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+						 start, end, lvl);
+			if (unlikely(ret)) {
+				if (release_pd_entry(pd, idx, pt, scratch))
+					free_px(vm, pt);
+				goto out;
 			}
-		}
-		atomic_inc(px_used(pd));
-		spin_unlock(&pdp->lock);
 
-		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
-		if (unlikely(ret))
-			goto unwind_pd;
+			spin_lock(&pd->lock);
+			atomic_dec(&pt->used);
+			GEM_BUG_ON(!atomic_read(&pt->used));
+		} else {
+			unsigned int count = gen8_pt_count(*start, end);
 
-		spin_lock(&pdp->lock);
-		atomic_dec(px_used(pd));
-	}
-	spin_unlock(&pdp->lock);
-	goto out;
+			DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} inserting pte\n",
+			    __func__, vm, lvl, *start, end,
+			    gen8_pd_index(*start, 0), count,
+			    atomic_read(&pt->used));
 
-unwind_pd:
-	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
-		free_px(vm, pd);
-unwind:
-	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
+			atomic_add(count, &pt->used);
+			GEM_BUG_ON(atomic_read(&pt->used) > I915_PDES);
+			*start += count;
+		}
+	} while (idx++, --len);
+	spin_unlock(&pd->lock);
 out:
 	if (alloc)
 		free_px(vm, alloc);
 	return ret;
 }
 
-static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
-				 u64 start, u64 length)
-{
-	return gen8_ppgtt_alloc_pdp(vm,
-				    i915_vm_to_ppgtt(vm)->pd, start, length);
-}
-
-static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
-				 u64 start, u64 length)
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+			    u64 start, u64 length)
 {
-	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_page_directory * const pml4 = ppgtt->pd;
-	struct i915_page_directory *pdp, *alloc = NULL;
 	u64 from = start;
-	int ret = 0;
-	u32 pml4e;
-
-	spin_lock(&pml4->lock);
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (!pdp) {
-			spin_unlock(&pml4->lock);
-
-			pdp = fetch_and_zero(&alloc);
-			if (!pdp)
-				pdp = alloc_pd(vm);
-			if (IS_ERR(pdp)) {
-				ret = PTR_ERR(pdp);
-				goto unwind;
-			}
-
-			fill_px(pdp, vm->scratch[2].encode);
+	int err;
 
-			spin_lock(&pml4->lock);
-			if (!pml4->entry[pml4e]) {
-				set_pd_entry(pml4, pml4e, pdp);
-			} else {
-				alloc = pdp;
-				pdp = pml4->entry[pml4e];
-			}
-		}
-		atomic_inc(px_used(pdp));
-		spin_unlock(&pml4->lock);
+	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
 
-		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
-		if (unlikely(ret))
-			goto unwind_pdp;
+	start >>= GEN8_PTE_SHIFT;
+	length >>= GEN8_PTE_SHIFT;
+	GEM_BUG_ON(length == 0);
 
-		spin_lock(&pml4->lock);
-		atomic_dec(px_used(pdp));
-	}
-	spin_unlock(&pml4->lock);
-	goto out;
+	err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+				 &start, start + length, vm->top);
+	if (unlikely(err))
+		__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+				   from, start, vm->top);
 
-unwind_pdp:
-	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
-		free_px(vm, pdp);
-unwind:
-	gen8_ppgtt_clear(vm, from, start - from);
-out:
-	if (alloc)
-		free_px(vm, alloc);
-	return ret;
+	return err;
 }
 
 static inline struct sgt_dma {
@@ -1493,19 +1413,22 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 {
 	struct i915_address_space *vm = &ppgtt->vm;
-	struct i915_page_directory *pdp = ppgtt->pd;
-	struct i915_page_directory *pd;
-	u64 start = 0, length = ppgtt->vm.total;
-	unsigned int pdpe;
+	struct i915_page_directory *pd = ppgtt->pd;
+	unsigned int idx;
+
+	GEM_BUG_ON(vm->top != 2);
+	GEM_BUG_ON((vm->total >> __gen8_pte_shift(2)) != GEN8_3LVL_PDPES);
+
+	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+		struct i915_page_directory *pde;
 
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		pd = alloc_pd(vm);
-		if (IS_ERR(pd))
-			return PTR_ERR(pd);
+		pde = alloc_pd(vm);
+		if (IS_ERR(pde))
+			return PTR_ERR(pde);
 
-		fill_px(pd, vm->scratch[1].encode);
-		set_pd_entry(pdp, pdpe, pd);
-		atomic_inc(px_used(pd)); /* keep pinned */
+		fill_px(pde, vm->scratch[1].encode);
+		set_pd_entry(pd, idx, pde);
+		atomic_inc(px_used(pde)); /* keep pinned */
 	}
 
 	return 0;
@@ -1595,7 +1518,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	}
 
 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
 	} else {
 		if (intel_vgpu_active(i915)) {
@@ -1604,10 +1526,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 				goto err_free_pd;
 		}
 
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
 	}
 
+	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
 	ppgtt->vm.clear_range = gen8_ppgtt_clear;
 
 	if (intel_vgpu_active(i915))
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* [PATCH 11/11] drm/i915/gtt: Tidy up ppgtt insertion for gen8
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (9 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 10/11] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
@ 2019-07-07 21:00 ` Chris Wilson
  2019-07-07 21:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch Patchwork
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-07 21:00 UTC (permalink / raw)
  To: intel-gfx

Apply the new radix shift helpers to extract the multi-level indices
cleanly when inserting PTEs into the GTT tree.
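
As a rough sketch of the arithmetic (illustrative only; the in-tree
helpers are built from the radix macros added earlier in the series,
so the exact spellings differ), each level decodes nine bits of the
page-frame number:

	/* Illustrative sketch, not the in-tree definitions. */
	#define GEN8_PTE_SHIFT	12			/* 4K pages */

	/* idx is the address with the low 12 bits shifted out */
	static inline unsigned int gen8_pd_index(u64 idx, int lvl)
	{
		return (idx >> (9 * lvl)) & 0x1ff;	/* 512 entries/level */
	}

	/* convenience form taking the raw GTT address */
	static inline unsigned int __gen8_pte_index(u64 addr, int lvl)
	{
		return gen8_pd_index(addr >> GEN8_PTE_SHIFT, lvl);
	}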

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  92 ++--------------------
 2 files changed, 49 insertions(+), 158 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 83f03f8f4d2c..e2c3c4288edb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1128,47 +1128,28 @@ static inline struct sgt_dma {
 	return (struct sgt_dma) { sg, addr, addr + sg->length };
 }
 
-struct gen8_insert_pte {
-	u16 pml4e;
-	u16 pdpe;
-	u16 pde;
-	u16 pte;
-};
-
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
-{
-	return (struct gen8_insert_pte) {
-		 gen8_pml4e_index(start),
-		 gen8_pdpe_index(start),
-		 gen8_pde_index(start),
-		 gen8_pte_index(start),
-	};
-}
-
-static __always_inline bool
+static __always_inline u64
 gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 			      struct i915_page_directory *pdp,
 			      struct sgt_dma *iter,
-			      struct gen8_insert_pte *idx,
+			      u64 idx,
 			      enum i915_cache_level cache_level,
 			      u32 flags)
 {
 	struct i915_page_directory *pd;
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	gen8_pte_t *vaddr;
-	bool ret;
 
-	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-	pd = i915_pd_entry(pdp, idx->pdpe);
-	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 	do {
-		vaddr[idx->pte] = pte_encode | iter->dma;
+		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
 
 		iter->dma += I915_GTT_PAGE_SIZE;
 		if (iter->dma >= iter->max) {
 			iter->sg = __sg_next(iter->sg);
 			if (!iter->sg) {
-				ret = false;
+				idx = 0;
 				break;
 			}
 
@@ -1176,30 +1157,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
 			iter->max = iter->dma + iter->sg->length;
 		}
 
-		if (++idx->pte == GEN8_PTES) {
-			idx->pte = 0;
-
-			if (++idx->pde == I915_PDES) {
-				idx->pde = 0;
-
+		if (gen8_pd_index(++idx, 0) == 0) {
+			if (gen8_pd_index(idx, 1) == 0) {
 				/* Limited by sg length for 3lvl */
-				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
-					idx->pdpe = 0;
-					ret = true;
+				if (gen8_pd_index(idx, 2) == 0)
 					break;
-				}
 
-				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-				pd = pdp->entry[idx->pdpe];
+				pd = pdp->entry[gen8_pd_index(idx, 2)];
 			}
 
 			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
 		}
 	} while (1);
 	kunmap_atomic(vaddr);
 
-	return ret;
+	return idx;
 }
 
 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
@@ -1209,9 +1182,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
-	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
+	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
+				      vma->node.start >> GEN8_PTE_SHIFT,
 				      cache_level, flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -1228,39 +1201,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 	dma_addr_t rem = iter->sg->length;
 
 	do {
-		struct gen8_insert_pte idx = gen8_insert_pte(start);
 		struct i915_page_directory *pdp =
-			i915_pdp_entry(pml4, idx.pml4e);
-		struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
-		unsigned int page_size;
-		bool maybe_64K = false;
+			i915_pd_entry(pml4, __gen8_pte_index(start, 3));
+		struct i915_page_directory *pd =
+			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
 		gen8_pte_t encode = pte_encode;
+		unsigned int maybe_64K = -1;
+		unsigned int page_size;
 		gen8_pte_t *vaddr;
-		u16 index, max;
+		u16 index;
 
 		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
-		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
-			index = idx.pde;
-			max = I915_PDES;
-			page_size = I915_GTT_PAGE_SIZE_2M;
-
+		    rem >= I915_GTT_PAGE_SIZE_2M &&
+		    !__gen8_pte_index(start, 0)) {
+			index = __gen8_pte_index(start, 1);
 			encode |= GEN8_PDE_PS_2M;
+			page_size = I915_GTT_PAGE_SIZE_2M;
 
 			vaddr = kmap_atomic_px(pd);
 		} else {
-			struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
+			struct i915_page_table *pt =
+				i915_pt_entry(pd, __gen8_pte_index(start, 1));
 
-			index = idx.pte;
-			max = GEN8_PTES;
+			index = __gen8_pte_index(start, 0);
 			page_size = I915_GTT_PAGE_SIZE;
 
 			if (!index &&
 			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
 			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-			     rem >= (max - index) * I915_GTT_PAGE_SIZE))
-				maybe_64K = true;
+			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+				maybe_64K = __gen8_pte_index(start, 1);
 
 			vaddr = kmap_atomic_px(pt);
 		}
@@ -1281,16 +1253,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				iter->dma = sg_dma_address(iter->sg);
 				iter->max = iter->dma + rem;
 
-				if (maybe_64K && index < max &&
+				if (maybe_64K != -1 && index < I915_PDES &&
 				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
-				       rem >= (max - index) * I915_GTT_PAGE_SIZE)))
-					maybe_64K = false;
+				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+					maybe_64K = -1;
 
 				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
 					break;
 			}
-		} while (rem >= page_size && index < max);
+		} while (rem >= page_size && index < I915_PDES);
 
 		kunmap_atomic(vaddr);
 
@@ -1300,14 +1272,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 		 * it and have reached the end of the sg table and we have
 		 * enough padding.
 		 */
-		if (maybe_64K &&
-		    (index == max ||
+		if (maybe_64K != -1 &&
+		    (index == I915_PDES ||
 		     (i915_vm_has_scratch_64K(vma->vm) &&
 		      !iter->sg && IS_ALIGNED(vma->node.start +
 					      vma->node.size,
 					      I915_GTT_PAGE_SIZE_2M)))) {
 			vaddr = kmap_atomic_px(pd);
-			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
 			kunmap_atomic(vaddr);
 			page_size = I915_GTT_PAGE_SIZE_64K;
 
@@ -1324,8 +1296,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				u16 i;
 
 				encode = vma->vm->scratch[0].encode;
-				vaddr = kmap_atomic_px(i915_pt_entry(pd,
-								     idx.pde));
+				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
 
 				for (i = 1; i < index; i += 16)
 					memset64(vaddr + i, encode, 15);
@@ -1351,13 +1322,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
 					       flags);
 	} else {
-		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
 
-		while (gen8_ppgtt_insert_pte_entries(ppgtt,
-						     i915_pdp_entry(pml4, idx.pml4e++),
-						     &iter, &idx, cache_level,
-						     flags))
-			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+		while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
+							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
+							    &iter, idx, cache_level,
+							    flags)))
+			;
 
 		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d0128f52184b..4bcf61e48570 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -115,30 +115,19 @@ typedef u64 gen8_pte_t;
 #define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
-/* GEN8 32b style address is defined as a 3 level page table:
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
  * 31:30 | 29:21 | 20:12 |  11:0
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
- */
-#define GEN8_3LVL_PDPES			4
-#define GEN8_PDE_SHIFT			21
-#define GEN8_PDE_MASK			0x1ff
-#define GEN8_PTE_SHIFT			12
-#define GEN8_PTE_MASK			0x1ff
-#define GEN8_PTES			I915_PTES(sizeof(gen8_pte_t))
-
-/* GEN8 48b style address is defined as a 4 level page table:
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
  * 47:39 | 38:30 | 29:21 | 20:12 |  11:0
  * PML4E | PDPE  |  PDE  |  PTE  | offset
  */
-#define GEN8_PML4ES_PER_PML4		512
-#define GEN8_PML4E_SHIFT		39
-#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
-#define GEN8_PDPE_SHIFT			30
-/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
- * tables */
-#define GEN8_PDPE_MASK			0x1ff
+#define GEN8_3LVL_PDPES			4
+#define GEN8_PTE_SHIFT			12
 
 #define PPAT_UNCACHED			(_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE			0 /* WB LLC */
@@ -522,15 +511,6 @@ static inline u32 gen6_pde_index(u32 addr)
 	return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
 
-static inline unsigned int
-i915_pdpes_per_pdp(const struct i915_address_space *vm)
-{
-	if (i915_vm_is_4lvl(vm))
-		return GEN8_PML4ES_PER_PML4;
-
-	return GEN8_3LVL_PDPES;
-}
-
 static inline struct i915_page_table *
 i915_pt_entry(const struct i915_page_directory * const pd,
 	      const unsigned short n)
@@ -545,66 +525,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
 	return pdp->entry[n];
 }
 
-static inline struct i915_page_directory *
-i915_pdp_entry(const struct i915_page_directory * const pml4,
-	       const unsigned short n)
-{
-	return pml4->entry[n];
-}
-
-/* Equivalent to the gen6 version, For each pde iterates over every pde
- * between from start until start + length. On gen8+ it simply iterates
- * over every page directory entry in a page directory.
- */
-#define gen8_for_each_pde(pt, pd, start, length, iter)			\
-	for (iter = gen8_pde_index(start);				\
-	     length > 0 && iter < I915_PDES &&				\
-		     (pt = i915_pt_entry(pd, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
-	for (iter = gen8_pdpe_index(start);				\
-	     length > 0 && iter < i915_pdpes_per_pdp(vm) &&		\
-		     (pd = i915_pd_entry(pdp, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
-	for (iter = gen8_pml4e_index(start);				\
-	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
-		     (pdp = i915_pdp_entry(pml4, iter), true);		\
-	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
-		    temp = min(temp - start, length);			\
-		    start += temp, length -= temp; }), ++iter)
-
-static inline u32 gen8_pte_index(u64 address)
-{
-	return i915_pte_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pde_index(u64 address)
-{
-	return i915_pde_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pdpe_index(u64 address)
-{
-	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
-}
-
-static inline u32 gen8_pml4e_index(u64 address)
-{
-	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
-}
-
-static inline u64 gen8_pte_count(u64 address, u64 length)
-{
-	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
-}
-
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {
-- 
2.20.1

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (10 preceding siblings ...)
  2019-07-07 21:00 ` [PATCH 11/11] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
@ 2019-07-07 21:41 ` Patchwork
  2019-07-07 21:46 ` ✗ Fi.CI.SPARSE: " Patchwork
  2019-07-07 22:00 ` ✓ Fi.CI.BAT: success " Patchwork
  13 siblings, 0 replies; 39+ messages in thread
From: Patchwork @ 2019-07-07 21:41 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch
URL   : https://patchwork.freedesktop.org/series/63329/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
46b4d072b194 drm/i915/gtt: Use shallow dma pages for scratch
627dff785897 drm/i915/gtt: Wrap page_table with page_directory
-:392: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'px' - possible side-effects?
#392: FILE: drivers/gpu/drm/i915/i915_gem_gtt.h:261:
+#define px_base(px) \
+	__px_choose_expr(px, struct i915_page_dma *, __x, \
+	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
+	(void)0)))

-:399: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'px' - possible side-effects?
#399: FILE: drivers/gpu/drm/i915/i915_gem_gtt.h:268:
+#define px_pt(px) \
+	__px_choose_expr(px, struct i915_page_table *, __x, \
+	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
+	(void)0))

total: 0 errors, 0 warnings, 2 checks, 368 lines checked
3d125ca5b87e drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc
-:370: WARNING:LINE_SPACING: Missing a blank line after declarations
#370: FILE: drivers/gpu/drm/i915/i915_gem_gtt.c:1194:
+	dma_addr_t addr = sg_dma_address(sg);
+	return (struct sgt_dma) { sg, addr, addr + sg->length };

total: 0 errors, 1 warnings, 0 checks, 717 lines checked
ce67216bdcb8 drm/i915/gtt: Markup i915_ppgtt depth
4eeec22243cc drm/i915/gtt: Compute the radix for gen8 page table levels
04fdd50e75e5 drm/i915/gtt: Convert vm->scratch into an array
180600ec7746 drm/i915/gtt: Use NULL to encode scratch shadow entries
71da10070ceb drm/i915/gtt: Recursive cleanup for gen8
7537f8c4b528 drm/i915/gtt: Recursive ppgtt clear for gen8
22c1df3cb640 drm/i915/gtt: Recursive ppgtt alloc for gen8
53d77f98424d drm/i915/gtt: Tidy up ppgtt insertion for gen8
-:235: WARNING:LONG_LINE: line over 100 characters
#235: FILE: drivers/gpu/drm/i915/i915_gem_gtt.c:1328:
+							    i915_pd_entry(pml4, gen8_pd_index(idx, 3)),

total: 0 errors, 1 warnings, 0 checks, 336 lines checked

^ permalink raw reply	[flat|nested] 39+ messages in thread

* ✗ Fi.CI.SPARSE: warning for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (11 preceding siblings ...)
  2019-07-07 21:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch Patchwork
@ 2019-07-07 21:46 ` Patchwork
  2019-07-07 22:00 ` ✓ Fi.CI.BAT: success " Patchwork
  13 siblings, 0 replies; 39+ messages in thread
From: Patchwork @ 2019-07-07 21:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch
URL   : https://patchwork.freedesktop.org/series/63329/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915/gtt: Use shallow dma pages for scratch
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1367:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1367:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1416:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1416:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1480:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1480:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1347:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1347:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1396:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1396:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1460:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1460:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1763:44: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1763:44: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1845:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1845:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1743:44: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1743:44: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1825:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1825:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:871:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:871:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:892:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:892:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:923:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:923:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:868:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:868:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:888:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:888:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:918:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:918:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Wrap page_table with page_directory
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:888:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:888:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:918:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:918:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:880:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:880:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:910:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:910:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1339:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1339:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1388:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1388:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1452:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1452:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1008:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1008:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1029:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1029:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1078:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1078:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1142:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1142:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:1008:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:1008:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Markup i915_ppgtt depth
Okay!

Commit: drm/i915/gtt: Compute the radix for gen8 page table levels
+drivers/gpu/drm/i915/i915_gem_gtt.c:1017:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1017:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:1017:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:1017:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Convert vm->scratch into an array
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1017:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1017:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1047:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1047:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1068:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1068:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1117:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1117:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1181:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1181:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1016:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1016:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1046:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1046:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1066:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1066:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1115:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1115:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1179:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1179:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1777:44: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1777:44: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1859:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1859:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1754:44: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1754:44: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1836:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1836:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:997:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:997:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:996:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:996:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Use NULL to encode scratch shadow entries
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1016:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1016:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1046:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1046:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1066:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1066:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1115:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1115:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1179:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1179:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1003:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1003:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1031:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1031:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1049:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1049:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1098:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1098:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1162:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1162:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:996:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:996:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:985:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:985:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Recursive cleanup for gen8
-drivers/gpu/drm/i915/i915_gem_gtt.c:973:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:973:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1508:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1508:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1477:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1477:9: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:348:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:973:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:973:9: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Recursive ppgtt clear for gen8
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1001:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1001:9: warning: expression using sizeof(void)
+
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:955:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:955:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:973:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:973:9: warning: expression using sizeof(void)
+Error in reading or end of file.

Commit: drm/i915/gtt: Recursive ppgtt alloc for gen8
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1043:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1043:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1092:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1092:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1156:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1156:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1501:9: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem_gtt.c:1501:9: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:1093:20: warning: context imbalance in '__gen8_ppgtt_alloc' - different lock contexts for basic block
-drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)
-drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)
+drivers/gpu/drm/i915/i915_gem_gtt.c:354:14: warning: expression using sizeof(void)

Commit: drm/i915/gtt: Tidy up ppgtt insertion for gen8
Okay!

^ permalink raw reply	[flat|nested] 39+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
                   ` (12 preceding siblings ...)
  2019-07-07 21:46 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-07-07 22:00 ` Patchwork
  13 siblings, 0 replies; 39+ messages in thread
From: Patchwork @ 2019-07-07 22:00 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch
URL   : https://patchwork.freedesktop.org/series/63329/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6428 -> Patchwork_13555
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/

Known issues
------------

  Here are the changes found in Patchwork_13555 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-icl-u3:          [PASS][1] -> [DMESG-WARN][2] ([fdo#107724])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-icl-u3/igt@gem_exec_suspend@basic-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-icl-u3/igt@gem_exec_suspend@basic-s3.html

  * igt@i915_selftest@live_hangcheck:
    - fi-kbl-guc:         [PASS][3] -> [DMESG-WARN][4] ([fdo#111074])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-kbl-guc/igt@i915_selftest@live_hangcheck.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-kbl-guc/igt@i915_selftest@live_hangcheck.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_hangcheck:
    - fi-kbl-8809g:       [DMESG-WARN][5] ([fdo#111074]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-kbl-8809g/igt@i915_selftest@live_hangcheck.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-kbl-8809g/igt@i915_selftest@live_hangcheck.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][7] ([fdo#109485]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_frontbuffer_tracking@basic:
    - {fi-icl-u4}:        [FAIL][9] ([fdo#103167]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-icl-u4/igt@kms_frontbuffer_tracking@basic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-icl-u4/igt@kms_frontbuffer_tracking@basic.html
    - fi-hsw-peppy:       [DMESG-WARN][11] ([fdo#102614]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html
    - fi-icl-u2:          [FAIL][13] ([fdo#103167]) -> [PASS][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6428/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109485]: https://bugs.freedesktop.org/show_bug.cgi?id=109485
  [fdo#111074]: https://bugs.freedesktop.org/show_bug.cgi?id=111074


Participating hosts (53 -> 45)
------------------------------

  Missing    (8): fi-kbl-soraka fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6428 -> Patchwork_13555

  CI_DRM_6428: b48155ed6d4d6bdfb48ef317f8da109c11c98110 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5088: 3356087442806675438319578f1c964e51ee4965 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13555: 53d77f98424de3110c4f0f51c9ba2d180db7b2f8 @ git://anongit.freedesktop.org/gfx-ci/linux


== Kernel 32bit build ==

Warning: Kernel 32bit buildtest failed:
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/build_32bit.log

  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 112 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
ERROR: "__divdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:91: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1287: recipe for target 'modules' failed
make: *** [modules] Error 2


== Linux commits ==

53d77f98424d drm/i915/gtt: Tidy up ppgtt insertion for gen8
22c1df3cb640 drm/i915/gtt: Recursive ppgtt alloc for gen8
7537f8c4b528 drm/i915/gtt: Recursive ppgtt clear for gen8
71da10070ceb drm/i915/gtt: Recursive cleanup for gen8
180600ec7746 drm/i915/gtt: Use NULL to encode scratch shadow entries
04fdd50e75e5 drm/i915/gtt: Convert vm->scratch into an array
4eeec22243cc drm/i915/gtt: Compute the radix for gen8 page table levels
ce67216bdcb8 drm/i915/gtt: Markup i915_ppgtt depth
3d125ca5b87e drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc
627dff785897 drm/i915/gtt: Wrap page_table with page_directory
46b4d072b194 drm/i915/gtt: Use shallow dma pages for scratch

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13555/

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-07 21:00 ` [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch Chris Wilson
@ 2019-07-09 12:24   ` Mika Kuoppala
  2019-07-09 12:29     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-09 12:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We only use the dma pages for scratch, and so do not need to allocate
> the extra storage for the shadow page directory.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++----------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
>  2 files changed, 85 insertions(+), 113 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 236c964dd761..937236913e70 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
>  
>  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
>  
> -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
> -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
> +#define fill_px(px, v) fill_page_dma(px_base(px), (v))
> +#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
>  
> -static void fill_page_dma(struct i915_address_space *vm,
> -			  struct i915_page_dma *p,
> -			  const u64 val)
> +static void fill_page_dma(struct i915_page_dma *p, const u64 val)
>  {
> -	u64 * const vaddr = kmap_atomic(p->page);
> -
> -	memset64(vaddr, val, PAGE_SIZE / sizeof(val));
> -
> -	kunmap_atomic(vaddr);
> +	kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));

Neat.

I would go for 512 instead of I915_PDES. There is no magic here, and
there never will be: the value is as constant as if carved into stone.
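
For the record, the chaining works because memset64() returns its
destination pointer; expanded, the one-liner is equivalent to this
sketch:

	u64 *vaddr = kmap_atomic(p->page);	/* map the backing page */

	memset64(vaddr, val, 512);	/* one qword per entry, 512 per page */

	kunmap_atomic(vaddr);	/* drop the atomic mapping */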

>  }
>  
> -static void fill_page_dma_32(struct i915_address_space *vm,
> -			     struct i915_page_dma *p,
> -			     const u32 v)
> +static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
>  {
> -	fill_page_dma(vm, p, (u64)v << 32 | v);
> +	fill_page_dma(p, (u64)v << 32 | v);
>  }
>  
>  static int
> @@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
>  	__free_pages(p->page, order);
>  }
>  
> +static void free_scratch(struct i915_address_space *vm)
> +{
> +	if (!vm->scratch_page.daddr) /* set to 0 on clones */
> +		return;
> +
> +	if (vm->scratch_pdp.daddr)
> +		cleanup_page_dma(vm, &vm->scratch_pdp);
> +	if (vm->scratch_pd.daddr)
> +		cleanup_page_dma(vm, &vm->scratch_pd);
> +	if (vm->scratch_pt.daddr)
> +		cleanup_page_dma(vm, &vm->scratch_pt);
> +
> +	cleanup_scratch_page(vm);
> +}
> +
>  static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  {
>  	struct i915_page_table *pt;
> @@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
>  	kfree(pt);
>  }
>  
> -static void gen8_initialize_pt(struct i915_address_space *vm,
> -			       struct i915_page_table *pt)
> -{
> -	fill_px(vm, pt, vm->scratch_pte);
> -}
> -
> -static void gen6_initialize_pt(struct i915_address_space *vm,
> -			       struct i915_page_table *pt)
> -{
> -	fill32_px(vm, pt, vm->scratch_pte);
> -}
> -
>  static struct i915_page_directory *__alloc_pd(void)
>  {
>  	struct i915_page_directory *pd;
> @@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm,
>  	kfree(pd);
>  }
>  
> -#define init_pd(vm, pd, to) {					\
> -	fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
> -	memset_p((pd)->entry, (to), 512);				\
> +static void init_pd(struct i915_page_directory *pd,
> +		    struct i915_page_dma *scratch)
> +{
> +	fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
> +	memset_p(pd->entry, scratch, 512);
>  }
>  
>  static inline void
> @@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  	u32 pde;
>  
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		GEM_BUG_ON(pt == vm->scratch_pt);
> +		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
>  
>  		atomic_inc(&pt->used);
>  		gen8_ppgtt_clear_pt(vm, pt, start, length);
> -		if (release_pd_entry(pd, pde, &pt->used,
> -				     px_base(vm->scratch_pt)))
> +		if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
>  			free_pt(vm, pt);
>  	}
>  }
> @@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	unsigned int pdpe;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		GEM_BUG_ON(pd == vm->scratch_pd);
> +		GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);

Perhaps the future will bring a pd_points_scratch(pd) helper.

Now the intriguing, bordering on irritating, question in my mind is
whether we can fold scratch_pd and scratch_pdp into the same thing.

Patch lgtm with some dislike towards I915_PDES,

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  
>  		atomic_inc(&pd->used);
>  		gen8_ppgtt_clear_pd(vm, pd, start, length);
> -		if (release_pd_entry(pdp, pdpe, &pd->used,
> -				     px_base(vm->scratch_pd)))
> +		if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
>  			free_pd(vm, pd);
>  	}
>  }
> @@ -921,12 +916,11 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		GEM_BUG_ON(pdp == vm->scratch_pdp);
> +		GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
>  
>  		atomic_inc(&pdp->used);
>  		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
> -		if (release_pd_entry(pml4, pml4e, &pdp->used,
> -				     px_base(vm->scratch_pdp)))
> +		if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
>  			free_pd(vm, pdp);
>  	}
>  }
> @@ -1181,7 +1175,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i] != vm->scratch_pt)
> +		if (pd->entry[i] != &vm->scratch_pt)
>  			free_pt(vm, pd->entry[i]);
>  	}
>  }
> @@ -1218,37 +1212,35 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  				I915_CACHE_LLC,
>  				vm->has_read_only);
>  
> -	vm->scratch_pt = alloc_pt(vm);
> -	if (IS_ERR(vm->scratch_pt)) {
> -		ret = PTR_ERR(vm->scratch_pt);
> +	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
> +		ret = -ENOMEM;
>  		goto free_scratch_page;
>  	}
> +	fill_page_dma(&vm->scratch_pt, vm->scratch_pte);
>  
> -	vm->scratch_pd = alloc_pd(vm);
> -	if (IS_ERR(vm->scratch_pd)) {
> -		ret = PTR_ERR(vm->scratch_pd);
> +	if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) {
> +		ret = -ENOMEM;
>  		goto free_pt;
>  	}
> +	fill_page_dma(&vm->scratch_pd,
> +		      gen8_pde_encode(vm->scratch_pd.daddr, I915_CACHE_LLC));
>  
>  	if (i915_vm_is_4lvl(vm)) {
> -		vm->scratch_pdp = alloc_pd(vm);
> -		if (IS_ERR(vm->scratch_pdp)) {
> -			ret = PTR_ERR(vm->scratch_pdp);
> +		if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
> +			ret = -ENOMEM;
>  			goto free_pd;
>  		}
> +		fill_page_dma(&vm->scratch_pdp,
> +			      gen8_pde_encode(vm->scratch_pdp.daddr,
> +					      I915_CACHE_LLC));
>  	}
>  
> -	gen8_initialize_pt(vm, vm->scratch_pt);
> -	init_pd(vm, vm->scratch_pd, vm->scratch_pt);
> -	if (i915_vm_is_4lvl(vm))
> -		init_pd(vm, vm->scratch_pdp, vm->scratch_pd);
> -
>  	return 0;
>  
>  free_pd:
> -	free_pd(vm, vm->scratch_pd);
> +	cleanup_page_dma(vm, &vm->scratch_pd);
>  free_pt:
> -	free_pt(vm, vm->scratch_pt);
> +	cleanup_page_dma(vm, &vm->scratch_pt);
>  free_scratch_page:
>  	cleanup_scratch_page(vm);
>  
> @@ -1292,18 +1284,6 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  	return 0;
>  }
>  
> -static void gen8_free_scratch(struct i915_address_space *vm)
> -{
> -	if (!vm->scratch_page.daddr)
> -		return;
> -
> -	if (i915_vm_is_4lvl(vm))
> -		free_pd(vm, vm->scratch_pdp);
> -	free_pd(vm, vm->scratch_pd);
> -	free_pt(vm, vm->scratch_pt);
> -	cleanup_scratch_page(vm);
> -}
> -
>  static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  				    struct i915_page_directory *pdp)
>  {
> @@ -1311,7 +1291,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < pdpes; i++) {
> -		if (pdp->entry[i] == vm->scratch_pd)
> +		if (pdp->entry[i] == &vm->scratch_pd)
>  			continue;
>  
>  		gen8_free_page_tables(vm, pdp->entry[i]);
> @@ -1329,7 +1309,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
>  	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
>  		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
>  
> -		if (pdp == ppgtt->vm.scratch_pdp)
> +		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
>  			continue;
>  
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> @@ -1351,7 +1331,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  	else
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
>  
> -	gen8_free_scratch(vm);
> +	free_scratch(vm);
>  }
>  
>  static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
> @@ -1367,7 +1347,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>  
> -		if (pt == vm->scratch_pt) {
> +		if (px_base(pt) == &vm->scratch_pt) {
>  			spin_unlock(&pd->lock);
>  
>  			pt = fetch_and_zero(&alloc);
> @@ -1379,10 +1359,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  			}
>  
>  			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
> -				gen8_initialize_pt(vm, pt);
> +				fill_px(pt, vm->scratch_pte);
>  
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == vm->scratch_pt) {
> +			if (pd->entry[pde] == &vm->scratch_pt) {
>  				set_pd_entry(pd, pde, pt);
>  			} else {
>  				alloc = pt;
> @@ -1414,7 +1394,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (pd == vm->scratch_pd) {
> +		if (px_base(pd) == &vm->scratch_pd) {
>  			spin_unlock(&pdp->lock);
>  
>  			pd = fetch_and_zero(&alloc);
> @@ -1425,10 +1405,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(vm, pd, vm->scratch_pt);
> +			init_pd(pd, &vm->scratch_pt);
>  
>  			spin_lock(&pdp->lock);
> -			if (pdp->entry[pdpe] == vm->scratch_pd) {
> +			if (pdp->entry[pdpe] == &vm->scratch_pd) {
>  				set_pd_entry(pdp, pdpe, pd);
>  			} else {
>  				alloc = pd;
> @@ -1449,7 +1429,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  	goto out;
>  
>  unwind_pd:
> -	if (release_pd_entry(pdp, pdpe, &pd->used, px_base(vm->scratch_pd)))
> +	if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
>  		free_pd(vm, pd);
>  unwind:
>  	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> @@ -1478,7 +1458,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (pdp == vm->scratch_pdp) {
> +		if (px_base(pdp) == &vm->scratch_pdp) {
>  			spin_unlock(&pml4->lock);
>  
>  			pdp = fetch_and_zero(&alloc);
> @@ -1489,10 +1469,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(vm, pdp, vm->scratch_pd);
> +			init_pd(pdp, &vm->scratch_pd);
>  
>  			spin_lock(&pml4->lock);
> -			if (pml4->entry[pml4e] == vm->scratch_pdp) {
> +			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
>  				set_pd_entry(pml4, pml4e, pdp);
>  			} else {
>  				alloc = pdp;
> @@ -1513,7 +1493,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  	goto out;
>  
>  unwind_pdp:
> -	if (release_pd_entry(pml4, pml4e, &pdp->used, px_base(vm->scratch_pdp)))
> +	if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
>  		free_pd(vm, pdp);
>  unwind:
>  	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> @@ -1537,7 +1517,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  		if (IS_ERR(pd))
>  			goto unwind;
>  
> -		init_pd(vm, pd, vm->scratch_pt);
> +		init_pd(pd, &vm->scratch_pt);
>  		set_pd_entry(pdp, pdpe, pd);
>  	}
>  
> @@ -1568,10 +1548,10 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>  
>  static void init_pd_n(struct i915_address_space *vm,
>  		      struct i915_page_directory *pd,
> -		      struct i915_page_directory *to,
> +		      struct i915_page_dma *to,
>  		      const unsigned int entries)
>  {
> -	const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC);
> +	const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
>  	u64 * const vaddr = kmap_atomic(pd->base.page);
>  
>  	memset64(vaddr, daddr, entries);
> @@ -1588,7 +1568,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  	if (i915_vm_is_4lvl(vm)) {
>  		pd = alloc_pd(vm);
>  		if (!IS_ERR(pd))
> -			init_pd(vm, pd, vm->scratch_pdp);
> +			init_pd(pd, &vm->scratch_pdp);
>  
>  		return pd;
>  	}
> @@ -1605,7 +1585,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  		return ERR_PTR(-ENOMEM);
>  	}
>  
> -	init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES);
> +	init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES);
>  
>  	return pd;
>  }
> @@ -1678,7 +1658,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  err_free_pd:
>  	free_pd(&ppgtt->vm, ppgtt->pd);
>  err_free_scratch:
> -	gen8_free_scratch(&ppgtt->vm);
> +	free_scratch(&ppgtt->vm);
>  err_free:
>  	kfree(ppgtt);
>  	return ERR_PTR(err);
> @@ -1763,7 +1743,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  		const unsigned int count = min(num_entries, GEN6_PTES - pte);
>  		gen6_pte_t *vaddr;
>  
> -		GEM_BUG_ON(pt == vm->scratch_pt);
> +		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
>  
>  		num_entries -= count;
>  
> @@ -1800,7 +1780,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  	struct sgt_dma iter = sgt_dma(vma);
>  	gen6_pte_t *vaddr;
>  
> -	GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt);
> +	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt);
>  
>  	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
>  	do {
> @@ -1845,7 +1825,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  	gen6_for_each_pde(pt, pd, start, length, pde) {
>  		const unsigned int count = gen6_pte_count(start, length);
>  
> -		if (pt == vm->scratch_pt) {
> +		if (px_base(pt) == &vm->scratch_pt) {
>  			spin_unlock(&pd->lock);
>  
>  			pt = fetch_and_zero(&alloc);
> @@ -1856,10 +1836,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  				goto unwind_out;
>  			}
>  
> -			gen6_initialize_pt(vm, pt);
> +			fill32_px(pt, vm->scratch_pte);
>  
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == vm->scratch_pt) {
> +			if (pd->entry[pde] == &vm->scratch_pt) {
>  				pd->entry[pde] = pt;
>  				if (i915_vma_is_bound(ppgtt->vma,
>  						      I915_VMA_GLOBAL_BIND)) {
> @@ -1908,26 +1888,18 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
>  					 I915_CACHE_NONE,
>  					 PTE_READ_ONLY);
>  
> -	vm->scratch_pt = alloc_pt(vm);
> -	if (IS_ERR(vm->scratch_pt)) {
> +	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
>  		cleanup_scratch_page(vm);
> -		return PTR_ERR(vm->scratch_pt);
> +		return -ENOMEM;
>  	}
> -
> -	gen6_initialize_pt(vm, vm->scratch_pt);
> +	fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte);
>  
>  	gen6_for_all_pdes(unused, pd, pde)
> -		pd->entry[pde] = vm->scratch_pt;
> +		pd->entry[pde] = &vm->scratch_pt;
>  
>  	return 0;
>  }
>  
> -static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
> -{
> -	free_pt(vm, vm->scratch_pt);
> -	cleanup_scratch_page(vm);
> -}
> -
>  static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
>  {
>  	struct i915_page_directory * const pd = ppgtt->base.pd;
> @@ -1935,7 +1907,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
>  	u32 pde;
>  
>  	gen6_for_all_pdes(pt, pd, pde)
> -		if (pt != ppgtt->base.vm.scratch_pt)
> +		if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
>  			free_pt(&ppgtt->base.vm, pt);
>  }
>  
> @@ -1950,7 +1922,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
>  	gen6_ppgtt_free_pd(ppgtt);
> -	gen6_ppgtt_free_scratch(vm);
> +	free_scratch(vm);
>  	kfree(ppgtt->base.pd);
>  }
>  
> @@ -1993,7 +1965,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
>  {
>  	struct gen6_ppgtt *ppgtt = vma->private;
>  	struct i915_page_directory * const pd = ppgtt->base.pd;
> -	struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
> +	struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt;
>  	struct i915_page_table *pt;
>  	unsigned int pde;
>  
> @@ -2002,11 +1974,11 @@ static void pd_vma_unbind(struct i915_vma *vma)
>  
>  	/* Free all no longer used page tables */
>  	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
> -		if (atomic_read(&pt->used) || pt == scratch_pt)
> +		if (px_base(pt) == scratch || atomic_read(&pt->used))
>  			continue;
>  
>  		free_pt(&ppgtt->base.vm, pt);
> -		pd->entry[pde] = scratch_pt;
> +		pd->entry[pde] = scratch;
>  	}
>  
>  	ppgtt->scan_for_unused_pt = false;
> @@ -2148,7 +2120,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>  	return &ppgtt->base;
>  
>  err_scratch:
> -	gen6_ppgtt_free_scratch(&ppgtt->base.vm);
> +	free_scratch(&ppgtt->base.vm);
>  err_pd:
>  	kfree(ppgtt->base.pd);
>  err_free:
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 57a68ef4eda7..860850411a1b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -304,9 +304,9 @@ struct i915_address_space {
>  	u64 scratch_pte;
>  	int scratch_order;
>  	struct i915_page_dma scratch_page;
> -	struct i915_page_table *scratch_pt;
> -	struct i915_page_directory *scratch_pd;
> -	struct i915_page_directory *scratch_pdp; /* GEN8+ & 48b PPGTT */
> +	struct i915_page_dma scratch_pt;
> +	struct i915_page_dma scratch_pd;
> +	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
>  
>  	/**
>  	 * List of vma currently bound.
> -- 
> 2.20.1

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-09 12:24   ` Mika Kuoppala
@ 2019-07-09 12:29     ` Chris Wilson
  2019-07-09 12:41       ` Mika Kuoppala
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-09 12:29 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-09 13:24:27)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > We only use the dma pages for scratch, and so do not need to allocate
> > the extra storage for the shadow page directory.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++----------------
> >  drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
> >  2 files changed, 85 insertions(+), 113 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 236c964dd761..937236913e70 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
> >  
> >  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
> >  
> > -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
> > -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
> > +#define fill_px(px, v) fill_page_dma(px_base(px), (v))
> > +#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
> >  
> > -static void fill_page_dma(struct i915_address_space *vm,
> > -                       struct i915_page_dma *p,
> > -                       const u64 val)
> > +static void fill_page_dma(struct i915_page_dma *p, const u64 val)
> >  {
> > -     u64 * const vaddr = kmap_atomic(p->page);
> > -
> > -     memset64(vaddr, val, PAGE_SIZE / sizeof(val));
> > -
> > -     kunmap_atomic(vaddr);
> > +     kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
> 
> Neat.
> 
> I would go for 512 instead of I915_PDES. There is no magic here, and
> there never will be: the value is as constant as if carved into stone.

I was just going with I915_PDES and I915_PDE_MASK throughout. Later this
one becomes a count, fwiw.
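
For context, both spellings denote the same nine-bit radix; from
memory the defines amount to:

	#define I915_PDES	512
	#define I915_PDE_MASK	(I915_PDES - 1)	/* aka 0x1ff */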
> 
> >  }
> >  
> > -static void fill_page_dma_32(struct i915_address_space *vm,
> > -                          struct i915_page_dma *p,
> > -                          const u32 v)
> > +static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
> >  {
> > -     fill_page_dma(vm, p, (u64)v << 32 | v);
> > +     fill_page_dma(p, (u64)v << 32 | v);
> >  }
> >  
> >  static int
> > @@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
> >       __free_pages(p->page, order);
> >  }
> >  
> > +static void free_scratch(struct i915_address_space *vm)
> > +{
> > +     if (!vm->scratch_page.daddr) /* set to 0 on clones */
> > +             return;
> > +
> > +     if (vm->scratch_pdp.daddr)
> > +             cleanup_page_dma(vm, &vm->scratch_pdp);
> > +     if (vm->scratch_pd.daddr)
> > +             cleanup_page_dma(vm, &vm->scratch_pd);
> > +     if (vm->scratch_pt.daddr)
> > +             cleanup_page_dma(vm, &vm->scratch_pt);
> > +
> > +     cleanup_scratch_page(vm);
> > +}
> > +
> >  static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
> >  {
> >       struct i915_page_table *pt;
> > @@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
> >       kfree(pt);
> >  }
> >  
> > -static void gen8_initialize_pt(struct i915_address_space *vm,
> > -                            struct i915_page_table *pt)
> > -{
> > -     fill_px(vm, pt, vm->scratch_pte);
> > -}
> > -
> > -static void gen6_initialize_pt(struct i915_address_space *vm,
> > -                            struct i915_page_table *pt)
> > -{
> > -     fill32_px(vm, pt, vm->scratch_pte);
> > -}
> > -
> >  static struct i915_page_directory *__alloc_pd(void)
> >  {
> >       struct i915_page_directory *pd;
> > @@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm,
> >       kfree(pd);
> >  }
> >  
> > -#define init_pd(vm, pd, to) {                                        \
> > -     fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
> > -     memset_p((pd)->entry, (to), 512);                               \
> > +static void init_pd(struct i915_page_directory *pd,
> > +                 struct i915_page_dma *scratch)
> > +{
> > +     fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
> > +     memset_p(pd->entry, scratch, 512);
> >  }
> >  
> >  static inline void
> > @@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
> >       u32 pde;
> >  
> >       gen8_for_each_pde(pt, pd, start, length, pde) {
> > -             GEM_BUG_ON(pt == vm->scratch_pt);
> > +             GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
> >  
> >               atomic_inc(&pt->used);
> >               gen8_ppgtt_clear_pt(vm, pt, start, length);
> > -             if (release_pd_entry(pd, pde, &pt->used,
> > -                                  px_base(vm->scratch_pt)))
> > +             if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
> >                       free_pt(vm, pt);
> >       }
> >  }
> > @@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
> >       unsigned int pdpe;
> >  
> >       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> > -             GEM_BUG_ON(pd == vm->scratch_pd);
> > +             GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
> 
> Perhaps the future will bring pd_points_scratch(pd).
> 
> Now the intriguing, bordering on irritating, question in my mind is
> whether we can fold the scratch_pd and scratch_pdp to be the same thing.

No, we can't fold the scratch_pd and scratch_pdp to be the same (dma
wise), as each level does need to end up at the scratch_pte. And sadly
we can't use the scratch_pte as the filler for scratch_pd.
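
To make the chaining concrete, here is a minimal sketch (the helpers
and fields are from the patch, but init_scratch_levels() itself is a
hypothetical name) of each scratch level being filled with entries
pointing one level down, so any walk bottoms out at the scratch_pte:

        static void init_scratch_levels(struct i915_address_space *vm)
        {
                /* every pte in scratch_pt reads the scratch page */
                fill_page_dma(&vm->scratch_pt, vm->scratch_pte);

                /* every pde in scratch_pd points at scratch_pt */
                fill_page_dma(&vm->scratch_pd,
                              gen8_pde_encode(vm->scratch_pt.daddr,
                                              I915_CACHE_LLC));

                /* every pdpe in scratch_pdp points at scratch_pd (48b) */
                fill_page_dma(&vm->scratch_pdp,
                              gen8_pde_encode(vm->scratch_pd.daddr,
                                              I915_CACHE_LLC));
        }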

> Patch lgtm with some dislike towards I915_PDES,

I'm not keen on it tbh. But the mix of alternating between 512/0x1ff
does suggest using some name.
-Chris

* Re: [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch
  2019-07-09 12:29     ` Chris Wilson
@ 2019-07-09 12:41       ` Mika Kuoppala
  0 siblings, 0 replies; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-09 12:41 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-09 13:24:27)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > We only use the dma pages for scratch, and so do not need to allocate
>> > the extra storage for the shadow page directory.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++----------------
>> >  drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
>> >  2 files changed, 85 insertions(+), 113 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > index 236c964dd761..937236913e70 100644
>> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > @@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
>> >  
>> >  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
>> >  
>> > -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
>> > -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
>> > +#define fill_px(px, v) fill_page_dma(px_base(px), (v))
>> > +#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
>> >  
>> > -static void fill_page_dma(struct i915_address_space *vm,
>> > -                       struct i915_page_dma *p,
>> > -                       const u64 val)
>> > +static void fill_page_dma(struct i915_page_dma *p, const u64 val)
>> >  {
>> > -     u64 * const vaddr = kmap_atomic(p->page);
>> > -
>> > -     memset64(vaddr, val, PAGE_SIZE / sizeof(val));
>> > -
>> > -     kunmap_atomic(vaddr);
>> > +     kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
>> 
>> Neat.
>> 
>> I would go for 512 instead of I915_PDES. There is no magic,
>> and there never will be, as it is as constant as if carved in stone.
>
> I was just going with I915_PDES and I915_PDE_MASK throughout. Later this
> one becomes count, fwiw.
>> 
>> >  }
>> >  
>> > -static void fill_page_dma_32(struct i915_address_space *vm,
>> > -                          struct i915_page_dma *p,
>> > -                          const u32 v)
>> > +static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
>> >  {
>> > -     fill_page_dma(vm, p, (u64)v << 32 | v);
>> > +     fill_page_dma(p, (u64)v << 32 | v);
>> >  }
>> >  
>> >  static int
>> > @@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
>> >       __free_pages(p->page, order);
>> >  }
>> >  
>> > +static void free_scratch(struct i915_address_space *vm)
>> > +{
>> > +     if (!vm->scratch_page.daddr) /* set to 0 on clones */
>> > +             return;
>> > +
>> > +     if (vm->scratch_pdp.daddr)
>> > +             cleanup_page_dma(vm, &vm->scratch_pdp);
>> > +     if (vm->scratch_pd.daddr)
>> > +             cleanup_page_dma(vm, &vm->scratch_pd);
>> > +     if (vm->scratch_pt.daddr)
>> > +             cleanup_page_dma(vm, &vm->scratch_pt);
>> > +
>> > +     cleanup_scratch_page(vm);
>> > +}
>> > +
>> >  static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>> >  {
>> >       struct i915_page_table *pt;
>> > @@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
>> >       kfree(pt);
>> >  }
>> >  
>> > -static void gen8_initialize_pt(struct i915_address_space *vm,
>> > -                            struct i915_page_table *pt)
>> > -{
>> > -     fill_px(vm, pt, vm->scratch_pte);
>> > -}
>> > -
>> > -static void gen6_initialize_pt(struct i915_address_space *vm,
>> > -                            struct i915_page_table *pt)
>> > -{
>> > -     fill32_px(vm, pt, vm->scratch_pte);
>> > -}
>> > -
>> >  static struct i915_page_directory *__alloc_pd(void)
>> >  {
>> >       struct i915_page_directory *pd;
>> > @@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm,
>> >       kfree(pd);
>> >  }
>> >  
>> > -#define init_pd(vm, pd, to) {                                        \
>> > -     fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
>> > -     memset_p((pd)->entry, (to), 512);                               \
>> > +static void init_pd(struct i915_page_directory *pd,
>> > +                 struct i915_page_dma *scratch)
>> > +{
>> > +     fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
>> > +     memset_p(pd->entry, scratch, 512);
>> >  }
>> >  
>> >  static inline void
>> > @@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>> >       u32 pde;
>> >  
>> >       gen8_for_each_pde(pt, pd, start, length, pde) {
>> > -             GEM_BUG_ON(pt == vm->scratch_pt);
>> > +             GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
>> >  
>> >               atomic_inc(&pt->used);
>> >               gen8_ppgtt_clear_pt(vm, pt, start, length);
>> > -             if (release_pd_entry(pd, pde, &pt->used,
>> > -                                  px_base(vm->scratch_pt)))
>> > +             if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
>> >                       free_pt(vm, pt);
>> >       }
>> >  }
>> > @@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>> >       unsigned int pdpe;
>> >  
>> >       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>> > -             GEM_BUG_ON(pd == vm->scratch_pd);
>> > +             GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
>> 
>> Perhaps the future will bring pd_points_scratch(pd).
>> 
>> Now the intriguing, bordering on irritating, question in my mind is
>> whether we can fold the scratch_pd and scratch_pdp to be the same thing.
>
> No, we can't fold the scratch_pd and scratch_pdp to be the same (dma
> wise), as each level does need to end up at the scratch_pte. And sadly
> we can't use the scratch_pte as the filler for scratch_pd.

Oh indeed, it needs to be a hierarchy even at the upper level to break
out. *blush*

>
>> Patch lgtm with some dislike towards I915_PDES,
>
> I'm not keen on it tbh. But the mix of alternating between 512/0x1ff
> does suggest using some name.

512 everywhere.
-Mika

* Re: [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory
  2019-07-07 21:00 ` [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory Chris Wilson
@ 2019-07-09 14:43   ` Mika Kuoppala
  2019-07-09 14:46     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-09 14:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The page directory extends the page table with the shadow entries. Make
> the page directory struct embed the page table for easier code reuse.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gt/intel_ringbuffer.c |   2 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 100 ++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_gtt.h        |  31 +++++--
>  3 files changed, 70 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> index b33cfc56f623..9163b5238082 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
> @@ -1503,7 +1503,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
>  
>  	*cs++ = MI_LOAD_REGISTER_IMM(1);
>  	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
> -	*cs++ = ppgtt->pd->base.ggtt_offset << 10;
> +	*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
>  
>  	intel_ring_advance(rq, cs);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 937236913e70..1fa93f56792e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -107,6 +107,8 @@
>   *
>   */
>  
> +#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
> +

This remains unused.

>  static int
>  i915_get_ggtt_vma_pages(struct i915_vma *vma);
>  
> @@ -712,28 +714,17 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  	return pt;
>  }
>  
> -static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
> -{
> -	cleanup_page_dma(vm, &pt->base);
> -	kfree(pt);
> -}
> -
>  static struct i915_page_directory *__alloc_pd(void)
>  {
>  	struct i915_page_directory *pd;
>  
>  	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
> -
>  	if (unlikely(!pd))
>  		return NULL;
>  
> -	memset(&pd->base, 0, sizeof(pd->base));
> -	atomic_set(&pd->used, 0);
> +	atomic_set(px_used(pd), 0);
>  	spin_lock_init(&pd->lock);
>  
> -	/* for safety */
> -	pd->entry[0] = NULL;

Yes, perhaps this is superfluous, as we (almost) always init after this,
or in the next call into the vm. The blowup would be clearer if we
forgot, but we can live without it.

> -
>  	return pd;
>  }
>  
> @@ -745,7 +736,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>  	if (unlikely(!pd))
>  		return ERR_PTR(-ENOMEM);
>  
> -	if (unlikely(setup_page_dma(vm, &pd->base))) {
> +	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
>  		kfree(pd);
>  		return ERR_PTR(-ENOMEM);
>  	}
> @@ -753,13 +744,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>  	return pd;
>  }
>  
> -static void free_pd(struct i915_address_space *vm,
> -		    struct i915_page_directory *pd)
> +static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
>  {
> -	cleanup_page_dma(vm, &pd->base);
> +	cleanup_page_dma(vm, pd);
>  	kfree(pd);
>  }
>  
> +#define free_px(vm, px) free_pd(vm, px_base(px))
> +
>  static void init_pd(struct i915_page_directory *pd,
>  		    struct i915_page_dma *scratch)
>  {
> @@ -784,9 +776,9 @@ __set_pd_entry(struct i915_page_directory * const pd,
>  	       struct i915_page_dma * const to,
>  	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
>  {
> -	GEM_BUG_ON(atomic_read(&pd->used) > 512);
> +	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);

In hindsight I should have added a comment here to explain why it's not
>= 512, but that's not a problem with this patch.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  
> -	atomic_inc(&pd->used);
> +	atomic_inc(px_used(pd));
>  	pd->entry[pde] = to;
>  	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
>  }
> @@ -797,11 +789,11 @@ __clear_pd_entry(struct i915_page_directory * const pd,
>  		 struct i915_page_dma * const to,
>  		 u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
>  {
> -	GEM_BUG_ON(atomic_read(&pd->used) == 0);
> +	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
>  
>  	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
>  	pd->entry[pde] = to;
> -	atomic_dec(&pd->used);
> +	atomic_dec(px_used(pd));
>  }
>  
>  #define set_pd_entry(pd, pde, to) \
> @@ -813,13 +805,13 @@ __clear_pd_entry(struct i915_page_directory * const pd,
>  static bool
>  release_pd_entry(struct i915_page_directory * const pd,
>  		 const unsigned short pde,
> -		 atomic_t *counter,
> +		 struct i915_page_table * const pt,
>  		 struct i915_page_dma * const scratch)
>  {
>  	bool free = false;
>  
>  	spin_lock(&pd->lock);
> -	if (atomic_dec_and_test(counter)) {
> +	if (atomic_dec_and_test(&pt->used)) {
>  		clear_pd_entry(pd, pde, scratch);
>  		free = true;
>  	}
> @@ -870,8 +862,8 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  
>  		atomic_inc(&pt->used);
>  		gen8_ppgtt_clear_pt(vm, pt, start, length);
> -		if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt))
> -			free_pt(vm, pt);
> +		if (release_pd_entry(pd, pde, pt, &vm->scratch_pt))
> +			free_px(vm, pt);
>  	}
>  }
>  
> @@ -888,10 +880,10 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>  		GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
>  
> -		atomic_inc(&pd->used);
> +		atomic_inc(px_used(pd));
>  		gen8_ppgtt_clear_pd(vm, pd, start, length);
> -		if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
> -			free_pd(vm, pd);
> +		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
> +			free_px(vm, pd);
>  	}
>  }
>  
> @@ -918,10 +910,10 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>  		GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
>  
> -		atomic_inc(&pdp->used);
> +		atomic_inc(px_used(pdp));
>  		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
> -		if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
> -			free_pd(vm, pdp);
> +		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
> +			free_px(vm, pdp);
>  	}
>  }
>  
> @@ -1176,7 +1168,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
>  
>  	for (i = 0; i < I915_PDES; i++) {
>  		if (pd->entry[i] != &vm->scratch_pt)
> -			free_pt(vm, pd->entry[i]);
> +			free_pd(vm, pd->entry[i]);
>  	}
>  }
>  
> @@ -1255,9 +1247,9 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  	int i;
>  
>  	if (create)
> -		atomic_inc(&ppgtt->pd->used); /* never remove */
> +		atomic_inc(px_used(ppgtt->pd)); /* never remove */
>  	else
> -		atomic_dec(&ppgtt->pd->used);
> +		atomic_dec(px_used(ppgtt->pd));
>  
>  	if (i915_vm_is_4lvl(vm)) {
>  		const u64 daddr = px_dma(ppgtt->pd);
> @@ -1298,7 +1290,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  		free_pd(vm, pdp->entry[i]);
>  	}
>  
> -	free_pd(vm, pdp);
> +	free_px(vm, pdp);
>  }
>  
>  static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
> @@ -1315,7 +1307,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
>  	}
>  
> -	free_pd(&ppgtt->vm, pml4);
> +	free_px(&ppgtt->vm, pml4);
>  }
>  
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> @@ -1379,7 +1371,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
>  out:
>  	if (alloc)
> -		free_pt(vm, alloc);
> +		free_px(vm, alloc);
>  	return ret;
>  }
>  
> @@ -1415,7 +1407,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				pd = pdp->entry[pdpe];
>  			}
>  		}
> -		atomic_inc(&pd->used);
> +		atomic_inc(px_used(pd));
>  		spin_unlock(&pdp->lock);
>  
>  		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> @@ -1423,19 +1415,19 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  			goto unwind_pd;
>  
>  		spin_lock(&pdp->lock);
> -		atomic_dec(&pd->used);
> +		atomic_dec(px_used(pd));
>  	}
>  	spin_unlock(&pdp->lock);
>  	goto out;
>  
>  unwind_pd:
> -	if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd))
> -		free_pd(vm, pd);
> +	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
> +		free_px(vm, pd);
>  unwind:
>  	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
>  out:
>  	if (alloc)
> -		free_pd(vm, alloc);
> +		free_px(vm, alloc);
>  	return ret;
>  }
>  
> @@ -1479,7 +1471,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  				pdp = pml4->entry[pml4e];
>  			}
>  		}
> -		atomic_inc(&pdp->used);
> +		atomic_inc(px_used(pdp));
>  		spin_unlock(&pml4->lock);
>  
>  		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
> @@ -1487,19 +1479,19 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  			goto unwind_pdp;
>  
>  		spin_lock(&pml4->lock);
> -		atomic_dec(&pdp->used);
> +		atomic_dec(px_used(pdp));
>  	}
>  	spin_unlock(&pml4->lock);
>  	goto out;
>  
>  unwind_pdp:
> -	if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp))
> -		free_pd(vm, pdp);
> +	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
> +		free_px(vm, pdp);
>  unwind:
>  	gen8_ppgtt_clear_4lvl(vm, from, start - from);
>  out:
>  	if (alloc)
> -		free_pd(vm, alloc);
> +		free_px(vm, alloc);
>  	return ret;
>  }
>  
> @@ -1525,7 +1517,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  
>  unwind:
>  	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> -	atomic_set(&pdp->used, 0);
> +	atomic_set(px_used(pdp), 0);
>  	return -ENOMEM;
>  }
>  
> @@ -1552,7 +1544,7 @@ static void init_pd_n(struct i915_address_space *vm,
>  		      const unsigned int entries)
>  {
>  	const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
> -	u64 * const vaddr = kmap_atomic(pd->base.page);
> +	u64 * const vaddr = kmap_atomic_px(pd);
>  
>  	memset64(vaddr, daddr, entries);
>  	kunmap_atomic(vaddr);
> @@ -1580,7 +1572,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  
>  	pd->entry[GEN8_3LVL_PDPES] = NULL;
>  
> -	if (unlikely(setup_page_dma(vm, &pd->base))) {
> +	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
>  		kfree(pd);
>  		return ERR_PTR(-ENOMEM);
>  	}
> @@ -1656,7 +1648,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
>  	return ppgtt;
>  
>  err_free_pd:
> -	free_pd(&ppgtt->vm, ppgtt->pd);
> +	free_px(&ppgtt->vm, ppgtt->pd);
>  err_free_scratch:
>  	free_scratch(&ppgtt->vm);
>  err_free:
> @@ -1867,7 +1859,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  	gen6_ppgtt_clear_range(vm, from, start - from);
>  out:
>  	if (alloc)
> -		free_pt(vm, alloc);
> +		free_px(vm, alloc);
>  	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
>  	return ret;
>  }
> @@ -1908,7 +1900,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
>  
>  	gen6_for_all_pdes(pt, pd, pde)
>  		if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
> -			free_pt(&ppgtt->base.vm, pt);
> +			free_px(&ppgtt->base.vm, pt);
>  }
>  
>  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
> @@ -1949,7 +1941,7 @@ static int pd_vma_bind(struct i915_vma *vma,
>  	struct i915_page_table *pt;
>  	unsigned int pde;
>  
> -	ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
> +	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
>  	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
>  
>  	gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
> @@ -1977,7 +1969,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
>  		if (px_base(pt) == scratch || atomic_read(&pt->used))
>  			continue;
>  
> -		free_pt(&ppgtt->base.vm, pt);
> +		free_px(&ppgtt->base.vm, pt);
>  		pd->entry[pde] = scratch;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 860850411a1b..48bb8c5125e3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -240,21 +240,37 @@ struct i915_page_dma {
>  	};
>  };
>  
> -#define px_base(px) (&(px)->base)
> -#define px_dma(px) (px_base(px)->daddr)
> -
>  struct i915_page_table {
>  	struct i915_page_dma base;
>  	atomic_t used;
>  };
>  
>  struct i915_page_directory {
> -	struct i915_page_dma base;
> -	atomic_t used;
> +	struct i915_page_table pt;
>  	spinlock_t lock;
>  	void *entry[512];
>  };
>  
> +#define __px_choose_expr(x, type, expr, other) \
> +	__builtin_choose_expr( \
> +	__builtin_types_compatible_p(typeof(x), type) || \
> +	__builtin_types_compatible_p(typeof(x), const type), \
> +	({ type __x = (type)(x); expr; }), \
> +	other)
> +
> +#define px_base(px) \
> +	__px_choose_expr(px, struct i915_page_dma *, __x, \
> +	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
> +	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
> +	(void)0)))
> +#define px_dma(px) (px_base(px)->daddr)
> +
> +#define px_pt(px) \
> +	__px_choose_expr(px, struct i915_page_table *, __x, \
> +	__px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
> +	(void)0))
> +#define px_used(px) (&px_pt(px)->used)
> +
>  struct i915_vma_ops {
>  	/* Map an object into an address space with the given cache flags. */
>  	int (*bind_vma)(struct i915_vma *vma,
> @@ -589,10 +605,9 @@ static inline u64 gen8_pte_count(u64 address, u64 length)
>  static inline dma_addr_t
>  i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
>  {
> -	struct i915_page_directory *pd;
> +	struct i915_page_dma *pt = ppgtt->pd->entry[n];
>  
> -	pd = i915_pdp_entry(ppgtt->pd, n);
> -	return px_dma(pd);
> +	return px_dma(pt);
>  }
>  
>  static inline struct i915_ggtt *
> -- 
> 2.20.1
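
As a usage sketch of the type dispatch (hypothetical caller; the types
and macros are from the diff above): px_base() and px_used() accept
either a page table or a page directory and pick the right member chain
at compile time via __builtin_choose_expr:

        static void example(struct i915_address_space *vm,
                            struct i915_page_directory *pd,
                            struct i915_page_table *pt)
        {
                GEM_BUG_ON(px_base(pt) == &vm->scratch_pt); /* &pt->base */
                GEM_BUG_ON(px_base(pd) == &vm->scratch_pd); /* &pd->pt.base */
                atomic_inc(px_used(pd));                    /* &pd->pt.used */
        }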

* Re: [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory
  2019-07-09 14:43   ` Mika Kuoppala
@ 2019-07-09 14:46     ` Chris Wilson
  0 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-09 14:46 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-09 15:43:15)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >  static struct i915_page_directory *__alloc_pd(void)
> >  {
> >       struct i915_page_directory *pd;
> >  
> >       pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
> > -
> >       if (unlikely(!pd))
> >               return NULL;
> >  
> > -     memset(&pd->base, 0, sizeof(pd->base));
> > -     atomic_set(&pd->used, 0);
> > +     atomic_set(px_used(pd), 0);
> >       spin_lock_init(&pd->lock);
> >  
> > -     /* for safety */
> > -     pd->entry[0] = NULL;
> 
> Yes, perhaps this is superfluous, as we (almost) always init after this,
> or in the next call into the vm. The blowup would be clearer if we
> forgot, but we can live without it.

Further down the line, we don't even allocate it if we don't use it :)
-Chris

* Re: [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc
  2019-07-07 21:00 ` [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc Chris Wilson
@ 2019-07-09 14:59   ` Mika Kuoppala
  0 siblings, 0 replies; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-09 14:59 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> In preparation for refactoring the free/clear/alloc, first move the code
> around so that we can avoid forward declarations in the next set of
> patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

The diff got funky at some point, but after applying it
the end result looked good.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 673 ++++++++++++++--------------
>  1 file changed, 337 insertions(+), 336 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1fa93f56792e..da4db76ce054 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -831,6 +831,104 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
>  	ppgtt->pd_dirty_engines = ALL_ENGINES;
>  }
>  
> +static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
> +{
> +	struct i915_address_space *vm = &ppgtt->vm;
> +	struct drm_i915_private *dev_priv = vm->i915;
> +	enum vgt_g2v_type msg;
> +	int i;
> +
> +	if (create)
> +		atomic_inc(px_used(ppgtt->pd)); /* never remove */
> +	else
> +		atomic_dec(px_used(ppgtt->pd));
> +
> +	if (i915_vm_is_4lvl(vm)) {
> +		const u64 daddr = px_dma(ppgtt->pd);
> +
> +		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
> +		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
> +
> +		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
> +				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
> +	} else {
> +		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
> +			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
> +
> +			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
> +			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
> +		}
> +
> +		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
> +				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
> +	}
> +
> +	I915_WRITE(vgtif_reg(g2v_notify), msg);
> +
> +	return 0;
> +}
> +
> +static void gen8_free_page_tables(struct i915_address_space *vm,
> +				  struct i915_page_directory *pd)
> +{
> +	int i;
> +
> +	for (i = 0; i < I915_PDES; i++) {
> +		if (pd->entry[i] != &vm->scratch_pt)
> +			free_pd(vm, pd->entry[i]);
> +	}
> +}
> +
> +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
> +				    struct i915_page_directory *pdp)
> +{
> +	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
> +	int i;
> +
> +	for (i = 0; i < pdpes; i++) {
> +		if (pdp->entry[i] == &vm->scratch_pd)
> +			continue;
> +
> +		gen8_free_page_tables(vm, pdp->entry[i]);
> +		free_pd(vm, pdp->entry[i]);
> +	}
> +
> +	free_px(vm, pdp);
> +}
> +
> +static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
> +{
> +	struct i915_page_directory * const pml4 = ppgtt->pd;
> +	int i;
> +
> +	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
> +		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
> +
> +		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
> +			continue;
> +
> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> +	}
> +
> +	free_px(&ppgtt->vm, pml4);
> +}
> +
> +static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> +{
> +	struct drm_i915_private *i915 = vm->i915;
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +
> +	if (intel_vgpu_active(i915))
> +		gen8_ppgtt_notify_vgt(ppgtt, false);
> +
> +	if (i915_vm_is_4lvl(vm))
> +		gen8_ppgtt_cleanup_4lvl(ppgtt);
> +	else
> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
> +
> +	free_scratch(vm);
> +}
> +
>  /* Removes entries from a single page table, releasing it if it's empty.
>   * Caller can use the return value to update higher-level entries.
>   */
> @@ -917,95 +1015,265 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	}
>  }
>  
> -static inline struct sgt_dma {
> -	struct scatterlist *sg;
> -	dma_addr_t dma, max;
> -} sgt_dma(struct i915_vma *vma) {
> -	struct scatterlist *sg = vma->pages->sgl;
> -	dma_addr_t addr = sg_dma_address(sg);
> -	return (struct sgt_dma) { sg, addr, addr + sg->length };
> -}
> -
> -struct gen8_insert_pte {
> -	u16 pml4e;
> -	u16 pdpe;
> -	u16 pde;
> -	u16 pte;
> -};
>  
> -static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
> +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
> +			       struct i915_page_directory *pd,
> +			       u64 start, u64 length)
>  {
> -	return (struct gen8_insert_pte) {
> -		 gen8_pml4e_index(start),
> -		 gen8_pdpe_index(start),
> -		 gen8_pde_index(start),
> -		 gen8_pte_index(start),
> -	};
> -}
> +	struct i915_page_table *pt, *alloc = NULL;
> +	u64 from = start;
> +	unsigned int pde;
> +	int ret = 0;
>  
> -static __always_inline bool
> -gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
> -			      struct i915_page_directory *pdp,
> -			      struct sgt_dma *iter,
> -			      struct gen8_insert_pte *idx,
> -			      enum i915_cache_level cache_level,
> -			      u32 flags)
> -{
> -	struct i915_page_directory *pd;
> -	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
> -	gen8_pte_t *vaddr;
> -	bool ret;
> +	spin_lock(&pd->lock);
> +	gen8_for_each_pde(pt, pd, start, length, pde) {
> +		const int count = gen8_pte_count(start, length);
>  
> -	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> -	pd = i915_pd_entry(pdp, idx->pdpe);
> -	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> -	do {
> -		vaddr[idx->pte] = pte_encode | iter->dma;
> +		if (px_base(pt) == &vm->scratch_pt) {
> +			spin_unlock(&pd->lock);
>  
> -		iter->dma += I915_GTT_PAGE_SIZE;
> -		if (iter->dma >= iter->max) {
> -			iter->sg = __sg_next(iter->sg);
> -			if (!iter->sg) {
> -				ret = false;
> -				break;
> +			pt = fetch_and_zero(&alloc);
> +			if (!pt)
> +				pt = alloc_pt(vm);
> +			if (IS_ERR(pt)) {
> +				ret = PTR_ERR(pt);
> +				goto unwind;
>  			}
>  
> -			iter->dma = sg_dma_address(iter->sg);
> -			iter->max = iter->dma + iter->sg->length;
> +			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
> +				fill_px(pt, vm->scratch_pte);
> +
> +			spin_lock(&pd->lock);
> +			if (pd->entry[pde] == &vm->scratch_pt) {
> +				set_pd_entry(pd, pde, pt);
> +			} else {
> +				alloc = pt;
> +				pt = pd->entry[pde];
> +			}
>  		}
>  
> -		if (++idx->pte == GEN8_PTES) {
> -			idx->pte = 0;
> +		atomic_add(count, &pt->used);
> +	}
> +	spin_unlock(&pd->lock);
> +	goto out;
>  
> -			if (++idx->pde == I915_PDES) {
> -				idx->pde = 0;
> +unwind:
> +	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
> +out:
> +	if (alloc)
> +		free_px(vm, alloc);
> +	return ret;
> +}
>  
> -				/* Limited by sg length for 3lvl */
> -				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
> -					idx->pdpe = 0;
> -					ret = true;
> -					break;
> -				}
> +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
> +				struct i915_page_directory *pdp,
> +				u64 start, u64 length)
> +{
> +	struct i915_page_directory *pd, *alloc = NULL;
> +	u64 from = start;
> +	unsigned int pdpe;
> +	int ret = 0;
>  
> -				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> -				pd = pdp->entry[idx->pdpe];
> +	spin_lock(&pdp->lock);
> +	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> +		if (px_base(pd) == &vm->scratch_pd) {
> +			spin_unlock(&pdp->lock);
> +
> +			pd = fetch_and_zero(&alloc);
> +			if (!pd)
> +				pd = alloc_pd(vm);
> +			if (IS_ERR(pd)) {
> +				ret = PTR_ERR(pd);
> +				goto unwind;
>  			}
>  
> -			kunmap_atomic(vaddr);
> -			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +			init_pd(pd, &vm->scratch_pt);
> +
> +			spin_lock(&pdp->lock);
> +			if (pdp->entry[pdpe] == &vm->scratch_pd) {
> +				set_pd_entry(pdp, pdpe, pd);
> +			} else {
> +				alloc = pd;
> +				pd = pdp->entry[pdpe];
> +			}
>  		}
> -	} while (1);
> -	kunmap_atomic(vaddr);
> +		atomic_inc(px_used(pd));
> +		spin_unlock(&pdp->lock);
> +
> +		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> +		if (unlikely(ret))
> +			goto unwind_pd;
> +
> +		spin_lock(&pdp->lock);
> +		atomic_dec(px_used(pd));
> +	}
> +	spin_unlock(&pdp->lock);
> +	goto out;
>  
> +unwind_pd:
> +	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
> +		free_px(vm, pd);
> +unwind:
> +	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> +out:
> +	if (alloc)
> +		free_px(vm, alloc);
>  	return ret;
>  }
>  
> -static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> -				   struct i915_vma *vma,
> -				   enum i915_cache_level cache_level,
> -				   u32 flags)
> +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
> +				 u64 start, u64 length)
>  {
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +	return gen8_ppgtt_alloc_pdp(vm,
> +				    i915_vm_to_ppgtt(vm)->pd, start, length);
> +}
> +
> +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
> +				 u64 start, u64 length)
> +{
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +	struct i915_page_directory * const pml4 = ppgtt->pd;
> +	struct i915_page_directory *pdp, *alloc = NULL;
> +	u64 from = start;
> +	int ret = 0;
> +	u32 pml4e;
> +
> +	spin_lock(&pml4->lock);
> +	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> +		if (px_base(pdp) == &vm->scratch_pdp) {
> +			spin_unlock(&pml4->lock);
> +
> +			pdp = fetch_and_zero(&alloc);
> +			if (!pdp)
> +				pdp = alloc_pd(vm);
> +			if (IS_ERR(pdp)) {
> +				ret = PTR_ERR(pdp);
> +				goto unwind;
> +			}
> +
> +			init_pd(pdp, &vm->scratch_pd);
> +
> +			spin_lock(&pml4->lock);
> +			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
> +				set_pd_entry(pml4, pml4e, pdp);
> +			} else {
> +				alloc = pdp;
> +				pdp = pml4->entry[pml4e];
> +			}
> +		}
> +		atomic_inc(px_used(pdp));
> +		spin_unlock(&pml4->lock);
> +
> +		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
> +		if (unlikely(ret))
> +			goto unwind_pdp;
> +
> +		spin_lock(&pml4->lock);
> +		atomic_dec(px_used(pdp));
> +	}
> +	spin_unlock(&pml4->lock);
> +	goto out;
> +
> +unwind_pdp:
> +	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
> +		free_px(vm, pdp);
> +unwind:
> +	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> +out:
> +	if (alloc)
> +		free_px(vm, alloc);
> +	return ret;
> +}
> +
> +static inline struct sgt_dma {
> +	struct scatterlist *sg;
> +	dma_addr_t dma, max;
> +} sgt_dma(struct i915_vma *vma) {
> +	struct scatterlist *sg = vma->pages->sgl;
> +	dma_addr_t addr = sg_dma_address(sg);
> +	return (struct sgt_dma) { sg, addr, addr + sg->length };
> +}
> +
> +struct gen8_insert_pte {
> +	u16 pml4e;
> +	u16 pdpe;
> +	u16 pde;
> +	u16 pte;
> +};
> +
> +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
> +{
> +	return (struct gen8_insert_pte) {
> +		 gen8_pml4e_index(start),
> +		 gen8_pdpe_index(start),
> +		 gen8_pde_index(start),
> +		 gen8_pte_index(start),
> +	};
> +}
> +
> +static __always_inline bool
> +gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
> +			      struct i915_page_directory *pdp,
> +			      struct sgt_dma *iter,
> +			      struct gen8_insert_pte *idx,
> +			      enum i915_cache_level cache_level,
> +			      u32 flags)
> +{
> +	struct i915_page_directory *pd;
> +	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
> +	gen8_pte_t *vaddr;
> +	bool ret;
> +
> +	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> +	pd = i915_pd_entry(pdp, idx->pdpe);
> +	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +	do {
> +		vaddr[idx->pte] = pte_encode | iter->dma;
> +
> +		iter->dma += I915_GTT_PAGE_SIZE;
> +		if (iter->dma >= iter->max) {
> +			iter->sg = __sg_next(iter->sg);
> +			if (!iter->sg) {
> +				ret = false;
> +				break;
> +			}
> +
> +			iter->dma = sg_dma_address(iter->sg);
> +			iter->max = iter->dma + iter->sg->length;
> +		}
> +
> +		if (++idx->pte == GEN8_PTES) {
> +			idx->pte = 0;
> +
> +			if (++idx->pde == I915_PDES) {
> +				idx->pde = 0;
> +
> +				/* Limited by sg length for 3lvl */
> +				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
> +					idx->pdpe = 0;
> +					ret = true;
> +					break;
> +				}
> +
> +				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
> +				pd = pdp->entry[idx->pdpe];
> +			}
> +
> +			kunmap_atomic(vaddr);
> +			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
> +		}
> +	} while (1);
> +	kunmap_atomic(vaddr);
> +
> +	return ret;
> +}
> +
> +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> +				   struct i915_vma *vma,
> +				   enum i915_cache_level cache_level,
> +				   u32 flags)
> +{
> +	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct sgt_dma iter = sgt_dma(vma);
>  	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
>  
> @@ -1161,17 +1429,6 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>  	}
>  }
>  
> -static void gen8_free_page_tables(struct i915_address_space *vm,
> -				  struct i915_page_directory *pd)
> -{
> -	int i;
> -
> -	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i] != &vm->scratch_pt)
> -			free_pd(vm, pd->entry[i]);
> -	}
> -}
> -
>  static int gen8_init_scratch(struct i915_address_space *vm)
>  {
>  	int ret;
> @@ -1239,262 +1496,6 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  	return ret;
>  }
>  
> -static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
> -{
> -	struct i915_address_space *vm = &ppgtt->vm;
> -	struct drm_i915_private *dev_priv = vm->i915;
> -	enum vgt_g2v_type msg;
> -	int i;
> -
> -	if (create)
> -		atomic_inc(px_used(ppgtt->pd)); /* never remove */
> -	else
> -		atomic_dec(px_used(ppgtt->pd));
> -
> -	if (i915_vm_is_4lvl(vm)) {
> -		const u64 daddr = px_dma(ppgtt->pd);
> -
> -		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
> -		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
> -
> -		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
> -				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
> -	} else {
> -		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
> -			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
> -
> -			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
> -			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
> -		}
> -
> -		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
> -				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
> -	}
> -
> -	I915_WRITE(vgtif_reg(g2v_notify), msg);
> -
> -	return 0;
> -}
> -
> -static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
> -				    struct i915_page_directory *pdp)
> -{
> -	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
> -	int i;
> -
> -	for (i = 0; i < pdpes; i++) {
> -		if (pdp->entry[i] == &vm->scratch_pd)
> -			continue;
> -
> -		gen8_free_page_tables(vm, pdp->entry[i]);
> -		free_pd(vm, pdp->entry[i]);
> -	}
> -
> -	free_px(vm, pdp);
> -}
> -
> -static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
> -{
> -	struct i915_page_directory * const pml4 = ppgtt->pd;
> -	int i;
> -
> -	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
> -		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
> -
> -		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
> -			continue;
> -
> -		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> -	}
> -
> -	free_px(&ppgtt->vm, pml4);
> -}
> -
> -static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> -{
> -	struct drm_i915_private *i915 = vm->i915;
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -
> -	if (intel_vgpu_active(i915))
> -		gen8_ppgtt_notify_vgt(ppgtt, false);
> -
> -	if (i915_vm_is_4lvl(vm))
> -		gen8_ppgtt_cleanup_4lvl(ppgtt);
> -	else
> -		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
> -
> -	free_scratch(vm);
> -}
> -
> -static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
> -			       struct i915_page_directory *pd,
> -			       u64 start, u64 length)
> -{
> -	struct i915_page_table *pt, *alloc = NULL;
> -	u64 from = start;
> -	unsigned int pde;
> -	int ret = 0;
> -
> -	spin_lock(&pd->lock);
> -	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		const int count = gen8_pte_count(start, length);
> -
> -		if (px_base(pt) == &vm->scratch_pt) {
> -			spin_unlock(&pd->lock);
> -
> -			pt = fetch_and_zero(&alloc);
> -			if (!pt)
> -				pt = alloc_pt(vm);
> -			if (IS_ERR(pt)) {
> -				ret = PTR_ERR(pt);
> -				goto unwind;
> -			}
> -
> -			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
> -				fill_px(pt, vm->scratch_pte);
> -
> -			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == &vm->scratch_pt) {
> -				set_pd_entry(pd, pde, pt);
> -			} else {
> -				alloc = pt;
> -				pt = pd->entry[pde];
> -			}
> -		}
> -
> -		atomic_add(count, &pt->used);
> -	}
> -	spin_unlock(&pd->lock);
> -	goto out;
> -
> -unwind:
> -	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> -}
> -
> -static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
> -				struct i915_page_directory *pdp,
> -				u64 start, u64 length)
> -{
> -	struct i915_page_directory *pd, *alloc = NULL;
> -	u64 from = start;
> -	unsigned int pdpe;
> -	int ret = 0;
> -
> -	spin_lock(&pdp->lock);
> -	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (px_base(pd) == &vm->scratch_pd) {
> -			spin_unlock(&pdp->lock);
> -
> -			pd = fetch_and_zero(&alloc);
> -			if (!pd)
> -				pd = alloc_pd(vm);
> -			if (IS_ERR(pd)) {
> -				ret = PTR_ERR(pd);
> -				goto unwind;
> -			}
> -
> -			init_pd(pd, &vm->scratch_pt);
> -
> -			spin_lock(&pdp->lock);
> -			if (pdp->entry[pdpe] == &vm->scratch_pd) {
> -				set_pd_entry(pdp, pdpe, pd);
> -			} else {
> -				alloc = pd;
> -				pd = pdp->entry[pdpe];
> -			}
> -		}
> -		atomic_inc(px_used(pd));
> -		spin_unlock(&pdp->lock);
> -
> -		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> -		if (unlikely(ret))
> -			goto unwind_pd;
> -
> -		spin_lock(&pdp->lock);
> -		atomic_dec(px_used(pd));
> -	}
> -	spin_unlock(&pdp->lock);
> -	goto out;
> -
> -unwind_pd:
> -	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
> -		free_px(vm, pd);
> -unwind:
> -	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> -}
> -
> -static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
> -				 u64 start, u64 length)
> -{
> -	return gen8_ppgtt_alloc_pdp(vm,
> -				    i915_vm_to_ppgtt(vm)->pd, start, length);
> -}
> -
> -static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
> -				 u64 start, u64 length)
> -{
> -	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	struct i915_page_directory * const pml4 = ppgtt->pd;
> -	struct i915_page_directory *pdp, *alloc = NULL;
> -	u64 from = start;
> -	int ret = 0;
> -	u32 pml4e;
> -
> -	spin_lock(&pml4->lock);
> -	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (px_base(pdp) == &vm->scratch_pdp) {
> -			spin_unlock(&pml4->lock);
> -
> -			pdp = fetch_and_zero(&alloc);
> -			if (!pdp)
> -				pdp = alloc_pd(vm);
> -			if (IS_ERR(pdp)) {
> -				ret = PTR_ERR(pdp);
> -				goto unwind;
> -			}
> -
> -			init_pd(pdp, &vm->scratch_pd);
> -
> -			spin_lock(&pml4->lock);
> -			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
> -				set_pd_entry(pml4, pml4e, pdp);
> -			} else {
> -				alloc = pdp;
> -				pdp = pml4->entry[pml4e];
> -			}
> -		}
> -		atomic_inc(px_used(pdp));
> -		spin_unlock(&pml4->lock);
> -
> -		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
> -		if (unlikely(ret))
> -			goto unwind_pdp;
> -
> -		spin_lock(&pml4->lock);
> -		atomic_dec(px_used(pdp));
> -	}
> -	spin_unlock(&pml4->lock);
> -	goto out;
> -
> -unwind_pdp:
> -	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
> -		free_px(vm, pdp);
> -unwind:
> -	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> -out:
> -	if (alloc)
> -		free_px(vm, alloc);
> -	return ret;
> -}
> -
>  static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  {
>  	struct i915_address_space *vm = &ppgtt->vm;
> -- 
> 2.20.1

* Re: [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
@ 2019-07-09 15:21   ` Chris Wilson
  2019-07-10  9:24   ` Mika Kuoppala
  2019-07-10 13:49   ` Mika Kuoppala
  2 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-09 15:21 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2019-07-07 22:00:18)
> The radix levels of each page directory are easily determined so replace
> the numerous hardcoded constants with precomputed derived constants.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2fc60e8acd9a..271305705c1c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>         return 0;
>  }
>  
> +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
> +#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES))
> +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
> +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
> +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
> +
> +static inline unsigned int
> +gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx)
> +{
> +       const int shift = gen8_pd_shift(lvl);
> +       const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> +
> +       GEM_BUG_ON(addr >= end);
> +       end += ~mask >> gen8_pd_shift(1);
> +
> +       *idx = i915_pde_index(addr, shift);
> +       if ((addr ^ end) & mask)
> +               return I915_PDES - *idx;
> +       else
> +               return i915_pde_index(end, shift) - *idx;
> +}
> +
> +static inline bool gen8_pd_subsumes(u64 addr, u64 end, int lvl)
> +{
> +       const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> +
> +       GEM_BUG_ON(addr >= end);
> +       return (addr ^ end) & mask && (addr & ~mask) == 0;
> +}
> +
> +static inline unsigned int gen8_pt_count(u64 addr, u64 end)
> +{
> +       GEM_BUG_ON(addr >= end);
> +       if ((addr ^ end) & ~I915_PDE_MASK)
> +               return I915_PDES - (addr & I915_PDE_MASK);
> +       else
> +               return end - addr;
> +}

So this is the question: do you want these as 512 and 0x1ff?

Or just define gen8_pd_shift(lvl) as ((lvl) * ilog2(512))

and work from there. Hmm.
-Chris
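
For intuition, a standalone sketch of gen8_pt_count() with worked
values (assuming I915_PDES == 512 and I915_PDE_MASK == 0x1ff), showing
the count being clamped at a page-table boundary:

        static unsigned int gen8_pt_count(u64 addr, u64 end)
        {
                if ((addr ^ end) & ~I915_PDE_MASK) /* crosses out of the pt */
                        return I915_PDES - (addr & I915_PDE_MASK);
                else
                        return end - addr;
        }

        /*
         * gen8_pt_count(0, 16)     == 16  (wholly within one pt)
         * gen8_pt_count(500, 1000) == 12  (clamped to the end of the pt)
         * gen8_pt_count(512, 1024) == 512 (exactly one full pt)
         */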

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-07 21:00 ` [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth Chris Wilson
@ 2019-07-10  8:17   ` Mika Kuoppala
  2019-07-10  8:25     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10  8:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> This will be useful to consolidate recursive code.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
>  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
>  2 files changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index da4db76ce054..2fc60e8acd9a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>  	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
>  	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
>  	ppgtt->vm.vma_ops.clear_pages = clear_pages;
> +
> +	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;

Perhaps it becomes evident later in the series why 'top' and
not 'level'; with levels, these would be 4 and 3 here.

-Mika

>  }
>  
>  static void init_pd_n(struct i915_address_space *vm,
> @@ -2086,6 +2088,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
>  		return ERR_PTR(-ENOMEM);
>  
>  	ppgtt_init(&ppgtt->base, &i915->gt);
> +	ppgtt->base.vm.top = 1;
>  
>  	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
>  	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 48bb8c5125e3..119b6d33b266 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -323,6 +323,7 @@ struct i915_address_space {
>  	struct i915_page_dma scratch_pt;
>  	struct i915_page_dma scratch_pd;
>  	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
> +	int top;
>  
>  	/**
>  	 * List of vma currently bound.
> -- 
> 2.20.1

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-10  8:17   ` Mika Kuoppala
@ 2019-07-10  8:25     ` Chris Wilson
  2019-07-10 14:25       ` Mika Kuoppala
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-10  8:25 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 09:17:27)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > This will be useful to consolidate recursive code.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
> >  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
> >  2 files changed, 4 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index da4db76ce054..2fc60e8acd9a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
> >       ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
> >       ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
> >       ppgtt->vm.vma_ops.clear_pages = clear_pages;
> > +
> > +     ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
> 
> Perhaps it becomes evident later in the series why 'top' and
> not 'level'; with levels, these would be 4 and 3 here.

Because we use top and not level :)
-Chris
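
A hypothetical sketch (none of these names are in the patch) of why a
zero-based top is convenient: recursion just counts the level down to
zero, whatever depth the vm has:

        struct pd { void *entry[512]; };

        static void walk(struct pd *pd, int lvl)
        {
                int i;

                if (!lvl)
                        return; /* entries are now ptes; bottomed out */

                for (i = 0; i < 512; i++)
                        if (pd->entry[i])
                                walk(pd->entry[i], lvl - 1);
        }

        /* walk(ppgtt->pd, vm->top); top = 3 for 4lvl, 2 for 3lvl, 1 gen6 */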

* Re: [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
  2019-07-09 15:21   ` Chris Wilson
@ 2019-07-10  9:24   ` Mika Kuoppala
  2019-07-10  9:28     ` Chris Wilson
  2019-07-10 13:49   ` Mika Kuoppala
  2 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10  9:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The radix levels of each page directory are easily determined so replace
> the numerous hardcoded constants with precomputed derived constants.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2fc60e8acd9a..271305705c1c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  	return 0;
>  }
>  
> +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
> +#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES))
> +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
> +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
> +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
> +
> +static inline unsigned int
> +gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx)
> +{
> +	const int shift = gen8_pd_shift(lvl);
> +	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> +
> +	GEM_BUG_ON(addr >= end);

GEM_BUG_ON(!lvl) ?

> +	end += ~mask >> gen8_pd_shift(1);
> +
> +	*idx = i915_pde_index(addr, shift);

As I see no usage of this macro yet, this looks
wrong as the shift doesn't include the pte shift?

For example, for the address of the first page, we could get an
index of 7.

-Mika

> +	if ((addr ^ end) & mask)
> +		return I915_PDES - *idx;
> +	else
> +		return i915_pde_index(end, shift) - *idx;
> +}
> +
> +static inline bool gen8_pd_subsumes(u64 addr, u64 end, int lvl)
> +{
> +	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> +
> +	GEM_BUG_ON(addr >= end);
> +	return (addr ^ end) & mask && (addr & ~mask) == 0;
> +}
> +
> +static inline unsigned int gen8_pt_count(u64 addr, u64 end)
> +{
> +	GEM_BUG_ON(addr >= end);
> +	if ((addr ^ end) & ~I915_PDE_MASK)
> +		return I915_PDES - (addr & I915_PDE_MASK);
> +	else
> +		return end - addr;
> +}
> +
>  static void gen8_free_page_tables(struct i915_address_space *vm,
>  				  struct i915_page_directory *pd)
>  {
> -- 
> 2.20.1

* Re: [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-10  9:24   ` Mika Kuoppala
@ 2019-07-10  9:28     ` Chris Wilson
  0 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-10  9:28 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 10:24:48)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > The radix levels of each page directory are easily determined so replace
> > the numerous hardcoded constants with precomputed derived constants.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++
> >  1 file changed, 39 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 2fc60e8acd9a..271305705c1c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
> >       return 0;
> >  }
> >  
> > +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
> > +#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES))
> > +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
> > +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
> > +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
> > +
> > +static inline unsigned int
> > +gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx)
> > +{
> > +     const int shift = gen8_pd_shift(lvl);
> > +     const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> > +
> > +     GEM_BUG_ON(addr >= end);
> 
> GEM_BUG_ON(!lvl) ?

It worked for !lvl so I left it out.

> > +     end += ~mask >> gen8_pd_shift(1);
> > +
> > +     *idx = i915_pde_index(addr, shift);
> 
> As I see no usage of this macro yet, this looks wrong, as the shift
> doesn't include the pte shift?

Why would it, since I'm not working on page addresses but pd indices? :-p
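
To make that concrete, a sketch of the intended call pattern (a
hypothetical caller; the actual walkers only land later in the series):

	/* hypothetical caller: drop into pte-index space once, up front */
	static void walk(struct i915_address_space *vm, u64 start, u64 length)
	{
		u64 from = start >> GEN8_PTE_SHIFT;	/* pte-index space */
		u64 end = from + (length >> GEN8_PTE_SHIFT);
		unsigned int idx, count;

		count = gen8_pd_range(from, end, vm->top, &idx);
		/* descend into entries idx..idx+count-1 at level vm->top */
	}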
-Chris

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
  2019-07-09 15:21   ` Chris Wilson
  2019-07-10  9:24   ` Mika Kuoppala
@ 2019-07-10 13:49   ` Mika Kuoppala
  2019-07-10 13:55     ` Chris Wilson
  2019-07-10 14:55     ` Mika Kuoppala
  2 siblings, 2 replies; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 13:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The radix levels of each page directory are easily determined so replace
> the numerous hardcoded constants with precomputed derived constants.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2fc60e8acd9a..271305705c1c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  	return 0;
>  }
>  
> +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
> +#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES))

Could be just (lvl) * 9. But looking at ilog2(), it resolves to a
compile-time constant anyway, so both are fine.
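
(With I915_PDES == 512 it is a compile-time constant either way; e.g. a
sanity check like

	BUILD_BUG_ON(gen8_pd_shift(2) != 18);

would hold.)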

> +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
> +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
> +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
> +
> +static inline unsigned int
> +gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx)

I was confused enough (even though the last function reveals it
clearly) that on IRC we concluded that 'addr' as the first parameter is
misleading, and converged on 'start'.

> +{
> +	const int shift = gen8_pd_shift(lvl);
> +	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> +
> +	GEM_BUG_ON(addr >= end);
> +	end += ~mask >> gen8_pd_shift(1);
> +
> +	*idx = i915_pde_index(addr, shift);
> +	if ((addr ^ end) & mask)
> +		return I915_PDES - *idx;
> +	else
> +		return i915_pde_index(end, shift) - *idx;
> +}
> +
> +static inline bool gen8_pd_subsumes(u64 addr, u64 end, int lvl)
> +{

Just a suggestion: gen8_pd_contains() for emphasis on exclusivity. But
this is fine too; I guess it's whichever reads better at the callsite
(which we don't see yet!).
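
(As I read it, at lvl == 1 it answers: does [start, end) cover the
whole 2^18 pte-index span behind a single lvl == 2 entry? E.g. start ==
0, end == 1 << 18 gives true, start == 1 gives false. In that case the
walker could free or replace the child wholesale instead of descending.)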

> +	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
> +
> +	GEM_BUG_ON(addr >= end);
> +	return (addr ^ end) & mask && (addr & ~mask) == 0;
> +}
> +
> +static inline unsigned int gen8_pt_count(u64 addr, u64 end)
> +{
> +	GEM_BUG_ON(addr >= end);
> +	if ((addr ^ end) & ~I915_PDE_MASK)
> +		return I915_PDES - (addr & I915_PDE_MASK);

Ok, I yield on 512. I915_PDES is fine as it at least couples it to the
mask :O
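
(Worked through with start == 510, end == 520: (510 ^ 520) & ~511 is
non-zero, so it clamps to 512 - 510 == 2 entries and leaves the rest to
the caller; start == 8, end == 16 stays within one pde and yields 8.)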

With s/addr/start,

Reviewed-by: Mika Kuoppala <mika.kuoppala@mika.kuoppala@linux.intel.com>

> +	else
> +		return end - addr;
> +}
> +
>  static void gen8_free_page_tables(struct i915_address_space *vm,
>  				  struct i915_page_directory *pd)
>  {
> -- 
> 2.20.1

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-10 13:49   ` Mika Kuoppala
@ 2019-07-10 13:55     ` Chris Wilson
  2019-07-10 14:55     ` Mika Kuoppala
  1 sibling, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-10 13:55 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 14:49:05)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > +static inline unsigned int gen8_pt_count(u64 addr, u64 end)
> > +{
> > +     GEM_BUG_ON(addr >= end);
> > +     if ((addr ^ end) & ~I915_PDE_MASK)
> > +             return I915_PDES - (addr & I915_PDE_MASK);
> 
> Ok, I yield on 512. I915_PDES is fine as it at least
> couples it to the mask :O

I had already removed them all! :-p

Give or take a few GEN8_PDES since that ends up being the single
constant we require.
-Chris

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array
  2019-07-07 21:00 ` [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array Chris Wilson
@ 2019-07-10 14:18   ` Mika Kuoppala
  2019-07-10 14:28     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 14:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Each level has its own scratch. Make the levels more obvious by forgoing
> the fancy similar names and replacing them with a number. 0 is the
> bottommost level, the physical page used for actual data; 1+ are the page
> directories.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 206 ++++++++++++----------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  14 +-
>  2 files changed, 99 insertions(+), 121 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 271305705c1c..b7882f06214a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -634,7 +634,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
>  
>  	do {
> -		int order = get_order(size);
> +		unsigned int order = get_order(size);
>  		struct page *page;
>  		dma_addr_t addr;
>  
> @@ -653,8 +653,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  		if (unlikely(!IS_ALIGNED(addr, size)))
>  			goto unmap_page;
>  
> -		vm->scratch_page.page = page;
> -		vm->scratch_page.daddr = addr;
> +		vm->scratch[0].base.page = page;
> +		vm->scratch[0].base.daddr = addr;
>  		vm->scratch_order = order;
>  		return 0;
>  
> @@ -673,8 +673,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  
>  static void cleanup_scratch_page(struct i915_address_space *vm)
>  {
> -	struct i915_page_dma *p = &vm->scratch_page;
> -	int order = vm->scratch_order;
> +	struct i915_page_dma *p = px_base(&vm->scratch[0]);
> +	unsigned int order = vm->scratch_order;
>  
>  	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
>  		       PCI_DMA_BIDIRECTIONAL);
> @@ -683,15 +683,16 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
>  
>  static void free_scratch(struct i915_address_space *vm)
>  {
> -	if (!vm->scratch_page.daddr) /* set to 0 on clones */
> +	int i;
> +
> +	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
>  		return;
>  
> -	if (vm->scratch_pdp.daddr)
> -		cleanup_page_dma(vm, &vm->scratch_pdp);
> -	if (vm->scratch_pd.daddr)
> -		cleanup_page_dma(vm, &vm->scratch_pd);
> -	if (vm->scratch_pt.daddr)
> -		cleanup_page_dma(vm, &vm->scratch_pt);
> +	for (i = 1; i <= vm->top; i++) {
> +		if (!px_dma(&vm->scratch[i]))
> +			break;
> +		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
> +	}
>  
>  	cleanup_scratch_page(vm);
>  }
> @@ -753,9 +754,9 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
>  #define free_px(vm, px) free_pd(vm, px_base(px))
>  
>  static void init_pd(struct i915_page_directory *pd,
> -		    struct i915_page_dma *scratch)
> +		    struct i915_page_scratch *scratch)
>  {
> -	fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
> +	fill_px(pd, scratch->encode);
>  	memset_p(pd->entry, scratch, 512);
>  }
>  
> @@ -783,30 +784,26 @@ __set_pd_entry(struct i915_page_directory * const pd,
>  	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
>  }
>  
> +#define set_pd_entry(pd, pde, to) \
> +	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
> +
>  static inline void
> -__clear_pd_entry(struct i915_page_directory * const pd,
> -		 const unsigned short pde,
> -		 struct i915_page_dma * const to,
> -		 u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
> +clear_pd_entry(struct i915_page_directory * const pd,
> +	       const unsigned short pde,
> +	       struct i915_page_scratch * const scratch)
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
>  
> -	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
> -	pd->entry[pde] = to;
> +	write_dma_entry(px_base(pd), pde, scratch->encode);
> +	pd->entry[pde] = scratch;
>  	atomic_dec(px_used(pd));
>  }
>  
> -#define set_pd_entry(pd, pde, to) \
> -	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
> -
> -#define clear_pd_entry(pd, pde, to) \
> -	__clear_pd_entry((pd), (pde), (to), gen8_pde_encode)
> -
>  static bool
>  release_pd_entry(struct i915_page_directory * const pd,
>  		 const unsigned short pde,
>  		 struct i915_page_table * const pt,
> -		 struct i915_page_dma * const scratch)
> +		 struct i915_page_scratch * const scratch)
>  {
>  	bool free = false;
>  
> @@ -913,7 +910,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i] != &vm->scratch_pt)
> +		if (pd->entry[i] != &vm->scratch[1])
>  			free_pd(vm, pd->entry[i]);
>  	}
>  }
> @@ -925,7 +922,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < pdpes; i++) {
> -		if (pdp->entry[i] == &vm->scratch_pd)
> +		if (pdp->entry[i] == &vm->scratch[2])
>  			continue;
>  
>  		gen8_free_page_tables(vm, pdp->entry[i]);
> @@ -943,7 +940,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
>  	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
>  		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
>  
> -		if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
> +		if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3]))
>  			continue;
>  
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> @@ -979,7 +976,9 @@ static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
>  	gen8_pte_t *vaddr;
>  
>  	vaddr = kmap_atomic_px(pt);
> -	memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
> +	memset64(vaddr + gen8_pte_index(start),
> +		 vm->scratch[0].encode,
> +		 num_entries);
>  	kunmap_atomic(vaddr);
>  
>  	GEM_BUG_ON(num_entries > atomic_read(&pt->used));
> @@ -995,11 +994,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  	u32 pde;
>  
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
> +		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
>  
>  		atomic_inc(&pt->used);
>  		gen8_ppgtt_clear_pt(vm, pt, start, length);
> -		if (release_pd_entry(pd, pde, pt, &vm->scratch_pt))
> +		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
>  			free_px(vm, pt);
>  	}
>  }
> @@ -1015,11 +1014,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	unsigned int pdpe;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
> +		GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2]));
>  
>  		atomic_inc(px_used(pd));
>  		gen8_ppgtt_clear_pd(vm, pd, start, length);
> -		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
> +		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
>  			free_px(vm, pd);
>  	}
>  }
> @@ -1045,16 +1044,15 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
> +		GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3]));
>  
>  		atomic_inc(px_used(pdp));
>  		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
> -		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
> +		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
>  			free_px(vm, pdp);
>  	}
>  }
>  
> -
>  static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  			       struct i915_page_directory *pd,
>  			       u64 start, u64 length)
> @@ -1068,7 +1066,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>  
> -		if (px_base(pt) == &vm->scratch_pt) {
> +		if (px_base(pt) == px_base(&vm->scratch[1])) {
>  			spin_unlock(&pd->lock);
>  
>  			pt = fetch_and_zero(&alloc);
> @@ -1080,10 +1078,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  			}
>  
>  			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
> -				fill_px(pt, vm->scratch_pte);
> +				fill_px(pt, vm->scratch[0].encode);
>  
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == &vm->scratch_pt) {
> +			if (pd->entry[pde] == &vm->scratch[1]) {
>  				set_pd_entry(pd, pde, pt);
>  			} else {
>  				alloc = pt;
> @@ -1115,7 +1113,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (px_base(pd) == &vm->scratch_pd) {
> +		if (px_base(pd) == px_base(&vm->scratch[2])) {
>  			spin_unlock(&pdp->lock);
>  
>  			pd = fetch_and_zero(&alloc);
> @@ -1126,10 +1124,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(pd, &vm->scratch_pt);
> +			init_pd(pd, &vm->scratch[1]);
>  
>  			spin_lock(&pdp->lock);
> -			if (pdp->entry[pdpe] == &vm->scratch_pd) {
> +			if (pdp->entry[pdpe] == &vm->scratch[2]) {
>  				set_pd_entry(pdp, pdpe, pd);
>  			} else {
>  				alloc = pd;
> @@ -1150,7 +1148,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  	goto out;
>  
>  unwind_pd:
> -	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
> +	if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
>  		free_px(vm, pd);
>  unwind:
>  	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> @@ -1179,7 +1177,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (px_base(pdp) == &vm->scratch_pdp) {
> +		if (px_base(pdp) == px_base(&vm->scratch[3])) {
>  			spin_unlock(&pml4->lock);
>  
>  			pdp = fetch_and_zero(&alloc);
> @@ -1190,10 +1188,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(pdp, &vm->scratch_pd);
> +			init_pd(pdp, &vm->scratch[2]);
>  
>  			spin_lock(&pml4->lock);
> -			if (pml4->entry[pml4e] == &vm->scratch_pdp) {
> +			if (pml4->entry[pml4e] == &vm->scratch[3]) {
>  				set_pd_entry(pml4, pml4e, pdp);
>  			} else {
>  				alloc = pdp;
> @@ -1214,7 +1212,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  	goto out;
>  
>  unwind_pdp:
> -	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
> +	if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
>  		free_px(vm, pdp);
>  unwind:
>  	gen8_ppgtt_clear_4lvl(vm, from, start - from);
> @@ -1428,7 +1426,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
>  			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
>  				u16 i;
>  
> -				encode = vma->vm->scratch_pte;
> +				encode = vma->vm->scratch[0].encode;
>  				vaddr = kmap_atomic_px(i915_pt_entry(pd,
>  								     idx.pde));
>  
> @@ -1471,6 +1469,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>  static int gen8_init_scratch(struct i915_address_space *vm)
>  {
>  	int ret;
> +	int i;
>  
>  	/*
>  	 * If everybody agrees to not to write into the scratch page,
> @@ -1484,10 +1483,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  		GEM_BUG_ON(!clone->has_read_only);
>  
>  		vm->scratch_order = clone->scratch_order;
> -		vm->scratch_pte = clone->scratch_pte;
> -		vm->scratch_pt  = clone->scratch_pt;
> -		vm->scratch_pd  = clone->scratch_pd;
> -		vm->scratch_pdp = clone->scratch_pdp;
> +		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
> +		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
>  		return 0;
>  	}
>  
> @@ -1495,44 +1492,25 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  	if (ret)
>  		return ret;
>  
> -	vm->scratch_pte =
> -		gen8_pte_encode(vm->scratch_page.daddr,
> -				I915_CACHE_LLC,
> -				vm->has_read_only);
> +	vm->scratch[0].encode =
> +		gen8_pte_encode(px_dma(&vm->scratch[0]),
> +				I915_CACHE_LLC, vm->has_read_only);
>  
> -	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
> -		ret = -ENOMEM;
> -		goto free_scratch_page;
> -	}
> -	fill_page_dma(&vm->scratch_pt, vm->scratch_pte);
> +	for (i = 1; i <= vm->top; i++) {
> +		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
> +			goto free_scratch;
>  
> -	if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) {
> -		ret = -ENOMEM;
> -		goto free_pt;
> -	}
> -	fill_page_dma(&vm->scratch_pd,
> -		      gen8_pde_encode(vm->scratch_pd.daddr, I915_CACHE_LLC));
> -
> -	if (i915_vm_is_4lvl(vm)) {
> -		if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
> -			ret = -ENOMEM;
> -			goto free_pd;
> -		}
> -		fill_page_dma(&vm->scratch_pdp,
> -			      gen8_pde_encode(vm->scratch_pdp.daddr,
> -					      I915_CACHE_LLC));
> +		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
> +		vm->scratch[i].encode =
> +			gen8_pde_encode(px_dma(&vm->scratch[i]),
> +					I915_CACHE_LLC);

Ok. The new code makes perfect sense here.

And with it confusion arises: how did we manage to get the old code to
work with the pdp encoding pointing to itself?
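
(For reference, the old path did

	fill_page_dma(&vm->scratch_pdp,
		      gen8_pde_encode(vm->scratch_pdp.daddr, I915_CACHE_LLC));

per the removed hunk above, so every entry of the scratch pdp pointed
back at the scratch pdp itself rather than down at scratch_pd.)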

-Mika

>  	}
>  
>  	return 0;
>  
> -free_pd:
> -	cleanup_page_dma(vm, &vm->scratch_pd);
> -free_pt:
> -	cleanup_page_dma(vm, &vm->scratch_pt);
> -free_scratch_page:
> -	cleanup_scratch_page(vm);
> -
> -	return ret;
> +free_scratch:
> +	free_scratch(vm);
> +	return -ENOMEM;
>  }
>  
>  static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
> @@ -1549,7 +1527,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  		if (IS_ERR(pd))
>  			goto unwind;
>  
> -		init_pd(pd, &vm->scratch_pt);
> +		init_pd(pd, &vm->scratch[1]);
>  		set_pd_entry(pdp, pdpe, pd);
>  	}
>  
> @@ -1582,16 +1560,15 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>  
>  static void init_pd_n(struct i915_address_space *vm,
>  		      struct i915_page_directory *pd,
> -		      struct i915_page_dma *to,
> +		      struct i915_page_scratch *scratch,
>  		      const unsigned int entries)
>  {
> -	const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
>  	u64 * const vaddr = kmap_atomic_px(pd);
>  
> -	memset64(vaddr, daddr, entries);
> +	memset64(vaddr, scratch->encode, entries);
>  	kunmap_atomic(vaddr);
>  
> -	memset_p(pd->entry, to, entries);
> +	memset_p(pd->entry, scratch, entries);
>  }
>  
>  static struct i915_page_directory *
> @@ -1602,7 +1579,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  	if (i915_vm_is_4lvl(vm)) {
>  		pd = alloc_pd(vm);
>  		if (!IS_ERR(pd))
> -			init_pd(pd, &vm->scratch_pdp);
> +			init_pd(pd, &vm->scratch[3]);
>  
>  		return pd;
>  	}
> @@ -1619,7 +1596,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>  		return ERR_PTR(-ENOMEM);
>  	}
>  
> -	init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES);
> +	init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES);
>  
>  	return pd;
>  }
> @@ -1766,7 +1743,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  {
>  	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
>  	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
> -	const gen6_pte_t scratch_pte = vm->scratch_pte;
> +	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
>  	unsigned int pde = first_entry / GEN6_PTES;
>  	unsigned int pte = first_entry % GEN6_PTES;
>  	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
> @@ -1777,7 +1754,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  		const unsigned int count = min(num_entries, GEN6_PTES - pte);
>  		gen6_pte_t *vaddr;
>  
> -		GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
> +		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
>  
>  		num_entries -= count;
>  
> @@ -1814,7 +1791,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  	struct sgt_dma iter = sgt_dma(vma);
>  	gen6_pte_t *vaddr;
>  
> -	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt);
> +	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
>  
>  	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
>  	do {
> @@ -1859,7 +1836,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  	gen6_for_each_pde(pt, pd, start, length, pde) {
>  		const unsigned int count = gen6_pte_count(start, length);
>  
> -		if (px_base(pt) == &vm->scratch_pt) {
> +		if (px_base(pt) == px_base(&vm->scratch[1])) {
>  			spin_unlock(&pd->lock);
>  
>  			pt = fetch_and_zero(&alloc);
> @@ -1870,10 +1847,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  				goto unwind_out;
>  			}
>  
> -			fill32_px(pt, vm->scratch_pte);
> +			fill32_px(pt, vm->scratch[0].encode);
>  
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == &vm->scratch_pt) {
> +			if (pd->entry[pde] == &vm->scratch[1]) {
>  				pd->entry[pde] = pt;
>  				if (i915_vma_is_bound(ppgtt->vma,
>  						      I915_VMA_GLOBAL_BIND)) {
> @@ -1910,26 +1887,23 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
>  {
>  	struct i915_address_space * const vm = &ppgtt->base.vm;
>  	struct i915_page_directory * const pd = ppgtt->base.pd;
> -	struct i915_page_table *unused;
> -	u32 pde;
>  	int ret;
>  
>  	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
>  	if (ret)
>  		return ret;
>  
> -	vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
> -					 I915_CACHE_NONE,
> -					 PTE_READ_ONLY);
> +	vm->scratch[0].encode =
> +		vm->pte_encode(px_dma(&vm->scratch[0]),
> +			       I915_CACHE_NONE, PTE_READ_ONLY);
>  
> -	if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
> +	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
>  		cleanup_scratch_page(vm);
>  		return -ENOMEM;
>  	}
> -	fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte);
>  
> -	gen6_for_all_pdes(unused, pd, pde)
> -		pd->entry[pde] = &vm->scratch_pt;
> +	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
> +	memset_p(pd->entry, &vm->scratch[1], I915_PDES);
>  
>  	return 0;
>  }
> @@ -1937,11 +1911,13 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
>  static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
>  {
>  	struct i915_page_directory * const pd = ppgtt->base.pd;
> +	struct i915_page_dma * const scratch =
> +		px_base(&ppgtt->base.vm.scratch[1]);
>  	struct i915_page_table *pt;
>  	u32 pde;
>  
>  	gen6_for_all_pdes(pt, pd, pde)
> -		if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
> +		if (px_base(pt) != scratch)
>  			free_px(&ppgtt->base.vm, pt);
>  }
>  
> @@ -1999,7 +1975,8 @@ static void pd_vma_unbind(struct i915_vma *vma)
>  {
>  	struct gen6_ppgtt *ppgtt = vma->private;
>  	struct i915_page_directory * const pd = ppgtt->base.pd;
> -	struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt;
> +	struct i915_page_dma * const scratch =
> +		px_base(&ppgtt->base.vm.scratch[1]);
>  	struct i915_page_table *pt;
>  	unsigned int pde;
>  
> @@ -2405,7 +2382,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
>  	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
> -	const gen8_pte_t scratch_pte = vm->scratch_pte;
> +	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
>  	gen8_pte_t __iomem *gtt_base =
>  		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
>  	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
> @@ -2530,8 +2507,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  		 first_entry, num_entries, max_entries))
>  		num_entries = max_entries;
>  
> -	scratch_pte = vm->scratch_pte;
> -
> +	scratch_pte = vm->scratch[0].encode;
>  	for (i = 0; i < num_entries; i++)
>  		iowrite32(scratch_pte, &gtt_base[i]);
>  }
> @@ -3005,8 +2981,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>  		return ret;
>  	}
>  
> -	ggtt->vm.scratch_pte =
> -		ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
> +	ggtt->vm.scratch[0].encode =
> +		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
>  				    I915_CACHE_NONE, 0);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 119b6d33b266..669b204d4c13 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -240,6 +240,11 @@ struct i915_page_dma {
>  	};
>  };
>  
> +struct i915_page_scratch {
> +	struct i915_page_dma base;
> +	u64 encode;
> +};
> +
>  struct i915_page_table {
>  	struct i915_page_dma base;
>  	atomic_t used;
> @@ -260,9 +265,10 @@ struct i915_page_directory {
>  
>  #define px_base(px) \
>  	__px_choose_expr(px, struct i915_page_dma *, __x, \
> +	__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
>  	__px_choose_expr(px, struct i915_page_table *, &__x->base, \
>  	__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
> -	(void)0)))
> +	(void)0))))
>  #define px_dma(px) (px_base(px)->daddr)
>  
>  #define px_pt(px) \
> @@ -317,12 +323,8 @@ struct i915_address_space {
>  #define VM_CLASS_GGTT 0
>  #define VM_CLASS_PPGTT 1
>  
> -	u64 scratch_pte;
> +	struct i915_page_scratch scratch[4];
>  	int scratch_order;
> -	struct i915_page_dma scratch_page;
> -	struct i915_page_dma scratch_pt;
> -	struct i915_page_dma scratch_pd;
> -	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
>  	int top;
>  
>  	/**
> -- 
> 2.20.1

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-10  8:25     ` Chris Wilson
@ 2019-07-10 14:25       ` Mika Kuoppala
  2019-07-10 14:35         ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 14:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-10 09:17:27)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > This will be useful to consolidate recursive code.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
>> >  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
>> >  2 files changed, 4 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > index da4db76ce054..2fc60e8acd9a 100644
>> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> > @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>> >       ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
>> >       ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
>> >       ppgtt->vm.vma_ops.clear_pages = clear_pages;
>> > +
>> > +     ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>> 
>> Perhaps it becomes evident later in the series why top and
>> not level, so these would be 4 and 3 here.
>
> Because we use top and not level :)

You make me subtract one with my biological processor.
It is hard.

Please do remake i915_vm_is_4lvl() and include it.
-Mika

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array
  2019-07-10 14:18   ` Mika Kuoppala
@ 2019-07-10 14:28     ` Chris Wilson
  2019-07-10 14:53       ` Mika Kuoppala
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-10 14:28 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 15:18:32)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > -     if (i915_vm_is_4lvl(vm)) {
> > -             if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
> > -                     ret = -ENOMEM;
> > -                     goto free_pd;
> > -             }
> > -             fill_page_dma(&vm->scratch_pdp,
> > -                           gen8_pde_encode(vm->scratch_pdp.daddr,
> > -                                           I915_CACHE_LLC));
> > +             fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
> > +             vm->scratch[i].encode =
> > +                     gen8_pde_encode(px_dma(&vm->scratch[i]),
> > +                                     I915_CACHE_LLC);
> 
> Ok. The new code makes perfect sense here.
> 
> And with it confusion arises: how did we manage to get the old code
> to work with the pdp encoding pointing to itself?

What the.... You're right. That must have caused some funky GPU hangs if
people tried to access something far outside of their set.

Hmm, you know that's exactly what live_contexts/vm_isolation tries. Well,
it tries to write into random invalid addresses and see if the writes
affect the scratch of another context. Did I choose randomly carefully
enough? Hmm, offset &= -sizeof(u32); is there a danger there that it's
only u32 and not u64? Maybe. But otherwise it looks like it should be
picking a prng offset over the whole vm->total and so should be tripping
over the recursion :|
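
(If the intermediate really were u32, the mask itself would be harmless
but the prng would never sample above 4GiB, leaving the upper pml4
entries (the ones still pointing at the self-referencing scratch_pdp)
untested. Speculation on my part.)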
-Chris

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-10 14:25       ` Mika Kuoppala
@ 2019-07-10 14:35         ` Chris Wilson
  2019-07-10 14:50           ` Mika Kuoppala
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-10 14:35 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 15:25:38)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2019-07-10 09:17:27)
> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> 
> >> > This will be useful to consolidate recursive code.
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> > ---
> >> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
> >> >  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
> >> >  2 files changed, 4 insertions(+)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> > index da4db76ce054..2fc60e8acd9a 100644
> >> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> > @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
> >> >       ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
> >> >       ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
> >> >       ppgtt->vm.vma_ops.clear_pages = clear_pages;
> >> > +
> >> > +     ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
> >> 
> >> Perhaps it becomes evident later in the series why top and
> >> not level, so these would be 4 and 3 here.
> >
> > Because we use top and not level :)
> 
> You make me subtract one with my biological processor.
> It is hard.
> 
> Please do remake i915_vm_is_4lvl() and include it.

I'm tempted to put the gtt_depth in the device info.

How do you want i915_vm_is_4lvl() remade? The special case going
forward is really is_3lvl?
-Chris

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-10 14:35         ` Chris Wilson
@ 2019-07-10 14:50           ` Mika Kuoppala
  2019-07-10 15:03             ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 14:50 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-10 15:25:38)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Quoting Mika Kuoppala (2019-07-10 09:17:27)
>> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> >> 
>> >> > This will be useful to consolidate recursive code.
>> >> >
>> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >> > ---
>> >> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
>> >> >  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
>> >> >  2 files changed, 4 insertions(+)
>> >> >
>> >> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> > index da4db76ce054..2fc60e8acd9a 100644
>> >> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> > @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>> >> >       ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
>> >> >       ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
>> >> >       ppgtt->vm.vma_ops.clear_pages = clear_pages;
>> >> > +
>> >> > +     ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>> >> 
>> >> Perhaps it becomes evident later in the series why top and
>> >> not level, so these would be 4 and 3 here.
>> >
>> > Because we use top and not level :)
>> 
>> You make me subtract one with my biological processor.
>> It is hard.
>> 
>> Please do remake i915_vm_is_4lvl() and include it.
>
> I'm tempted to put the gtt_depth in the device info.
>
> How do you want i915_vm_is_4lvl() remade? The special case going
> forward is really is_3lvl?

No strong feelings here. How about i915_vm_get_lvl(vm)
{ return vm->top + 1; } ?

But anything which looks sleek on callsites is fine.

-Mika

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array
  2019-07-10 14:28     ` Chris Wilson
@ 2019-07-10 14:53       ` Mika Kuoppala
  0 siblings, 0 replies; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 14:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-10 15:18:32)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> > -     if (i915_vm_is_4lvl(vm)) {
>> > -             if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
>> > -                     ret = -ENOMEM;
>> > -                     goto free_pd;
>> > -             }
>> > -             fill_page_dma(&vm->scratch_pdp,
>> > -                           gen8_pde_encode(vm->scratch_pdp.daddr,
>> > -                                           I915_CACHE_LLC));
>> > +             fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
>> > +             vm->scratch[i].encode =
>> > +                     gen8_pde_encode(px_dma(&vm->scratch[i]),
>> > +                                     I915_CACHE_LLC);
>> 
>> Ok. The new code makes perfect sense here.
>> 
>> And with it confusion arises: how did we manage to get the old code
>> to work with the pdp encoding pointing to itself?
>
> What the.... You're right. That must have caused some funky GPU hangs if
> people tried to access something far outside of their set.
>
> Hmm, you know that's exactly what live_contexts/vm_isolation tries. Well,
> it tries to write into random invalid addresses and see if the writes
> affect the scratch of another context. Did I choose randomly carefully
> enough? Hmm, offset &= -sizeof(u32); is there a danger there that it's
> only u32 and not u64? Maybe. But otherwise it looks like it should be
> picking a prng offset over the whole vm->total and so should be tripping
> over the recursion :|

Worth investigating, surely. But this patch looks good and makes it
sane,

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels
  2019-07-10 13:49   ` Mika Kuoppala
  2019-07-10 13:55     ` Chris Wilson
@ 2019-07-10 14:55     ` Mika Kuoppala
  1 sibling, 0 replies; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 14:55 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
>> The radix levels of each page directory are easily determined so replace
>> the numerous hardcoded constants with precomputed derived constants.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++
>>  1 file changed, 39 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 2fc60e8acd9a..271305705c1c 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>>  	return 0;
>>  }
>>  
>> +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
>> +#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES))
>
> Could be just (lvl) * 9. But looking at ilog2(), it resolves to a
> compile-time constant anyway, so both are fine.
>
>> +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
>> +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
>> +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
>> +
>> +static inline unsigned int
>> +gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx)
>
> I was confused enough (even though the last function reveals it
> clearly) that on IRC we concluded that 'addr' as the first parameter
> is misleading, and converged on 'start'.
>
>> +{
>> +	const int shift = gen8_pd_shift(lvl);
>> +	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
>> +
>> +	GEM_BUG_ON(addr >= end);
>> +	end += ~mask >> gen8_pd_shift(1);
>> +
>> +	*idx = i915_pde_index(addr, shift);
>> +	if ((addr ^ end) & mask)
>> +		return I915_PDES - *idx;
>> +	else
>> +		return i915_pde_index(end, shift) - *idx;
>> +}
>> +
>> +static inline bool gen8_pd_subsumes(u64 addr, u64 end, int lvl)
>> +{
>
> Just a suggestion: gen8_pd_contains() for emphasis on exclusivity.
> But this is fine too; I guess it's whichever reads better at the
> callsite (which we don't see yet!).
>
>> +	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
>> +
>> +	GEM_BUG_ON(addr >= end);
>> +	return (addr ^ end) & mask && (addr & ~mask) == 0;
>> +}
>> +
>> +static inline unsigned int gen8_pt_count(u64 addr, u64 end)
>> +{
>> +	GEM_BUG_ON(addr >= end);
>> +	if ((addr ^ end) & ~I915_PDE_MASK)
>> +		return I915_PDES - (addr & I915_PDE_MASK);
>
> Ok, I yield on 512. I915_PDES is fine as it at least
> couples it to the mask :O
>
> With s/addr/start,
>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@mika.kuoppala@linux.intel.com>

Not long till vacation; hanging in there, but it starts to show...

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>
>> +	else
>> +		return end - addr;
>> +}
>> +
>>  static void gen8_free_page_tables(struct i915_address_space *vm,
>>  				  struct i915_page_directory *pd)
>>  {
>> -- 
>> 2.20.1

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-10 14:50           ` Mika Kuoppala
@ 2019-07-10 15:03             ` Chris Wilson
  2019-07-10 15:11               ` Mika Kuoppala
  0 siblings, 1 reply; 39+ messages in thread
From: Chris Wilson @ 2019-07-10 15:03 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 15:50:37)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Quoting Mika Kuoppala (2019-07-10 15:25:38)
> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> 
> >> > Quoting Mika Kuoppala (2019-07-10 09:17:27)
> >> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
> >> >> 
> >> >> > This will be useful to consolidate recursive code.
> >> >> >
> >> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> >> > ---
> >> >> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
> >> >> >  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
> >> >> >  2 files changed, 4 insertions(+)
> >> >> >
> >> >> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> >> > index da4db76ce054..2fc60e8acd9a 100644
> >> >> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> >> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> >> > @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
> >> >> >       ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
> >> >> >       ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
> >> >> >       ppgtt->vm.vma_ops.clear_pages = clear_pages;
> >> >> > +
> >> >> > +     ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
> >> >> 
> >> >> Perhaps it becomes evident later in the series why top and
> >> >> not level, so these would be 4 and 3 here.
> >> >
> >> > Because we use top and not level :)
> >> 
> >> You make me subtract one with my biological processor.
> >> It is hard.
> >> 
> >> Please do remake i915_vm_is_4lvl() and include it.
> >
> > I'm tempted to put the gtt_depth in the device info.
> >
> > How do you want i915_vm_is_4lvl() remade? The special case going
> > forward is really is_3lvl?
> 
> No strong feelings here. How about i915_vm_get_lvl(vm)
> { return vm->top + 1; } ?

Who's going to be calling get_lvl() though? The one time where it might
be useful, we just use "<= top" instead.
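
(e.g. the scratch setup in patch 06 is already

	for (i = 1; i <= vm->top; i++) {
		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
			goto free_scratch;
		...
	}

so a get_lvl() wrapper would have no takers.)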
-Chris

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth
  2019-07-10 15:03             ` Chris Wilson
@ 2019-07-10 15:11               ` Mika Kuoppala
  0 siblings, 0 replies; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 15:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-07-10 15:50:37)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Quoting Mika Kuoppala (2019-07-10 15:25:38)
>> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> >> 
>> >> > Quoting Mika Kuoppala (2019-07-10 09:17:27)
>> >> >> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> >> >> 
>> >> >> > This will be useful to consolidate recursive code.
>> >> >> >
>> >> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >> >> > ---
>> >> >> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
>> >> >> >  drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
>> >> >> >  2 files changed, 4 insertions(+)
>> >> >> >
>> >> >> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> >> > index da4db76ce054..2fc60e8acd9a 100644
>> >> >> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> >> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> >> > @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>> >> >> >       ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
>> >> >> >       ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
>> >> >> >       ppgtt->vm.vma_ops.clear_pages = clear_pages;
>> >> >> > +
>> >> >> > +     ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>> >> >> 
>> >> >> Perhaps it becomes evident later in the series why top and
>> >> >> not level, so these would be 4 and 3 here.
>> >> >
>> >> > Because we use top and not level :)
>> >> 
>> >> You make me subtract one with my biological processor.
>> >> It is hard.
>> >> 
>> >> Please do remake i915_vm_is_4lvl() and include it.
>> >
>> > I'm tempted to put the gtt_depth in the device info.
>> >
>> > How do you want i915_vm_is_4lvl() remade? The special case going
>> > forward is really is_3lvl?
>> 
>> No strong feelings here. How about i915_vm_get_lvl(vm)
>> { return vm->top + 1; } ?
>
> Who's going to be calling get_lvl() though? The one time where it might
> be useful, we just use "<= top" instead.

Hmm, right, probably way too generic to query for the lvl and compare.
So then it is whichever reads best at the few callsites it will sit on.

-Mika


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries
  2019-07-07 21:00 ` [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries Chris Wilson
@ 2019-07-10 16:21   ` Mika Kuoppala
  2019-07-10 17:28     ` Chris Wilson
  0 siblings, 1 reply; 39+ messages in thread
From: Mika Kuoppala @ 2019-07-10 16:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We can simplify our gtt walking code by comparing against NULL for
> scratch entries as opposed to looking up the distinct per-level scratch
> pointer.
>
> The only caveat is to remember to protect external parties and map the
> NULL to the scratch top pd.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 124 +++++++++-------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
>  2 files changed, 41 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b7882f06214a..a99b89502a90 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -596,18 +596,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
>  
>  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
>  
> -#define fill_px(px, v) fill_page_dma(px_base(px), (v))
> -#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
> -
> -static void fill_page_dma(struct i915_page_dma *p, const u64 val)
> +static void
> +fill_page_dma(struct i915_page_dma *p, const u64 val, unsigned int count)
>  {
> -	kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
> +	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
>  }
>  
> -static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
> -{
> -	fill_page_dma(p, (u64)v << 32 | v);
> -}
> +#define fill_px(px, v) fill_page_dma(px_base(px), (v), I915_PDES)
> +#define fill32_px(px, v) do { \
> +	u64 vv = lower_32_bits(v); \
> +	fill_px(px, vv << 32 | vv); \
> +} while (0)
>  
>  static int
>  setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
> @@ -711,7 +710,6 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  	}
>  
>  	atomic_set(&pt->used, 0);
> -
>  	return pt;
>  }
>  
> @@ -719,13 +717,11 @@ static struct i915_page_directory *__alloc_pd(void)
>  {
>  	struct i915_page_directory *pd;
>  
> -	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
> +	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
>  	if (unlikely(!pd))
>  		return NULL;
>  
> -	atomic_set(px_used(pd), 0);
>  	spin_lock_init(&pd->lock);
> -
>  	return pd;
>  }
>  
> @@ -753,63 +749,56 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
>  
>  #define free_px(vm, px) free_pd(vm, px_base(px))
>  
> -static void init_pd(struct i915_page_directory *pd,
> -		    struct i915_page_scratch *scratch)
> -{
> -	fill_px(pd, scratch->encode);
> -	memset_p(pd->entry, scratch, 512);
> -}
> -
>  static inline void
>  write_dma_entry(struct i915_page_dma * const pdma,
> -		const unsigned short pde,
> +		const unsigned short idx,
>  		const u64 encoded_entry)
>  {
>  	u64 * const vaddr = kmap_atomic(pdma->page);
>  
> -	vaddr[pde] = encoded_entry;
> +	vaddr[idx] = encoded_entry;
>  	kunmap_atomic(vaddr);
>  }
>  
>  static inline void
>  __set_pd_entry(struct i915_page_directory * const pd,
> -	       const unsigned short pde,
> +	       const unsigned short idx,

My excuse was that as it is a pd, 'pde' fits. Considering that it is
now used at any level, and 'pde' in the bspec is reserved for the last
level, 'idx' is better.

>  	       struct i915_page_dma * const to,
>  	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
>  
>  	atomic_inc(px_used(pd));
> -	pd->entry[pde] = to;
> -	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
> +	pd->entry[idx] = to;
> +	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
>  }
>  
> -#define set_pd_entry(pd, pde, to) \
> -	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
> +#define set_pd_entry(pd, idx, to) \
> +	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
>  
>  static inline void
>  clear_pd_entry(struct i915_page_directory * const pd,
> -	       const unsigned short pde,
> -	       struct i915_page_scratch * const scratch)
> +	       const unsigned short idx,
> +	       const struct i915_page_scratch * const scratch)
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
>  
> -	write_dma_entry(px_base(pd), pde, scratch->encode);
> -	pd->entry[pde] = scratch;
> +	write_dma_entry(px_base(pd), idx, scratch->encode);
> +	pd->entry[idx] = NULL;
>  	atomic_dec(px_used(pd));
>  }
>  
>  static bool
>  release_pd_entry(struct i915_page_directory * const pd,
> -		 const unsigned short pde,
> +		 const unsigned short idx,
>  		 struct i915_page_table * const pt,
> -		 struct i915_page_scratch * const scratch)
> +		 const struct i915_page_scratch * const scratch)
>  {
>  	bool free = false;
>  
>  	spin_lock(&pd->lock);
>  	if (atomic_dec_and_test(&pt->used)) {
> -		clear_pd_entry(pd, pde, scratch);
> +		clear_pd_entry(pd, idx, scratch);
>  		free = true;
>  	}
>  	spin_unlock(&pd->lock);
> @@ -910,7 +899,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i] != &vm->scratch[1])
> +		if (pd->entry[i])
>  			free_pd(vm, pd->entry[i]);
>  	}
>  }
> @@ -922,7 +911,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < pdpes; i++) {
> -		if (pdp->entry[i] == &vm->scratch[2])
> +		if (!pdp->entry[i])
>  			continue;
>  
>  		gen8_free_page_tables(vm, pdp->entry[i]);
> @@ -940,7 +929,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
>  	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
>  		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
>  
> -		if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3]))
> +		if (!pdp)
>  			continue;
>  
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> @@ -994,8 +983,6 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  	u32 pde;
>  
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
> -

A NULL oops is at least equally descriptive.

>  		atomic_inc(&pt->used);
>  		gen8_ppgtt_clear_pt(vm, pt, start, length);
>  		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
> @@ -1014,8 +1001,6 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	unsigned int pdpe;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2]));
> -
>  		atomic_inc(px_used(pd));
>  		gen8_ppgtt_clear_pd(vm, pd, start, length);
>  		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
> @@ -1044,8 +1029,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3]));
> -
>  		atomic_inc(px_used(pdp));
>  		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
>  		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
> @@ -1066,7 +1049,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>  
> -		if (px_base(pt) == px_base(&vm->scratch[1])) {
> +		if (!pt) {
>  			spin_unlock(&pd->lock);
>  
>  			pt = fetch_and_zero(&alloc);
> @@ -1081,7 +1064,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  				fill_px(pt, vm->scratch[0].encode);
>  
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == &vm->scratch[1]) {
> +			if (!pd->entry[pde]) {
>  				set_pd_entry(pd, pde, pt);
>  			} else {
>  				alloc = pt;
> @@ -1113,7 +1096,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (px_base(pd) == px_base(&vm->scratch[2])) {
> +		if (!pd) {
>  			spin_unlock(&pdp->lock);
>  
>  			pd = fetch_and_zero(&alloc);
> @@ -1124,10 +1107,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(pd, &vm->scratch[1]);
> +			fill_px(pd, vm->scratch[1].encode);
>  
>  			spin_lock(&pdp->lock);
> -			if (pdp->entry[pdpe] == &vm->scratch[2]) {
> +			if (!pdp->entry[pdpe]) {
>  				set_pd_entry(pdp, pdpe, pd);
>  			} else {
>  				alloc = pd;
> @@ -1177,7 +1160,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (px_base(pdp) == px_base(&vm->scratch[3])) {
> +		if (!pdp) {
>  			spin_unlock(&pml4->lock);
>  
>  			pdp = fetch_and_zero(&alloc);
> @@ -1188,10 +1171,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(pdp, &vm->scratch[2]);
> +			fill_px(pdp, vm->scratch[2].encode);
>  
>  			spin_lock(&pml4->lock);
> -			if (pml4->entry[pml4e] == &vm->scratch[3]) {
> +			if (!pml4->entry[pml4e]) {
>  				set_pd_entry(pml4, pml4e, pdp);
>  			} else {
>  				alloc = pdp;
> @@ -1527,7 +1510,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  		if (IS_ERR(pd))
>  			goto unwind;
>  
> -		init_pd(pd, &vm->scratch[1]);
> +		fill_px(pd, vm->scratch[1].encode);
>  		set_pd_entry(pdp, pdpe, pd);
>  	}
>  
> @@ -1558,46 +1541,19 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>  	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>  }
>  
> -static void init_pd_n(struct i915_address_space *vm,
> -		      struct i915_page_directory *pd,
> -		      struct i915_page_scratch *scratch,
> -		      const unsigned int entries)
> -{
> -	u64 * const vaddr = kmap_atomic_px(pd);
> -
> -	memset64(vaddr, scratch->encode, entries);
> -	kunmap_atomic(vaddr);
> -
> -	memset_p(pd->entry, scratch, entries);
> -}
> -
>  static struct i915_page_directory *
>  gen8_alloc_top_pd(struct i915_address_space *vm)
>  {
> +	const unsigned int count = vm->total >> __gen8_pte_shift(vm->top);

Nice.
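
And it folds the old 4lvl/3lvl split into one expression. Assuming the
shift helper works out to 12 + 9 * lvl (4K pages with 512 eight-byte
entries per level; my reading of the naming, not stated in this hunk),
the arithmetic checks out for both layouts:

	/* illustrative only; SHIFT() stands in for __gen8_pte_shift() */
	#define SHIFT(lvl) (12 + 9 * (lvl))
	_Static_assert((1ULL << 48) >> SHIFT(3) == 512, "4lvl: 512 entries");
	_Static_assert((1ULL << 32) >> SHIFT(2) == 4,   "3lvl: 4 PDPEs");

The 3lvl result matching GEN8_3LVL_PDPES is what makes the BUG_ON below
a plain bounds check.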

>  	struct i915_page_directory *pd;
>  
> -	if (i915_vm_is_4lvl(vm)) {
> -		pd = alloc_pd(vm);
> -		if (!IS_ERR(pd))
> -			init_pd(pd, &vm->scratch[3]);
> +	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));

I don't have to think. I like that.

>  
> +	pd = alloc_pd(vm);
> +	if (IS_ERR(pd))
>  		return pd;
> -	}
> -
> -	/* 3lvl */
> -	pd = __alloc_pd();
> -	if (!pd)
> -		return ERR_PTR(-ENOMEM);
> -
> -	pd->entry[GEN8_3LVL_PDPES] = NULL;

OK, you don't like the sentry. Perhaps you could write
a few soothing words about how noisily we crash if we
run long on this runway. If the tower sees it and
sends the firetrucks, that is fine.

> -
> -	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
> -		kfree(pd);
> -		return ERR_PTR(-ENOMEM);
> -	}
> -
> -	init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES);
>  
> +	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
>  	return pd;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 669b204d4c13..2341944b9b17 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -610,7 +610,7 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
>  {
>  	struct i915_page_dma *pt = ppgtt->pd->entry[n];
>  
> -	return px_dma(pt);
> +	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));

No other external users.

Earlier in the series I yearned a little for an is_entry_available|free|empty
helper, but comparing against NULL beats any of them.
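
And for anyone squinting at the ?: (the GNU elvis extension), it is
just a NULL check with a fallback to the top-level scratch; in
longhand the sketch is:

	struct i915_page_dma *pt = ppgtt->pd->entry[n];

	if (!pt) /* empty slot: fall back to the top-level scratch */
		pt = px_base(&ppgtt->vm.scratch[ppgtt->vm.top]);

	return px_dma(pt);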

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  }
>  
>  static inline struct i915_ggtt *
> -- 
> 2.20.1

* Re: [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries
  2019-07-10 16:21   ` Mika Kuoppala
@ 2019-07-10 17:28     ` Chris Wilson
  0 siblings, 0 replies; 39+ messages in thread
From: Chris Wilson @ 2019-07-10 17:28 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-07-10 17:21:15)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > +     pd = alloc_pd(vm);
> > +     if (IS_ERR(pd))
> >               return pd;
> > -     }
> > -
> > -     /* 3lvl */
> > -     pd = __alloc_pd();
> > -     if (!pd)
> > -             return ERR_PTR(-ENOMEM);
> > -
> > -     pd->entry[GEN8_3LVL_PDPES] = NULL;
> 
> OK, you don't like the sentry. Perhaps you could write
> a few soothing words about how noisily we crash if we
> run long on this runway. If the tower sees it and
> sends the firetrucks, that is fine.

It's a tight allocation (or becomes one a patch or two down); you have to
enable slab_debug to see the fireworks in the redzone, or KASAN to detect
the out-of-bounds write.
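
For anyone who wants to see those fireworks without booting a kernel:
the failure mode is a one-past-the-end write into an exactly-sized
allocation, and a userspace stand-in built with -fsanitize=address
shows the same class of report KASAN would (illustrative only, not
driver code):

	#include <stdlib.h>

	int main(void)
	{
		/* exactly four slots, like the tightened 3lvl pd: no sentry */
		void **entry = calloc(4, sizeof(*entry));
		if (!entry)
			return 1;

		entry[4] = entry; /* run long on the runway: ASan flags it */

		free(entry);
		return 0;
	}
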
-Chris

end of thread, other threads:[~2019-07-10 17:29 UTC | newest]

Thread overview: 39+ messages
2019-07-07 21:00 Refactor GTT recursion to be ... recursion Chris Wilson
2019-07-07 21:00 ` [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch Chris Wilson
2019-07-09 12:24   ` Mika Kuoppala
2019-07-09 12:29     ` Chris Wilson
2019-07-09 12:41       ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory Chris Wilson
2019-07-09 14:43   ` Mika Kuoppala
2019-07-09 14:46     ` Chris Wilson
2019-07-07 21:00 ` [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc Chris Wilson
2019-07-09 14:59   ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth Chris Wilson
2019-07-10  8:17   ` Mika Kuoppala
2019-07-10  8:25     ` Chris Wilson
2019-07-10 14:25       ` Mika Kuoppala
2019-07-10 14:35         ` Chris Wilson
2019-07-10 14:50           ` Mika Kuoppala
2019-07-10 15:03             ` Chris Wilson
2019-07-10 15:11               ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels Chris Wilson
2019-07-09 15:21   ` Chris Wilson
2019-07-10  9:24   ` Mika Kuoppala
2019-07-10  9:28     ` Chris Wilson
2019-07-10 13:49   ` Mika Kuoppala
2019-07-10 13:55     ` Chris Wilson
2019-07-10 14:55     ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array Chris Wilson
2019-07-10 14:18   ` Mika Kuoppala
2019-07-10 14:28     ` Chris Wilson
2019-07-10 14:53       ` Mika Kuoppala
2019-07-07 21:00 ` [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries Chris Wilson
2019-07-10 16:21   ` Mika Kuoppala
2019-07-10 17:28     ` Chris Wilson
2019-07-07 21:00 ` [PATCH 08/11] drm/i915/gtt: Recursive cleanup for gen8 Chris Wilson
2019-07-07 21:00 ` [PATCH 09/11] drm/i915/gtt: Recursive ppgtt clear " Chris Wilson
2019-07-07 21:00 ` [PATCH 10/11] drm/i915/gtt: Recursive ppgtt alloc " Chris Wilson
2019-07-07 21:00 ` [PATCH 11/11] drm/i915/gtt: Tidy up ppgtt insertion " Chris Wilson
2019-07-07 21:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/11] drm/i915/gtt: Use shallow dma pages for scratch Patchwork
2019-07-07 21:46 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-07-07 22:00 ` ✓ Fi.CI.BAT: success " Patchwork
