All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages()
@ 2017-02-02 15:02 Chris Wilson
  2017-02-02 15:02 ` [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
                   ` (19 more replies)
  0 siblings, 20 replies; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The predominant VMA class is normal GTT, so allow gcc to emphasize that
path and avoid unnecessary stack movement.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ec360ab939b8..f8cef51cf24c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2615,14 +2615,16 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	struct drm_i915_gem_object *obj = vma->obj;
-	u32 pte_flags = 0;
-	int ret;
+	u32 pte_flags;
 
-	ret = i915_get_ggtt_vma_pages(vma);
-	if (ret)
-		return ret;
+	if (unlikely(!vma->pages)) {
+		int ret = i915_get_ggtt_vma_pages(vma);
+		if (ret)
+			return ret;
+	}
 
 	/* Currently applicable only to VLV */
+	pte_flags = 0;
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
@@ -2647,18 +2649,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	u32 pte_flags;
-	int ret;
 
-	ret = i915_get_ggtt_vma_pages(vma);
-	if (ret)
-		return ret;
+	if (unlikely(!vma->pages)) {
+		int ret = i915_get_ggtt_vma_pages(vma);
+		if (ret)
+			return ret;
+	}
 
 	/* Currently applicable only to VLV */
 	pte_flags = 0;
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
-
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm,
@@ -3397,9 +3399,9 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
 	return sg;
 }
 
-static struct sg_table *
-intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
-			  struct drm_i915_gem_object *obj)
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+		   struct drm_i915_gem_object *obj)
 {
 	const size_t n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size = intel_rotation_info_size(rot_info);
@@ -3460,7 +3462,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
 	return ERR_PTR(ret);
 }
 
-static struct sg_table *
+static noinline struct sg_table *
 intel_partial_pages(const struct i915_ggtt_view *view,
 		    struct drm_i915_gem_object *obj)
 {
@@ -3514,7 +3516,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 static int
 i915_get_ggtt_vma_pages(struct i915_vma *vma)
 {
-	int ret = 0;
+	int ret;
 
 	/* The vma->pages are only valid within the lifespan of the borrowed
 	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
@@ -3523,32 +3525,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	 */
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
 
-	if (vma->pages)
+	switch (vma->ggtt_view.type) {
+	case I915_GGTT_VIEW_NORMAL:
+		vma->pages = vma->obj->mm.pages;
 		return 0;
 
-	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
-		vma->pages = vma->obj->mm.pages;
-	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
+	case I915_GGTT_VIEW_ROTATED:
 		vma->pages =
-			intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated,
-						  vma->obj);
-	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
+			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
+		break;
+
+	case I915_GGTT_VIEW_PARTIAL:
 		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
-	else
+		break;
+
+	default:
 		WARN_ONCE(1, "GGTT view %u not implemented!\n",
 			  vma->ggtt_view.type);
+		return -EINVAL;
+	}
 
-	if (!vma->pages) {
-		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
-			  vma->ggtt_view.type);
-		ret = -EINVAL;
-	} else if (IS_ERR(vma->pages)) {
+	ret = 0;
+	if (unlikely(IS_ERR(vma->pages))) {
 		ret = PTR_ERR(vma->pages);
 		vma->pages = NULL;
 		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
 			  vma->ggtt_view.type, ret);
 	}
-
 	return ret;
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-09 11:34   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
                   ` (18 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Inline the address computation to avoid the vfunc call for every page.
We still have to pay the high overhead of sg_page_iter_next(), but now
at least GCC can optimise the inner most loop, giving a significant
boost to some thrashing Unreal Engine workloads.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f8cef51cf24c..0d540c244e85 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1885,6 +1885,11 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
+struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+};
+
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
@@ -1894,27 +1899,34 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / GEN6_PTES;
 	unsigned act_pte = first_entry % GEN6_PTES;
-	gen6_pte_t *pt_vaddr = NULL;
-	struct sgt_iter sgt_iter;
-	dma_addr_t addr;
+	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+	struct sgt_dma iter;
+	gen6_pte_t *vaddr;
+
+	vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+	iter.sg = pages->sgl;
+	iter.dma = sg_dma_address(iter.sg);
+	iter.max = iter.dma + iter.sg->length;
+	do {
+		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
 
-	for_each_sgt_dma(addr, sgt_iter, pages) {
-		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+		iter.dma += PAGE_SIZE;
+		if (iter.dma == iter.max) {
+			iter.sg = __sg_next(iter.sg);
+			if (!iter.sg)
+				break;
 
-		pt_vaddr[act_pte] =
-			vm->pte_encode(addr, cache_level, flags);
+			iter.dma = sg_dma_address(iter.sg);
+			iter.max = iter.dma + iter.sg->length;
+		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
-			act_pt++;
+			kunmap_px(ppgtt, vaddr);
+			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
-	}
-
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	} while (1);
+	kunmap_px(ppgtt, vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -2496,27 +2508,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level, u32 flags)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	struct sgt_iter sgt_iter;
-	gen6_pte_t __iomem *gtt_entries;
-	gen6_pte_t gtt_entry;
+	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+	unsigned int i = start >> PAGE_SHIFT;
+	struct sgt_iter iter;
 	dma_addr_t addr;
-	int i = 0;
-
-	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
-
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = vm->pte_encode(addr, level, flags);
-		iowrite32(gtt_entry, &gtt_entries[i++]);
-	}
-
-	/* XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
+	for_each_sgt_dma(addr, iter, st)
+		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
  2017-02-02 15:02 ` [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-02 15:32   ` Chris Wilson
  2017-02-02 15:02 ` [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
                   ` (17 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Improve the sg iteration and in the process eliminate a bug in
miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
full length of the sg table.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 157 +++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0d540c244e85..f503fc0d8530 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -745,9 +745,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
 	unsigned int pte_end = pte + num_entries;
-	gen8_pte_t *pt_vaddr;
-	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC);
+	gen8_pte_t scratch_pte =
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+	gen8_pte_t *vaddr;
 
 	if (WARN_ON(!px_page(pt)))
 		return false;
@@ -759,12 +759,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
 		return true;
 
-	pt_vaddr = kmap_px(pt);
-
+	vaddr = kmap_px(pt);
 	while (pte < pte_end)
-		pt_vaddr[pte++] = scratch_pte;
-
-	kunmap_px(ppgtt, pt_vaddr);
+		vaddr[pte++] = scratch_pte;
+	kunmap_px(ppgtt, vaddr);
 
 	return false;
 }
@@ -872,71 +870,93 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
 }
 
-static void
-gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+};
+
+static __always_inline bool
+gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 			      struct i915_page_directory_pointer *pdp,
-			      struct sg_page_iter *sg_iter,
-			      uint64_t start,
+			      struct sgt_dma *iter,
+			      u64 start,
 			      enum i915_cache_level cache_level)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = gen8_pdpe_index(start);
-	unsigned pde = gen8_pde_index(start);
-	unsigned pte = gen8_pte_index(start);
+	unsigned int pdpe = gen8_pdpe_index(start);
+	unsigned int pde = gen8_pde_index(start);
+	unsigned int pte = gen8_pte_index(start);
+	struct i915_page_directory *pd;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+	gen8_pte_t *vaddr;
+	bool ret = true;
 
-	pt_vaddr = NULL;
+	pd = pdp->page_directory[pdpe];
+	vaddr = kmap_px(pd->page_table[pde]);
+	do {
+		vaddr[pte] = pte_encode | iter->dma;
+		iter->dma += PAGE_SIZE;
+		if (iter->dma >= iter->max) {
+			iter->sg = __sg_next(iter->sg);
+			if (!iter->sg) {
+				ret = false;
+				break;
+			}
 
-	while (__sg_page_iter_next(sg_iter)) {
-		if (pt_vaddr == NULL) {
-			struct i915_page_directory *pd = pdp->page_directory[pdpe];
-			struct i915_page_table *pt = pd->page_table[pde];
-			pt_vaddr = kmap_px(pt);
+			iter->dma = sg_dma_address(iter->sg);
+			iter->max = iter->dma + iter->sg->length;
 		}
 
-		pt_vaddr[pte] =
-			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level);
 		if (++pte == GEN8_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
 			if (++pde == I915_PDES) {
-				if (++pdpe == I915_PDPES_PER_PDP(vm->i915))
-					break;
+				pd = pdp->page_directory[++pdpe];
 				pde = 0;
 			}
+
+			kunmap_px(ppgtt, vaddr);
+			vaddr = kmap_px(pd->page_table[pde]);
 			pte = 0;
 		}
-	}
+	} while (1);
+	kunmap_px(ppgtt, vaddr);
 
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	return ret;
 }
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct sg_table *pages,
-				      uint64_t start,
-				      enum i915_cache_level cache_level,
-				      u32 unused)
+static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
+				   struct sg_table *pages,
+				   uint64_t start,
+				   enum i915_cache_level cache_level,
+				   u32 unused)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct sg_page_iter sg_iter;
+	struct sgt_dma iter = {
+		.sg = pages->sgl,
+		.dma = sg_dma_address(iter.sg),
+		.max = iter.dma + iter.sg->length,
+	};
 
-	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter,
+				      start, cache_level);
+}
 
-	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
-		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
-					      cache_level);
-	} else {
-		struct i915_page_directory_pointer *pdp;
-		uint64_t pml4e;
-		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
+				   struct sg_table *pages,
+				   uint64_t start,
+				   enum i915_cache_level cache_level,
+				   u32 unused)
+{
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct sgt_dma iter = {
+		.sg = pages->sgl,
+		.dma = sg_dma_address(iter.sg),
+		.max = iter.dma + iter.sg->length,
+	};
+	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
+	unsigned int pml4e = gen8_pml4e_index(start);
 
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
-			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
-						      start, cache_level);
-		}
-	}
+	while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter,
+					     start, cache_level))
+		;
 }
 
 static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
@@ -1588,7 +1608,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
@@ -1603,6 +1622,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 		ppgtt->base.total = 1ULL << 48;
 		ppgtt->switch_mm = gen8_48b_mm_switch;
+
+		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
 	} else {
 		ret = __pdp_init(dev_priv, &ppgtt->pdp);
 		if (ret)
@@ -1619,6 +1640,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			if (ret)
 				goto free_scratch;
 		}
+
+		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
 	}
 
 	if (intel_vgpu_active(dev_priv))
@@ -1885,11 +1908,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
-struct sgt_dma {
-	struct scatterlist *sg;
-	dma_addr_t dma, max;
-};
-
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
@@ -2427,26 +2445,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
-	gen8_pte_t gtt_entry;
+	gen8_pte_t pte_encode = gen8_pte_encode(0, level);
 	dma_addr_t addr;
-	int i = 0;
-
-	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = gen8_pte_encode(addr, level);
-		gen8_set_pte(&gtt_entries[i++], gtt_entry);
-	}
+	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+	gtt_entries += start >> PAGE_SHIFT;
+	for_each_sgt_dma(addr, sgt_iter, st)
+		gen8_set_pte(gtt_entries++, pte_encode | addr);
 
-	/*
-	 * XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
  2017-02-02 15:02 ` [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
  2017-02-02 15:02 ` [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-06 14:21   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
                   ` (16 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The aliasing_ppgtt is a regular ppgtt, and we can use the regular
i915_ppgtt_put() to properly tear it down.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 53 +++++++++++--------------------------
 1 file changed, 15 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f503fc0d8530..921aa1e902b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2225,23 +2225,6 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
 }
 
-static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
-			   struct drm_i915_private *dev_priv,
-			   struct drm_i915_file_private *file_priv,
-			   const char *name)
-{
-	int ret;
-
-	ret = __hw_ppgtt_init(ppgtt, dev_priv);
-	if (ret == 0) {
-		kref_init(&ppgtt->ref);
-		i915_address_space_init(&ppgtt->base, dev_priv, name);
-		ppgtt->base.file = file_priv;
-	}
-
-	return ret;
-}
-
 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
 {
 	gtt_write_workarounds(dev_priv);
@@ -2279,12 +2262,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv,
 	if (!ppgtt)
 		return ERR_PTR(-ENOMEM);
 
-	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name);
+	ret = __hw_ppgtt_init(ppgtt, dev_priv);
 	if (ret) {
 		kfree(ppgtt);
 		return ERR_PTR(ret);
 	}
 
+	kref_init(&ppgtt->ref);
+	i915_address_space_init(&ppgtt->base, dev_priv, name);
+	ppgtt->base.file = fpriv;
+
 	trace_i915_ppgtt_create(&ppgtt->base);
 
 	return ppgtt;
@@ -2775,21 +2762,17 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
 
 	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
-		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-		if (!ppgtt) {
-			ret = -ENOMEM;
+		ppgtt = i915_ppgtt_create(dev_priv, NULL, "[alias]");
+		if (IS_ERR(ppgtt)) {
+			ret = PTR_ERR(ppgtt);
 			goto err;
 		}
 
-		ret = __hw_ppgtt_init(ppgtt, dev_priv);
-		if (ret)
-			goto err_ppgtt;
-
 		if (ppgtt->base.allocate_va_range) {
 			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
 							    ppgtt->base.total);
 			if (ret)
-				goto err_ppgtt_cleanup;
+				goto err_ppgtt;
 		}
 
 		ppgtt->base.clear_range(&ppgtt->base,
@@ -2803,10 +2786,8 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 
 	return 0;
 
-err_ppgtt_cleanup:
-	ppgtt->base.cleanup(&ppgtt->base);
 err_ppgtt:
-	kfree(ppgtt);
+	i915_ppgtt_put(ppgtt);
 err:
 	drm_mm_remove_node(&ggtt->error_capture);
 	return ret;
@@ -2829,26 +2810,22 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 		WARN_ON(i915_vma_unbind(vma));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	if (dev_priv->mm.aliasing_ppgtt) {
-		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-		ppgtt->base.cleanup(&ppgtt->base);
-		kfree(ppgtt);
-	}
-
 	i915_gem_cleanup_stolen(&dev_priv->drm);
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	if (dev_priv->mm.aliasing_ppgtt)
+		i915_ppgtt_put(dev_priv->mm.aliasing_ppgtt);
+
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
 
 	if (drm_mm_initialized(&ggtt->base.mm)) {
 		intel_vgt_deballoon(dev_priv);
-
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		i915_address_space_fini(&ggtt->base);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
 	ggtt->base.cleanup(&ggtt->base);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	arch_phys_wc_del(ggtt->mtrr);
 	io_mapping_fini(&ggtt->mappable);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-06 15:07   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
                   ` (15 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Similar to how we already split the bind_vma for ggtt/aliasing_gtt, also
split up the unbind for symmetry.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 921aa1e902b4..45bab7b7b026 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2635,6 +2635,15 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	return 0;
 }
 
+static void ggtt_unbind_vma(struct i915_vma *vma)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+
+	intel_runtime_pm_get(i915);
+	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+	intel_runtime_pm_put(i915);
+}
+
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
@@ -2671,22 +2680,21 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	return 0;
 }
 
-static void ggtt_unbind_vma(struct i915_vma *vma)
+static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
-	const u64 size = min(vma->size, vma->node.size);
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
-		vma->vm->clear_range(vma->vm,
-				     vma->node.start, size);
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 		intel_runtime_pm_put(i915);
 	}
 
-	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
-		appgtt->base.clear_range(&appgtt->base,
-					 vma->node.start, size);
+	if (vma->flags & I915_VMA_LOCAL_BIND) {
+		struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
+
+		vm->clear_range(vm, vma->node.start, vma->size);
+	}
 }
 
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
@@ -2780,8 +2788,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 					ppgtt->base.total);
 
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
+
 		WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
 		ggtt->base.bind_vma = aliasing_gtt_bind_vma;
+
+		WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
+		ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
 	}
 
 	return 0;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (3 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-09 15:08   ` Mika Kuoppala
  2017-02-02 15:02 ` [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
                   ` (14 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We flush the entire page every time we update a few bytes, making the
update of a page table many, many times slower than is required. If we
create a WC map of the page for our updates, we can avoid the clflush
but incur additional cost for creating the pagetable. We amortize that
cost by reusing page vmappings, and only changing the page protection in
batches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 329 ++++++++++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +
 2 files changed, 172 insertions(+), 162 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 45bab7b7b026..302aee193ce5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -340,43 +340,69 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
-static int __setup_page_dma(struct drm_i915_private *dev_priv,
-			    struct i915_page_dma *p, gfp_t flags)
+static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 {
-	struct device *kdev = &dev_priv->drm.pdev->dev;
+	struct page *page;
 
-	p->page = alloc_page(flags);
-	if (!p->page)
-		return -ENOMEM;
+	if (vm->free_pages.nr)
+		return vm->free_pages.pages[--vm->free_pages.nr];
 
-	p->daddr = dma_map_page(kdev,
-				p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	page = alloc_page(gfp);
+	if (!page)
+		return NULL;
 
-	if (dma_mapping_error(kdev, p->daddr)) {
-		__free_page(p->page);
-		return -EINVAL;
+	if (vm->pt_kmap_wc)
+		set_pages_array_wc(&page, 1);
+
+	return page;
+}
+
+static void vm_free_pages_release(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!pagevec_count(&vm->free_pages));
+
+	if (vm->pt_kmap_wc)
+		set_pages_array_wb(vm->free_pages.pages,
+				   pagevec_count(&vm->free_pages));
+
+	__pagevec_release(&vm->free_pages);
+}
+
+static void vm_free_page(struct i915_address_space *vm, struct page *page)
+{
+	if (!pagevec_add(&vm->free_pages, page))
+		vm_free_pages_release(vm);
+}
+
+static int __setup_page_dma(struct i915_address_space *vm,
+			    struct i915_page_dma *p,
+			    gfp_t gfp)
+{
+	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
+	if (unlikely(!p->page))
+		return -ENOMEM;
+
+	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
+				PCI_DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
+		vm_free_page(vm, p->page);
+		return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static int setup_page_dma(struct drm_i915_private *dev_priv,
+static int setup_page_dma(struct i915_address_space *vm,
 			  struct i915_page_dma *p)
 {
-	return __setup_page_dma(dev_priv, p, I915_GFP_DMA);
+	return __setup_page_dma(vm, p, I915_GFP_DMA);
 }
 
-static void cleanup_page_dma(struct drm_i915_private *dev_priv,
+static void cleanup_page_dma(struct i915_address_space *vm,
 			     struct i915_page_dma *p)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	if (WARN_ON(!p->page))
-		return;
-
-	dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	__free_page(p->page);
-	memset(p, 0, sizeof(*p));
+	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	vm_free_page(vm, p->page);
 }
 
 static void *kmap_page_dma(struct i915_page_dma *p)
@@ -387,67 +413,54 @@ static void *kmap_page_dma(struct i915_page_dma *p)
 /* We use the flushing unmap only with ppgtt structures:
  * page directories, page tables and scratch pages.
  */
-static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
+static void kunmap_page_dma(void *vaddr)
 {
-	/* There are only few exceptions for gen >=6. chv and bxt.
-	 * And we are not sure about the latter so play safe for now.
-	 */
-	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
-
 	kunmap_atomic(vaddr);
 }
 
 #define kmap_px(px) kmap_page_dma(px_base(px))
-#define kunmap_px(ppgtt, vaddr) \
-		kunmap_page_dma((ppgtt)->base.i915, (vaddr))
+#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
 
-#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px))
-#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px))
-#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
-#define fill32_px(dev_priv, px, v) \
-		fill_page_dma_32((dev_priv), px_base(px), (v))
+#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
+#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
+#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
+#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))
 
-static void fill_page_dma(struct drm_i915_private *dev_priv,
-			  struct i915_page_dma *p, const uint64_t val)
+static void fill_page_dma(struct i915_address_space *vm,
+			  struct i915_page_dma *p,
+			  const u64 val)
 {
+	u64 * const vaddr = kmap_page_dma(p);
 	int i;
-	uint64_t * const vaddr = kmap_page_dma(p);
 
 	for (i = 0; i < 512; i++)
 		vaddr[i] = val;
 
-	kunmap_page_dma(dev_priv, vaddr);
+	kunmap_page_dma(vaddr);
 }
 
-static void fill_page_dma_32(struct drm_i915_private *dev_priv,
-			     struct i915_page_dma *p, const uint32_t val32)
+static void fill_page_dma_32(struct i915_address_space *vm,
+			     struct i915_page_dma *p,
+			     const u32 v)
 {
-	uint64_t v = val32;
-
-	v = v << 32 | val32;
-
-	fill_page_dma(dev_priv, p, v);
+	fill_page_dma(vm, p, (u64)v << 32 | v);
 }
 
 static int
-setup_scratch_page(struct drm_i915_private *dev_priv,
-		   struct i915_page_dma *scratch,
-		   gfp_t gfp)
+setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 {
-	return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO);
+	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
 }
 
-static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
-				 struct i915_page_dma *scratch)
+static void cleanup_scratch_page(struct i915_address_space *vm)
 {
-	cleanup_page_dma(dev_priv, scratch);
+	cleanup_page_dma(vm, &vm->scratch_page);
 }
 
-static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
+static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 {
 	struct i915_page_table *pt;
-	const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
+	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
 	int ret = -ENOMEM;
 
 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
@@ -460,7 +473,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
 	if (!pt->used_ptes)
 		goto fail_bitmap;
 
-	ret = setup_px(dev_priv, pt);
+	ret = setup_px(vm, pt);
 	if (ret)
 		goto fail_page_m;
 
@@ -474,10 +487,9 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
 	return ERR_PTR(ret);
 }
 
-static void free_pt(struct drm_i915_private *dev_priv,
-		    struct i915_page_table *pt)
+static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 {
-	cleanup_px(dev_priv, pt);
+	cleanup_px(vm, pt);
 	kfree(pt->used_ptes);
 	kfree(pt);
 }
@@ -490,7 +502,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
 	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
 				      I915_CACHE_LLC);
 
-	fill_px(vm->i915, pt, scratch_pte);
+	fill_px(vm, pt, scratch_pte);
 }
 
 static void gen6_initialize_pt(struct i915_address_space *vm,
@@ -503,10 +515,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
 				     I915_CACHE_LLC, 0);
 
-	fill32_px(vm->i915, pt, scratch_pte);
+	fill32_px(vm, pt, scratch_pte);
 }
 
-static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
+static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
 	int ret = -ENOMEM;
@@ -520,7 +532,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
 	if (!pd->used_pdes)
 		goto fail_bitmap;
 
-	ret = setup_px(dev_priv, pd);
+	ret = setup_px(vm, pd);
 	if (ret)
 		goto fail_page_m;
 
@@ -534,11 +546,11 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
 	return ERR_PTR(ret);
 }
 
-static void free_pd(struct drm_i915_private *dev_priv,
+static void free_pd(struct i915_address_space *vm,
 		    struct i915_page_directory *pd)
 {
 	if (px_page(pd)) {
-		cleanup_px(dev_priv, pd);
+		cleanup_px(vm, pd);
 		kfree(pd->used_pdes);
 		kfree(pd);
 	}
@@ -551,7 +563,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 
 	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
 
-	fill_px(vm->i915, pd, scratch_pde);
+	fill_px(vm, pd, scratch_pde);
 }
 
 static int __pdp_init(struct drm_i915_private *dev_priv,
@@ -585,23 +597,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 	pdp->page_directory = NULL;
 }
 
-static struct
-i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
+static struct i915_page_directory_pointer *
+alloc_pdp(struct i915_address_space *vm)
 {
 	struct i915_page_directory_pointer *pdp;
 	int ret = -ENOMEM;
 
-	WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv));
+	WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
 
 	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
 	if (!pdp)
 		return ERR_PTR(-ENOMEM);
 
-	ret = __pdp_init(dev_priv, pdp);
+	ret = __pdp_init(vm->i915, pdp);
 	if (ret)
 		goto fail_bitmap;
 
-	ret = setup_px(dev_priv, pdp);
+	ret = setup_px(vm, pdp);
 	if (ret)
 		goto fail_page_m;
 
@@ -615,12 +627,12 @@ i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
 	return ERR_PTR(ret);
 }
 
-static void free_pdp(struct drm_i915_private *dev_priv,
+static void free_pdp(struct i915_address_space *vm,
 		     struct i915_page_directory_pointer *pdp)
 {
 	__pdp_fini(pdp);
-	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
-		cleanup_px(dev_priv, pdp);
+	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
+		cleanup_px(vm, pdp);
 		kfree(pdp);
 	}
 }
@@ -632,7 +644,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
 
 	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 
-	fill_px(vm->i915, pdp, scratch_pdpe);
+	fill_px(vm, pdp, scratch_pdpe);
 }
 
 static void gen8_initialize_pml4(struct i915_address_space *vm,
@@ -643,7 +655,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
 					  I915_CACHE_LLC);
 
-	fill_px(vm->i915, pml4, scratch_pml4e);
+	fill_px(vm, pml4, scratch_pml4e);
 }
 
 static void
@@ -659,20 +671,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
 
 	page_directorypo = kmap_px(pdp);
 	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
-	kunmap_px(ppgtt, page_directorypo);
+	kunmap_px(page_directorypo);
 }
 
 static void
-gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt,
-		 struct i915_pml4 *pml4,
+gen8_setup_pml4e(struct i915_pml4 *pml4,
 		 struct i915_page_directory_pointer *pdp,
 		 int index)
 {
 	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
 
-	WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)));
 	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
-	kunmap_px(ppgtt, pagemap);
+	kunmap_px(pagemap);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -741,7 +751,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				uint64_t start,
 				uint64_t length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
 	unsigned int pte_end = pte + num_entries;
@@ -762,7 +771,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	vaddr = kmap_px(pt);
 	while (pte < pte_end)
 		vaddr[pte++] = scratch_pte;
-	kunmap_px(ppgtt, vaddr);
+	kunmap_px(vaddr);
 
 	return false;
 }
@@ -775,7 +784,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				uint64_t start,
 				uint64_t length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_table *pt;
 	uint64_t pde;
 	gen8_pde_t *pde_vaddr;
@@ -790,8 +798,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 			__clear_bit(pde, pd->used_pdes);
 			pde_vaddr = kmap_px(pd);
 			pde_vaddr[pde] = scratch_pde;
-			kunmap_px(ppgtt, pde_vaddr);
-			free_pt(vm->i915, pt);
+			kunmap_px(pde_vaddr);
+			free_pt(vm, pt);
 		}
 	}
 
@@ -820,7 +828,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
 			__clear_bit(pdpe, pdp->used_pdpes);
 			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
-			free_pd(vm->i915, pd);
+			free_pd(vm, pd);
 		}
 	}
 
@@ -841,7 +849,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 				  uint64_t start,
 				  uint64_t length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 
@@ -853,8 +860,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 
 		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
 			__clear_bit(pml4e, pml4->used_pml4es);
-			gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e);
-			free_pdp(vm->i915, pdp);
+			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
+			free_pdp(vm, pdp);
 		}
 	}
 }
@@ -912,12 +919,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 				pde = 0;
 			}
 
-			kunmap_px(ppgtt, vaddr);
+			kunmap_px(vaddr);
 			vaddr = kmap_px(pd->page_table[pde]);
 			pte = 0;
 		}
 	} while (1);
-	kunmap_px(ppgtt, vaddr);
+	kunmap_px(vaddr);
 
 	return ret;
 }
@@ -959,7 +966,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 		;
 }
 
-static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
+static void gen8_free_page_tables(struct i915_address_space *vm,
 				  struct i915_page_directory *pd)
 {
 	int i;
@@ -971,34 +978,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
 		if (WARN_ON(!pd->page_table[i]))
 			continue;
 
-		free_pt(dev_priv, pd->page_table[i]);
+		free_pt(vm, pd->page_table[i]);
 		pd->page_table[i] = NULL;
 	}
 }
 
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	int ret;
 
-	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
+	ret = setup_scratch_page(vm, I915_GFP_DMA);
 	if (ret)
 		return ret;
 
-	vm->scratch_pt = alloc_pt(dev_priv);
+	vm->scratch_pt = alloc_pt(vm);
 	if (IS_ERR(vm->scratch_pt)) {
 		ret = PTR_ERR(vm->scratch_pt);
 		goto free_scratch_page;
 	}
 
-	vm->scratch_pd = alloc_pd(dev_priv);
+	vm->scratch_pd = alloc_pd(vm);
 	if (IS_ERR(vm->scratch_pd)) {
 		ret = PTR_ERR(vm->scratch_pd);
 		goto free_pt;
 	}
 
-	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
-		vm->scratch_pdp = alloc_pdp(dev_priv);
+	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
+		vm->scratch_pdp = alloc_pdp(vm);
 		if (IS_ERR(vm->scratch_pdp)) {
 			ret = PTR_ERR(vm->scratch_pdp);
 			goto free_pd;
@@ -1013,11 +1019,11 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	return 0;
 
 free_pd:
-	free_pd(dev_priv, vm->scratch_pd);
+	free_pd(vm, vm->scratch_pd);
 free_pt:
-	free_pt(dev_priv, vm->scratch_pt);
+	free_pt(vm, vm->scratch_pt);
 free_scratch_page:
-	cleanup_scratch_page(dev_priv, &vm->scratch_page);
+	cleanup_scratch_page(vm);
 
 	return ret;
 }
@@ -1055,44 +1061,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
 
 static void gen8_free_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
-
-	if (USES_FULL_48BIT_PPGTT(dev_priv))
-		free_pdp(dev_priv, vm->scratch_pdp);
-	free_pd(dev_priv, vm->scratch_pd);
-	free_pt(dev_priv, vm->scratch_pt);
-	cleanup_scratch_page(dev_priv, &vm->scratch_page);
+	if (USES_FULL_48BIT_PPGTT(vm->i915))
+		free_pdp(vm, vm->scratch_pdp);
+	free_pd(vm, vm->scratch_pd);
+	free_pt(vm, vm->scratch_pt);
+	cleanup_scratch_page(vm);
 }
 
-static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
+static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 				    struct i915_page_directory_pointer *pdp)
 {
 	int i;
 
-	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
+	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
 		if (WARN_ON(!pdp->page_directory[i]))
 			continue;
 
-		gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
-		free_pd(dev_priv, pdp->page_directory[i]);
+		gen8_free_page_tables(vm, pdp->page_directory[i]);
+		free_pd(vm, pdp->page_directory[i]);
 	}
 
-	free_pdp(dev_priv, pdp);
+	free_pdp(vm, pdp);
 }
 
 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_i915_private *dev_priv = ppgtt->base.i915;
 	int i;
 
 	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
 		if (WARN_ON(!ppgtt->pml4.pdps[i]))
 			continue;
 
-		gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]);
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
 	}
 
-	cleanup_px(dev_priv, &ppgtt->pml4);
+	cleanup_px(&ppgtt->base, &ppgtt->pml4);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1103,8 +1106,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	if (intel_vgpu_active(dev_priv))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
-	if (!USES_FULL_48BIT_PPGTT(dev_priv))
-		gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp);
+	if (!USES_FULL_48BIT_PPGTT(vm->i915))
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
 	else
 		gen8_ppgtt_cleanup_4lvl(ppgtt);
 
@@ -1135,7 +1138,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 				     uint64_t length,
 				     unsigned long *new_pts)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_table *pt;
 	uint32_t pde;
 
@@ -1147,7 +1149,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 			continue;
 		}
 
-		pt = alloc_pt(dev_priv);
+		pt = alloc_pt(vm);
 		if (IS_ERR(pt))
 			goto unwind_out;
 
@@ -1161,7 +1163,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 
 unwind_out:
 	for_each_set_bit(pde, new_pts, I915_PDES)
-		free_pt(dev_priv, pd->page_table[pde]);
+		free_pt(vm, pd->page_table[pde]);
 
 	return -ENOMEM;
 }
@@ -1196,7 +1198,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 				  uint64_t length,
 				  unsigned long *new_pds)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_directory *pd;
 	uint32_t pdpe;
 	uint32_t pdpes = I915_PDPES_PER_PDP(vm->i915);
@@ -1207,7 +1208,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 		if (test_bit(pdpe, pdp->used_pdpes))
 			continue;
 
-		pd = alloc_pd(dev_priv);
+		pd = alloc_pd(vm);
 		if (IS_ERR(pd))
 			goto unwind_out;
 
@@ -1221,7 +1222,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 
 unwind_out:
 	for_each_set_bit(pdpe, new_pds, pdpes)
-		free_pd(dev_priv, pdp->page_directory[pdpe]);
+		free_pd(vm, pdp->page_directory[pdpe]);
 
 	return -ENOMEM;
 }
@@ -1249,7 +1250,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 				  uint64_t length,
 				  unsigned long *new_pdps)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_directory_pointer *pdp;
 	uint32_t pml4e;
 
@@ -1257,7 +1257,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (!test_bit(pml4e, pml4->used_pml4es)) {
-			pdp = alloc_pdp(dev_priv);
+			pdp = alloc_pdp(vm);
 			if (IS_ERR(pdp))
 				goto unwind_out;
 
@@ -1275,7 +1275,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 
 unwind_out:
 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		free_pdp(dev_priv, pml4->pdps[pml4e]);
+		free_pdp(vm, pml4->pdps[pml4e]);
 
 	return -ENOMEM;
 }
@@ -1324,7 +1324,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned long *new_page_dirs, *new_page_tables;
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -1393,7 +1392,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			 * point we're still relying on insert_entries() */
 		}
 
-		kunmap_px(ppgtt, page_directory);
+		kunmap_px(page_directory);
 		__set_bit(pdpe, pdp->used_pdpes);
 		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
 	}
@@ -1408,12 +1407,11 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 		for_each_set_bit(temp, new_page_tables + pdpe *
 				BITS_TO_LONGS(I915_PDES), I915_PDES)
-			free_pt(dev_priv,
-				pdp->page_directory[pdpe]->page_table[temp]);
+			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
 	}
 
 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
-		free_pd(dev_priv, pdp->page_directory[pdpe]);
+		free_pd(vm, pdp->page_directory[pdpe]);
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	mark_tlbs_dirty(ppgtt);
@@ -1426,7 +1424,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 	int ret = 0;
@@ -1454,7 +1451,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 		if (ret)
 			goto err_out;
 
-		gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e);
+		gen8_setup_pml4e(pml4, pdp, pml4e);
 	}
 
 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1464,7 +1461,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 
 err_out:
 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]);
+		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
 
 	return ret;
 }
@@ -1480,7 +1477,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
 }
 
-static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
+			  struct i915_page_directory_pointer *pdp,
 			  uint64_t start, uint64_t length,
 			  gen8_pte_t scratch_pte,
 			  struct seq_file *m)
@@ -1546,7 +1544,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 						 I915_CACHE_LLC);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
-		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
+		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
 	} else {
 		uint64_t pml4e;
 		struct i915_pml4 *pml4 = &ppgtt->pml4;
@@ -1557,7 +1555,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 				continue;
 
 			seq_printf(m, "    PML4E #%llu\n", pml4e);
-			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
+			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
 		}
 	}
 }
@@ -1613,8 +1611,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->debug_dump = gen8_dump_ppgtt;
 
+	/* There are only few exceptions for gen >=6. chv and bxt.
+	 * And we are not sure about the latter so play safe for now.
+	 */
+	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
+		ppgtt->base.pt_kmap_wc = true;
+
 	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
-		ret = setup_px(dev_priv, &ppgtt->pml4);
+		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
 		if (ret)
 			goto free_scratch;
 
@@ -1703,7 +1707,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 			}
 			seq_puts(m, "\n");
 		}
-		kunmap_px(ppgtt, pt_vaddr);
+		kunmap_px(pt_vaddr);
 	}
 }
 
@@ -1900,7 +1904,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		for (i = first_pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
-		kunmap_px(ppgtt, pt_vaddr);
+		kunmap_px(pt_vaddr);
 
 		num_entries -= last_pte - first_pte;
 		first_pte = 0;
@@ -1939,12 +1943,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(ppgtt, vaddr);
+			kunmap_px(vaddr);
 			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
 	} while (1);
-	kunmap_px(ppgtt, vaddr);
+	kunmap_px(vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -1978,7 +1982,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 		/* We've already allocated a page table */
 		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
 
-		pt = alloc_pt(dev_priv);
+		pt = alloc_pt(vm);
 		if (IS_ERR(pt)) {
 			ret = PTR_ERR(pt);
 			goto unwind_out;
@@ -2026,7 +2030,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
 
 		ppgtt->pd.page_table[pde] = vm->scratch_pt;
-		free_pt(dev_priv, pt);
+		free_pt(vm, pt);
 	}
 
 	mark_tlbs_dirty(ppgtt);
@@ -2035,16 +2039,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 
 static int gen6_init_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	int ret;
 
-	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
+	ret = setup_scratch_page(vm, I915_GFP_DMA);
 	if (ret)
 		return ret;
 
-	vm->scratch_pt = alloc_pt(dev_priv);
+	vm->scratch_pt = alloc_pt(vm);
 	if (IS_ERR(vm->scratch_pt)) {
-		cleanup_scratch_page(dev_priv, &vm->scratch_page);
+		cleanup_scratch_page(vm);
 		return PTR_ERR(vm->scratch_pt);
 	}
 
@@ -2055,17 +2058,14 @@ static int gen6_init_scratch(struct i915_address_space *vm)
 
 static void gen6_free_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
-
-	free_pt(dev_priv, vm->scratch_pt);
-	cleanup_scratch_page(dev_priv, &vm->scratch_page);
+	free_pt(vm, vm->scratch_pt);
+	cleanup_scratch_page(vm);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd = &ppgtt->pd;
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_table *pt;
 	uint32_t pde;
 
@@ -2073,7 +2073,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 
 	gen6_for_all_pdes(pt, pd, pde)
 		if (pt != vm->scratch_pt)
-			free_pt(dev_priv, pt);
+			free_pt(vm, pt);
 
 	gen6_free_scratch(vm);
 }
@@ -2182,6 +2182,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 			   struct drm_i915_private *dev_priv)
 {
 	ppgtt->base.i915 = dev_priv;
+	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
 
 	if (INTEL_INFO(dev_priv)->gen < 8)
 		return gen6_ppgtt_init(ppgtt);
@@ -2199,10 +2200,14 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	INIT_LIST_HEAD(&vm->inactive_list);
 	INIT_LIST_HEAD(&vm->unbound_list);
 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
+	pagevec_init(&vm->free_pages, false);
 }
 
 static void i915_address_space_fini(struct i915_address_space *vm)
 {
+	if (pagevec_count(&vm->free_pages))
+		vm_free_pages_release(vm);
+
 	i915_gem_timeline_fini(&vm->timeline);
 	drm_mm_takedown(&vm->mm);
 	list_del(&vm->global_link);
@@ -2310,9 +2315,8 @@ void i915_ppgtt_release(struct kref *kref)
 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
 	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
 
-	i915_address_space_fini(&ppgtt->base);
-
 	ppgtt->base.cleanup(&ppgtt->base);
+	i915_address_space_fini(&ppgtt->base);
 	kfree(ppgtt);
 }
 
@@ -2947,7 +2951,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 		return -ENOMEM;
 	}
 
-	ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32);
+	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
 	if (ret) {
 		DRM_ERROR("Scratch setup failed\n");
 		/* iounmap will also get called at remove, but meh */
@@ -3036,7 +3040,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 
 	iounmap(ggtt->gsm);
-	cleanup_scratch_page(vm->i915, &vm->scratch_page);
+	cleanup_scratch_page(vm);
 }
 
 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
@@ -3187,6 +3191,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
 	int ret;
 
 	ggtt->base.i915 = dev_priv;
+	ggtt->base.dma = &dev_priv->drm.pdev->dev;
 
 	if (INTEL_GEN(dev_priv) <= 5)
 		ret = i915_gmch_probe(ggtt);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3c5ef5358cef..c59a7687ed6f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -36,6 +36,7 @@
 
 #include <linux/io-mapping.h>
 #include <linux/mm.h>
+#include <linux/pagevec.h>
 
 #include "i915_gem_timeline.h"
 #include "i915_gem_request.h"
@@ -247,6 +248,7 @@ struct i915_address_space {
 	struct drm_mm mm;
 	struct i915_gem_timeline timeline;
 	struct drm_i915_private *i915;
+	struct device *dma;
 	/* Every address space belongs to a struct file - except for the global
 	 * GTT that is owned by the driver (and so @file is set to NULL). In
 	 * principle, no information should leak from one context to another
@@ -297,6 +299,9 @@ struct i915_address_space {
 	 */
 	struct list_head unbound_list;
 
+	struct pagevec free_pages;
+	bool pt_kmap_wc;
+
 	/* FIXME: Need a more generic return type */
 	gen6_pte_t (*pte_encode)(dma_addr_t addr,
 				 enum i915_cache_level level,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (4 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-10 11:25   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects Chris Wilson
                   ` (13 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

As these are now both plain and simple kmap_atomic/kunmap_atomic pairs,
we can remove the wrappers for a small gain of clarity (in particular,
not hiding the atomic critical sections!).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 69 ++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 302aee193ce5..2428a5fe532e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -405,21 +405,7 @@ static void cleanup_page_dma(struct i915_address_space *vm,
 	vm_free_page(vm, p->page);
 }
 
-static void *kmap_page_dma(struct i915_page_dma *p)
-{
-	return kmap_atomic(p->page);
-}
-
-/* We use the flushing unmap only with ppgtt structures:
- * page directories, page tables and scratch pages.
- */
-static void kunmap_page_dma(void *vaddr)
-{
-	kunmap_atomic(vaddr);
-}
-
-#define kmap_px(px) kmap_page_dma(px_base(px))
-#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
+#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 
 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
@@ -430,13 +416,13 @@ static void fill_page_dma(struct i915_address_space *vm,
 			  struct i915_page_dma *p,
 			  const u64 val)
 {
-	u64 * const vaddr = kmap_page_dma(p);
+	u64 * const vaddr = kmap_atomic(p->page);
 	int i;
 
 	for (i = 0; i < 512; i++)
 		vaddr[i] = val;
 
-	kunmap_page_dma(vaddr);
+	kunmap_atomic(vaddr);
 }
 
 static void fill_page_dma_32(struct i915_address_space *vm,
@@ -669,9 +655,9 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
 	if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)))
 		return;
 
-	page_directorypo = kmap_px(pdp);
+	page_directorypo = kmap_atomic_px(pdp);
 	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
-	kunmap_px(page_directorypo);
+	kunmap_atomic(page_directorypo);
 }
 
 static void
@@ -679,10 +665,10 @@ gen8_setup_pml4e(struct i915_pml4 *pml4,
 		 struct i915_page_directory_pointer *pdp,
 		 int index)
 {
-	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
 
 	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
-	kunmap_px(pagemap);
+	kunmap_atomic(pagemap);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -768,10 +754,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
 		return true;
 
-	vaddr = kmap_px(pt);
+	vaddr = kmap_atomic_px(pt);
 	while (pte < pte_end)
 		vaddr[pte++] = scratch_pte;
-	kunmap_px(vaddr);
+	kunmap_atomic(vaddr);
 
 	return false;
 }
@@ -796,9 +782,9 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 
 		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
 			__clear_bit(pde, pd->used_pdes);
-			pde_vaddr = kmap_px(pd);
+			pde_vaddr = kmap_atomic_px(pd);
 			pde_vaddr[pde] = scratch_pde;
-			kunmap_px(pde_vaddr);
+			kunmap_atomic(pde_vaddr);
 			free_pt(vm, pt);
 		}
 	}
@@ -898,7 +884,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 	bool ret = true;
 
 	pd = pdp->page_directory[pdpe];
-	vaddr = kmap_px(pd->page_table[pde]);
+	vaddr = kmap_atomic_px(pd->page_table[pde]);
 	do {
 		vaddr[pte] = pte_encode | iter->dma;
 		iter->dma += PAGE_SIZE;
@@ -919,12 +905,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 				pde = 0;
 			}
 
-			kunmap_px(vaddr);
-			vaddr = kmap_px(pd->page_table[pde]);
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(pd->page_table[pde]);
 			pte = 0;
 		}
 	} while (1);
-	kunmap_px(vaddr);
+	kunmap_atomic(vaddr);
 
 	return ret;
 }
@@ -1357,7 +1343,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	/* Allocations have completed successfully, so set the bitmaps, and do
 	 * the mappings. */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		gen8_pde_t *const page_directory = kmap_px(pd);
+		gen8_pde_t *const page_directory = kmap_atomic_px(pd);
 		struct i915_page_table *pt;
 		uint64_t pd_len = length;
 		uint64_t pd_start = start;
@@ -1392,7 +1378,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			 * point we're still relying on insert_entries() */
 		}
 
-		kunmap_px(page_directory);
+		kunmap_atomic(page_directory);
 		__set_bit(pdpe, pdp->used_pdpes);
 		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
 	}
@@ -1503,7 +1489,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 			if (!test_bit(pde, pd->used_pdes))
 				continue;
 
-			pt_vaddr = kmap_px(pt);
+			pt_vaddr = kmap_atomic_px(pt);
 			for (pte = 0; pte < GEN8_PTES; pte += 4) {
 				uint64_t va =
 					(pdpe << GEN8_PDPE_SHIFT) |
@@ -1527,9 +1513,6 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 				}
 				seq_puts(m, "\n");
 			}
-			/* don't use kunmap_px, it could trigger
-			 * an unnecessary flush.
-			 */
 			kunmap_atomic(pt_vaddr);
 		}
 	}
@@ -1684,7 +1667,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 				   expected);
 		seq_printf(m, "\tPDE: %x\n", pd_entry);
 
-		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
+		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
 
 		for (pte = 0; pte < GEN6_PTES; pte+=4) {
 			unsigned long va =
@@ -1707,7 +1690,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 			}
 			seq_puts(m, "\n");
 		}
-		kunmap_px(pt_vaddr);
+		kunmap_atomic(pt_vaddr);
 	}
 }
 
@@ -1899,12 +1882,12 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		if (last_pte > GEN6_PTES)
 			last_pte = GEN6_PTES;
 
-		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
 
 		for (i = first_pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
-		kunmap_px(pt_vaddr);
+		kunmap_atomic(pt_vaddr);
 
 		num_entries -= last_pte - first_pte;
 		first_pte = 0;
@@ -1925,7 +1908,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sgt_dma iter;
 	gen6_pte_t *vaddr;
 
-	vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+	vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
 	iter.sg = pages->sgl;
 	iter.dma = sg_dma_address(iter.sg);
 	iter.max = iter.dma + iter.sg->length;
@@ -1943,12 +1926,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(vaddr);
-			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
 	} while (1);
-	kunmap_px(vaddr);
+	kunmap_atomic(vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (5 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-02 15:07   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 09/19] drm/i915: Move allocate_va_range to GTT Chris Wilson
                   ` (12 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Very large objects are expected, so don't WARN the user if they are
using them!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2428a5fe532e..9eaa396b7402 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1426,10 +1426,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 	if (ret)
 		return ret;
 
-	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
-	     "The allocation has spanned more than 512GB. "
-	     "It is highly likely this is incorrect.");
-
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		WARN_ON(!pdp);
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 09/19] drm/i915: Move allocate_va_range to GTT
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (6 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-07 10:01   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 10/19] drm/i915: Remove redundant clear of appgtt Chris Wilson
                   ` (11 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

In the future, we need to call allocate_va_range on the aliasing-ppgtt
which means moving the call down from the vma into the vm (which is
more appropriate for calling the vm function).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +++++++++-
 drivers/gpu/drm/i915/i915_vma.c     |  9 ---------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9eaa396b7402..e777ef19893e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -187,11 +187,19 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 			  enum i915_cache_level cache_level,
 			  u32 unused)
 {
-	u32 pte_flags = 0;
+	u32 pte_flags;
+	int ret;
+
+	trace_i915_va_alloc(vma);
+	ret = vma->vm->allocate_va_range(vma->vm,
+					 vma->node.start, vma->size);
+	if (ret)
+		return ret;
 
 	vma->pages = vma->obj->mm.pages;
 
 	/* Currently applicable only to VLV */
+	pte_flags = 0;
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 341c3f82ec1f..623e85d97db2 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -260,15 +260,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 					vma->vm->total)))
 		return -ENODEV;
 
-	if (vma_flags == 0 && vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma);
-		ret = vma->vm->allocate_va_range(vma->vm,
-						 vma->node.start,
-						 vma->node.size);
-		if (ret)
-			return ret;
-	}
-
 	trace_i915_vma_bind(vma, bind_flags);
 	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
 	if (ret)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 10/19] drm/i915: Remove redundant clear of appgtt
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (7 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 09/19] drm/i915: Move allocate_va_range to GTT Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-07 10:06   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 11/19] drm/i915: Tidy gen6_write_pde() Chris Wilson
                   ` (10 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Upon creation of the va range, it is initialised to point at scratch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e777ef19893e..991a15efe478 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2152,7 +2152,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
 
 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
-
 	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
 
 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
@@ -2774,10 +2773,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 				goto err_ppgtt;
 		}
 
-		ppgtt->base.clear_range(&ppgtt->base,
-					ppgtt->base.start,
-					ppgtt->base.total);
-
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
 
 		WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 11/19] drm/i915: Tidy gen6_write_pde()
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (8 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 10/19] drm/i915: Remove redundant clear of appgtt Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-07 10:18   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
                   ` (9 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 991a15efe478..66c3540eb387 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1699,36 +1699,29 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static void gen6_write_pde(struct i915_page_directory *pd,
-			    const int pde, struct i915_page_table *pt)
+static void gen6_write_pde(struct i915_hw_ppgtt *ppgtt,
+			   const int pde, struct i915_page_table *pt)
 {
 	/* Caller needs to make sure the write completes if necessary */
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(pd, struct i915_hw_ppgtt, pd);
 	u32 pd_entry;
 
-	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
-	pd_entry |= GEN6_PDE_VALID;
-
+	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID;
 	writel(pd_entry, ppgtt->pd_addr + pde);
 }
 
 /* Write all the page tables found in the ppgtt structure to incrementing page
  * directories. */
-static void gen6_write_page_range(struct drm_i915_private *dev_priv,
-				  struct i915_page_directory *pd,
+static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
 				  uint32_t start, uint32_t length)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct i915_page_table *pt;
-	uint32_t pde;
+	unsigned int pde;
 
-	gen6_for_each_pde(pt, pd, start, length, pde)
-		gen6_write_pde(pd, pde, pt);
+	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
+		gen6_write_pde(ppgtt, pde, pt);
+	wmb();
 
-	/* Make sure write is complete before other code can use this page
-	 * table. Also require for WC mapped PTEs */
-	readl(ggtt->gsm);
+	mark_tlbs_dirty(ppgtt);
 }
 
 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
@@ -1993,7 +1986,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 			   gen6_pte_count(start, length));
 
 		if (__test_and_clear_bit(pde, new_page_tables))
-			gen6_write_pde(&ppgtt->pd, pde, pt);
+			gen6_write_pde(ppgtt, pde, pt);
 
 		trace_i915_page_table_entry_map(vm, pde, pt,
 					 gen6_pte_index(start),
@@ -2152,7 +2145,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
 
 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
-	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
+	gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
 
 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
 			 ppgtt->node.size >> 20,
@@ -3335,8 +3328,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		struct i915_address_space *vm;
 
 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-			/* TODO: Perhaps it shouldn't be gen6 specific */
-
 			struct i915_hw_ppgtt *ppgtt;
 
 			if (i915_is_ggtt(vm))
@@ -3344,8 +3335,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 			else
 				ppgtt = i915_vm_to_ppgtt(vm);
 
-			gen6_write_page_range(dev_priv, &ppgtt->pd,
-					      0, ppgtt->base.total);
+			gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
 		}
 	}
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (9 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 11/19] drm/i915: Tidy gen6_write_pde() Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-06 20:32   ` Michał Winiarski
  2017-02-02 15:02 ` [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
                   ` (8 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We only operate on known extents (for both alloc and clear), and so we can
use both the knowledge of the bind/unbind range and the knowledge of the
existing pagetable to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 338 ++++++++++++------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +-
 drivers/gpu/drm/i915/i915_trace.h   |  19 +-
 3 files changed, 121 insertions(+), 241 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 66c3540eb387..5ea46c05462b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -454,62 +454,38 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 {
 	struct i915_page_table *pt;
-	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
-	int ret = -ENOMEM;
 
-	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
-	if (!pt)
+	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pt))
 		return ERR_PTR(-ENOMEM);
 
-	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
-				GFP_KERNEL);
-
-	if (!pt->used_ptes)
-		goto fail_bitmap;
-
-	ret = setup_px(vm, pt);
-	if (ret)
-		goto fail_page_m;
+	if (unlikely(setup_px(vm, pt))) {
+		kfree(pt);
+		return ERR_PTR(-ENOMEM);
+	}
 
+	pt->used_ptes = 0;
 	return pt;
-
-fail_page_m:
-	kfree(pt->used_ptes);
-fail_bitmap:
-	kfree(pt);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 {
 	cleanup_px(vm, pt);
-	kfree(pt->used_ptes);
 	kfree(pt);
 }
 
 static void gen8_initialize_pt(struct i915_address_space *vm,
 			       struct i915_page_table *pt)
 {
-	gen8_pte_t scratch_pte;
-
-	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-				      I915_CACHE_LLC);
-
-	fill_px(vm, pt, scratch_pte);
+	fill_px(vm, pt,
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
 }
 
 static void gen6_initialize_pt(struct i915_address_space *vm,
 			       struct i915_page_table *pt)
 {
-	gen6_pte_t scratch_pte;
-
-	WARN_ON(vm->scratch_page.daddr == 0);
-
-	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, 0);
-
-	fill32_px(vm, pt, scratch_pte);
+	fill32_px(vm, pt,
+		  vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
 }
 
 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
@@ -553,11 +529,12 @@ static void free_pd(struct i915_address_space *vm,
 static void gen8_initialize_pd(struct i915_address_space *vm,
 			       struct i915_page_directory *pd)
 {
-	gen8_pde_t scratch_pde;
-
-	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
+	unsigned int i;
 
-	fill_px(vm, pd, scratch_pde);
+	fill_px(vm, pd,
+		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
+	for (i = 0; i < I915_PDES; i++)
+		pd->page_table[i] = vm->scratch_pt;
 }
 
 static int __pdp_init(struct drm_i915_private *dev_priv,
@@ -742,8 +719,7 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
  */
 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				struct i915_page_table *pt,
-				uint64_t start,
-				uint64_t length)
+				u64 start, u64 length)
 {
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
@@ -752,14 +728,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
 	gen8_pte_t *vaddr;
 
-	if (WARN_ON(!px_page(pt)))
-		return false;
-
-	GEM_BUG_ON(pte_end > GEN8_PTES);
+	GEM_BUG_ON(num_entries > pt->used_ptes);
 
-	bitmap_clear(pt->used_ptes, pte, num_entries);
-
-	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+	pt->used_ptes -= num_entries;
+	if (!pt->used_ptes)
 		return true;
 
 	vaddr = kmap_atomic_px(pt);
@@ -770,31 +742,38 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	return false;
 }
 
+static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
+			       struct i915_page_directory *pd,
+			       struct i915_page_table *pt,
+			       unsigned int pde)
+{
+	gen8_pde_t *vaddr;
+
+	pd->page_table[pde] = pt;
+
+	vaddr = kmap_atomic_px(pd);
+	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
+}
+
 /* Removes entries from a single page dir, releasing it if it's empty.
  * Caller can use the return value to update higher-level entries
  */
 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
-				uint64_t start,
-				uint64_t length)
+				u64 start, u64 length)
 {
 	struct i915_page_table *pt;
-	uint64_t pde;
-	gen8_pde_t *pde_vaddr;
-	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
-						 I915_CACHE_LLC);
+	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		if (WARN_ON(!pd->page_table[pde]))
-			break;
+		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
-			__clear_bit(pde, pd->used_pdes);
-			pde_vaddr = kmap_atomic_px(pd);
-			pde_vaddr[pde] = scratch_pde;
-			kunmap_atomic(pde_vaddr);
-			free_pt(vm, pt);
-		}
+		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+		__clear_bit(pde, pd->used_pdes);
+
+		free_pt(vm, pt);
 	}
 
 	if (bitmap_empty(pd->used_pdes, I915_PDES))
@@ -1114,8 +1093,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  * @pd:	Page directory for this address range.
  * @start:	Starting virtual address to begin allocations.
  * @length:	Size of the allocations.
- * @new_pts:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
  *
  * Allocate the required number of page tables. Extremely similar to
  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
@@ -1128,37 +1105,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  */
 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 				     struct i915_page_directory *pd,
-				     uint64_t start,
-				     uint64_t length,
-				     unsigned long *new_pts)
+				     u64 start, u64 length)
 {
 	struct i915_page_table *pt;
+	u64 from = start;
 	uint32_t pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		/* Don't reallocate page tables */
-		if (test_bit(pde, pd->used_pdes)) {
-			/* Scratch is never allocated this way */
-			WARN_ON(pt == vm->scratch_pt);
-			continue;
-		}
+		if (!test_bit(pde, pd->used_pdes)) {
+			pt = alloc_pt(vm);
+			if (IS_ERR(pt))
+				goto unwind;
 
-		pt = alloc_pt(vm);
-		if (IS_ERR(pt))
-			goto unwind_out;
-
-		gen8_initialize_pt(vm, pt);
-		pd->page_table[pde] = pt;
-		__set_bit(pde, new_pts);
+			gen8_initialize_pt(vm, pt);
+			pd->page_table[pde] = pt;
+		}
+		pt->used_ptes += gen8_pte_count(start, length);
 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
 	}
 
 	return 0;
 
-unwind_out:
-	for_each_set_bit(pde, new_pts, I915_PDES)
-		free_pt(vm, pd->page_table[pde]);
-
+unwind:
+	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
 	return -ENOMEM;
 }
 
@@ -1275,9 +1245,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 }
 
 static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
+free_gen8_temp_bitmaps(unsigned long *new_pds)
 {
-	kfree(new_pts);
 	kfree(new_pds);
 }
 
@@ -1286,29 +1255,16 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
  */
 static
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 unsigned long **new_pts,
 					 uint32_t pdpes)
 {
 	unsigned long *pds;
-	unsigned long *pts;
 
 	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
 	if (!pds)
 		return -ENOMEM;
 
-	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
-		      GFP_TEMPORARY);
-	if (!pts)
-		goto err_out;
-
 	*new_pds = pds;
-	*new_pts = pts;
-
 	return 0;
-
-err_out:
-	free_gen8_temp_bitmaps(pds, pts);
-	return -ENOMEM;
 }
 
 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
@@ -1317,7 +1273,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	unsigned long *new_page_dirs, *new_page_tables;
+	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -1325,7 +1281,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 	int ret;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
 	if (ret)
 		return ret;
 
@@ -1333,14 +1289,13 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
 						new_page_dirs);
 	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		free_gen8_temp_bitmaps(new_page_dirs);
 		return ret;
 	}
 
 	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
-						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
+		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
 		if (ret)
 			goto err_out;
 	}
@@ -1366,11 +1321,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			WARN_ON(!pd_len);
 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
 
-			/* Set our used ptes within the page table */
-			bitmap_set(pt->used_ptes,
-				   gen8_pte_index(pd_start),
-				   gen8_pte_count(pd_start, pd_len));
-
 			/* Our pde is now pointing to the pagetable, pt */
 			__set_bit(pde, pd->used_pdes);
 
@@ -1379,8 +1329,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 							      I915_CACHE_LLC);
 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
 							gen8_pte_index(start),
-							gen8_pte_count(start, length),
-							GEN8_PTES);
+							gen8_pte_count(start, length));
 
 			/* NB: We haven't yet mapped ptes to pages. At this
 			 * point we're still relying on insert_entries() */
@@ -1391,23 +1340,15 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
 	}
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 err_out:
-	while (pdpe--) {
-		unsigned long temp;
-
-		for_each_set_bit(temp, new_page_tables + pdpe *
-				BITS_TO_LONGS(I915_PDES), I915_PDES)
-			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
-	}
-
 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
 		free_pd(vm, pdp->page_directory[pdpe]);
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return ret;
 }
@@ -1549,14 +1490,14 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 
 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 {
-	unsigned long *new_page_dirs, *new_page_tables;
+	unsigned long *new_page_dirs;
 	uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev));
 	int ret;
 
 	/* We allocate temp bitmap for page tables for no gain
 	 * but as this is for init only, lets keep the things simple
 	 */
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
 	if (ret)
 		return ret;
 
@@ -1569,7 +1510,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 	if (!ret)
 		*ppgtt->pdp.used_pdpes = *new_page_dirs;
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 
 	return ret;
 }
@@ -1699,14 +1640,13 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static void gen6_write_pde(struct i915_hw_ppgtt *ppgtt,
-			   const int pde, struct i915_page_table *pt)
+static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
+				  unsigned int pde,
+				  const struct i915_page_table *pt)
 {
 	/* Caller needs to make sure the write completes if necessary */
-	u32 pd_entry;
-
-	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID;
-	writel(pd_entry, ppgtt->pd_addr + pde);
+	writel(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+	       ppgtt->pd_addr + pde);
 }
 
 /* Write all the page tables found in the ppgtt structure to incrementing page
@@ -1719,16 +1659,15 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
 
 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
 		gen6_write_pde(ppgtt, pde, pt);
-	wmb();
 
 	mark_tlbs_dirty(ppgtt);
+	wmb();
 }
 
-static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
+static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 {
-	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
-
-	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
+	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
+	return ppgtt->pd.base.ggtt_offset << 10;
 }
 
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
@@ -1860,35 +1799,36 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start,
-				   uint64_t length)
+				   u64 start, u64 length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen6_pte_t *pt_vaddr, scratch_pte;
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned num_entries = length >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / GEN6_PTES;
-	unsigned first_pte = first_entry % GEN6_PTES;
-	unsigned last_pte, i;
-
-	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, 0);
+	unsigned int first_entry = start >> PAGE_SHIFT;
+	unsigned int pde = first_entry / GEN6_PTES;
+	unsigned int pte = first_entry % GEN6_PTES;
+	unsigned int num_entries = length >> PAGE_SHIFT;
+	gen6_pte_t scratch_pte =
+		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
 
 	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		if (last_pte > GEN6_PTES)
-			last_pte = GEN6_PTES;
+		struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
+		unsigned int end = min(pte + num_entries, GEN6_PTES);
+		gen6_pte_t *vaddr;
 
-		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
+		num_entries -= end - pte;
 
-		for (i = first_pte; i < last_pte; i++)
-			pt_vaddr[i] = scratch_pte;
+		/* Note that the hw doesn't support removing PDE on the fly
+		 * (they are cached inside the context with no means to
+		 * invalidate the cache), so we can only reset the PTE
+		 * entries back to scratch.
+		 */
 
-		kunmap_atomic(pt_vaddr);
+		vaddr = kmap_atomic_px(pt);
+		do {
+			vaddr[pte++] = scratch_pte;
+		} while (pte < end);
+		kunmap_atomic(vaddr);
 
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pt++;
+		pte = 0;
 	}
 }
 
@@ -1932,89 +1872,37 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start_in, uint64_t length_in)
+			       u64 start, u64 length)
 {
-	DECLARE_BITMAP(new_page_tables, I915_PDES);
-	struct drm_i915_private *dev_priv = vm->i915;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_table *pt;
-	uint32_t start, length, start_save, length_save;
-	uint32_t pde;
-	int ret;
-
-	start = start_save = start_in;
-	length = length_save = length_in;
-
-	bitmap_zero(new_page_tables, I915_PDES);
+	u64 from = start;
+	unsigned int pde;
+	bool flush = false;
 
-	/* The allocation is done in two stages so that we can bail out with
-	 * minimal amount of pain. The first stage finds new page tables that
-	 * need allocation. The second stage marks use ptes within the page
-	 * tables.
-	 */
 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
-		if (pt != vm->scratch_pt) {
-			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
-			continue;
-		}
-
-		/* We've already allocated a page table */
-		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
+		if (pt == vm->scratch_pt) {
+			pt = alloc_pt(vm);
+			if (IS_ERR(pt))
+				goto unwind_out;
 
-		pt = alloc_pt(vm);
-		if (IS_ERR(pt)) {
-			ret = PTR_ERR(pt);
-			goto unwind_out;
+			gen6_initialize_pt(vm, pt);
+			ppgtt->pd.page_table[pde] = pt;
+			gen6_write_pde(ppgtt, pde, pt);
+			flush = true;
 		}
-
-		gen6_initialize_pt(vm, pt);
-
-		ppgtt->pd.page_table[pde] = pt;
-		__set_bit(pde, new_page_tables);
-		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
 	}
 
-	start = start_save;
-	length = length_save;
-
-	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
-		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
-
-		bitmap_zero(tmp_bitmap, GEN6_PTES);
-		bitmap_set(tmp_bitmap, gen6_pte_index(start),
-			   gen6_pte_count(start, length));
-
-		if (__test_and_clear_bit(pde, new_page_tables))
-			gen6_write_pde(ppgtt, pde, pt);
-
-		trace_i915_page_table_entry_map(vm, pde, pt,
-					 gen6_pte_index(start),
-					 gen6_pte_count(start, length),
-					 GEN6_PTES);
-		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
-				GEN6_PTES);
+	if (flush) {
+		mark_tlbs_dirty(ppgtt);
+		wmb();
 	}
 
-	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
-
-	/* Make sure write is complete before other code can use this page
-	 * table. Also require for WC mapped PTEs */
-	readl(ggtt->gsm);
-
-	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 unwind_out:
-	for_each_set_bit(pde, new_page_tables, I915_PDES) {
-		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
-
-		ppgtt->pd.page_table[pde] = vm->scratch_pt;
-		free_pt(vm, pt);
-	}
-
-	mark_tlbs_dirty(ppgtt);
-	return ret;
+	gen6_ppgtt_clear_range(vm, from, start);
+	return -ENOMEM;
 }
 
 static int gen6_init_scratch(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c59a7687ed6f..9ba98f0da89f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -68,7 +68,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
 #define GEN6_PTE_UNCACHED		(1 << 1)
 #define GEN6_PTE_VALID			(1 << 0)
 
-#define I915_PTES(pte_len)		(PAGE_SIZE / (pte_len))
+#define I915_PTES(pte_len)		((unsigned int)(PAGE_SIZE / (pte_len)))
 #define I915_PTE_MASK(pte_len)		(I915_PTES(pte_len) - 1)
 #define I915_PDES			512
 #define I915_PDE_MASK			(I915_PDES - 1)
@@ -219,8 +219,7 @@ struct i915_page_dma {
 
 struct i915_page_table {
 	struct i915_page_dma base;
-
-	unsigned long *used_ptes;
+	unsigned int used_ptes;
 };
 
 struct i915_page_directory {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 4461df5a94fe..de31c49781d3 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -245,15 +245,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
 
 DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
-	TP_ARGS(vm, pde, pt, first, count, bits),
+		 struct i915_page_table *pt, u32 first, u32 count),
+	TP_ARGS(vm, pde, pt, first, count),
 
 	TP_STRUCT__entry(
 		__field(struct i915_address_space *, vm)
 		__field(u32, pde)
 		__field(u32, first)
 		__field(u32, last)
-		__dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits))
 	),
 
 	TP_fast_assign(
@@ -261,22 +260,16 @@ DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 		__entry->pde = pde;
 		__entry->first = first;
 		__entry->last = first + count - 1;
-		scnprintf(__get_str(cur_ptes),
-			  TRACE_PT_SIZE(bits),
-			  "%*pb",
-			  bits,
-			  pt->used_ptes);
 	),
 
-	TP_printk("vm=%p, pde=%d, updating %u:%u\t%s",
-		  __entry->vm, __entry->pde, __entry->last, __entry->first,
-		  __get_str(cur_ptes))
+	TP_printk("vm=%p, pde=%d, updating %u:%u",
+		  __entry->vm, __entry->pde, __entry->last, __entry->first)
 );
 
 DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
-	TP_ARGS(vm, pde, pt, first, count, bits)
+		 struct i915_page_table *pt, u32 first, u32 count),
+	TP_ARGS(vm, pde, pt, first, count)
 );
 
 TRACE_EVENT(i915_gem_object_change_domain,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (10 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-08 16:30   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
                   ` (7 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We only operate on known extents (for both alloc and clear), and so we can
use both the knowledge of the bind/unbind range and the knowledge of the
existing pagetable to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 246 ++++++++++++------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
 2 files changed, 84 insertions(+), 168 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5ea46c05462b..15e95904931f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -491,39 +491,25 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
-	int ret = -ENOMEM;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
-	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
-				sizeof(*pd->used_pdes), GFP_KERNEL);
-	if (!pd->used_pdes)
-		goto fail_bitmap;
-
-	ret = setup_px(vm, pd);
-	if (ret)
-		goto fail_page_m;
+	if (unlikely(setup_px(vm, pd))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
 
+	pd->used_pdes = 0;
 	return pd;
-
-fail_page_m:
-	kfree(pd->used_pdes);
-fail_bitmap:
-	kfree(pd);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pd(struct i915_address_space *vm,
 		    struct i915_page_directory *pd)
 {
-	if (px_page(pd)) {
-		cleanup_px(vm, pd);
-		kfree(pd->used_pdes);
-		kfree(pd);
-	}
+	cleanup_px(vm, pd);
+	kfree(pd);
 }
 
 static void gen8_initialize_pd(struct i915_address_space *vm,
@@ -537,10 +523,11 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 		pd->page_table[i] = vm->scratch_pt;
 }
 
-static int __pdp_init(struct drm_i915_private *dev_priv,
+static int __pdp_init(struct i915_address_space *vm,
 		      struct i915_page_directory_pointer *pdp)
 {
-	size_t pdpes = I915_PDPES_PER_PDP(dev_priv);
+	size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
+	int i;
 
 	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
 				  sizeof(unsigned long),
@@ -548,8 +535,8 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
 	if (!pdp->used_pdpes)
 		return -ENOMEM;
 
-	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
-				      GFP_KERNEL);
+	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
+					    GFP_KERNEL);
 	if (!pdp->page_directory) {
 		kfree(pdp->used_pdpes);
 		/* the PDP might be the statically allocated top level. Keep it
@@ -558,6 +545,9 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
 		return -ENOMEM;
 	}
 
+	for (i = 0; i < pdpes; i++)
+		pdp->page_directory[i] = vm->scratch_pd;
+
 	return 0;
 }
 
@@ -580,7 +570,7 @@ alloc_pdp(struct i915_address_space *vm)
 	if (!pdp)
 		return ERR_PTR(-ENOMEM);
 
-	ret = __pdp_init(vm->i915, pdp);
+	ret = __pdp_init(vm, pdp);
 	if (ret)
 		goto fail_bitmap;
 
@@ -630,25 +620,9 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 }
 
 static void
-gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
-		struct i915_page_directory_pointer *pdp,
-		struct i915_page_directory *pd,
-		int index)
-{
-	gen8_ppgtt_pdpe_t *page_directorypo;
-
-	if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)))
-		return;
-
-	page_directorypo = kmap_atomic_px(pdp);
-	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
-	kunmap_atomic(page_directorypo);
-}
-
-static void
-gen8_setup_pml4e(struct i915_pml4 *pml4,
-		 struct i915_page_directory_pointer *pdp,
-		 int index)
+gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
+		     struct i915_page_directory_pointer *pdp,
+		     int index)
 {
 	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
 
@@ -756,9 +730,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
 	kunmap_atomic(vaddr);
 }
 
-/* Removes entries from a single page dir, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
 				u64 start, u64 length)
@@ -771,15 +742,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 			continue;
 
 		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
-		__clear_bit(pde, pd->used_pdes);
+		pd->used_pdes--;
 
 		free_pt(vm, pt);
 	}
 
-	if (bitmap_empty(pd->used_pdes, I915_PDES))
-		return true;
+	return !pd->used_pdes;
+}
 
-	return false;
+static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
+				struct i915_page_directory_pointer *pdp,
+				struct i915_page_directory *pd,
+				unsigned int pdpe)
+{
+	gen8_ppgtt_pdpe_t *vaddr;
+
+	pdp->page_directory[pdpe] = pd;
+	if (!USES_FULL_48BIT_PPGTT(vm->i915))
+		return;
+
+	vaddr = kmap_atomic_px(pdp);
+	vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
 }
 
 /* Removes entries from a single page dir pointer, releasing it if it's empty.
@@ -787,25 +771,20 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
  */
 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 				 struct i915_page_directory_pointer *pdp,
-				 uint64_t start,
-				 uint64_t length)
+				 u64 start, u64 length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
-	uint64_t pdpe;
+	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (WARN_ON(!pdp->page_directory[pdpe]))
-			break;
+		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
-			__clear_bit(pdpe, pdp->used_pdpes);
-			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
-			free_pd(vm, pd);
-		}
-	}
+		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+		__clear_bit(pdpe, pdp->used_pdpes);
 
-	mark_tlbs_dirty(ppgtt);
+		free_pd(vm, pd);
+	}
 
 	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
 		return true;
@@ -813,15 +792,21 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	return false;
 }
 
+static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
+}
+
 /* Removes entries from a single pml4.
  * This is the top-level structure in 4-level page tables used on gen8+.
  * Empty entries are always scratch pml4e.
  */
-static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
-				  struct i915_pml4 *pml4,
-				  uint64_t start,
-				  uint64_t length)
+static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
+				  u64 start, u64 length)
 {
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_pml4 *pml4 = &ppgtt->pml4;
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 
@@ -833,23 +818,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 
 		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
 			__clear_bit(pml4e, pml4->used_pml4es);
-			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
+			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
 			free_pdp(vm, pdp);
 		}
 	}
 }
 
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start, uint64_t length)
-{
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (USES_FULL_48BIT_PPGTT(vm->i915))
-		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
-	else
-		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
-}
-
 struct sgt_dma {
 	struct scatterlist *sg;
 	dma_addr_t dma, max;
@@ -947,12 +921,9 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 	if (!px_page(pd))
 		return;
 
-	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
-		if (WARN_ON(!pd->page_table[i]))
-			continue;
-
-		free_pt(vm, pd->page_table[i]);
-		pd->page_table[i] = NULL;
+	for (i = 0; i < I915_PDES; i++) {
+		if (pd->page_table[i] != vm->scratch_pt)
+			free_pt(vm, pd->page_table[i]);
 	}
 }
 
@@ -1047,7 +1018,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 	int i;
 
 	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
-		if (WARN_ON(!pdp->page_directory[i]))
+		if (pdp->page_directory[i] == vm->scratch_pd)
 			continue;
 
 		gen8_free_page_tables(vm, pdp->page_directory[i]);
@@ -1087,44 +1058,28 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	gen8_free_scratch(vm);
 }
 
-/**
- * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
- * @vm:	Master vm structure.
- * @pd:	Page directory for this address range.
- * @start:	Starting virtual address to begin allocations.
- * @length:	Size of the allocations.
- *
- * Allocate the required number of page tables. Extremely similar to
- * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
- * the page directory boundary (instead of the page directory pointer). That
- * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
- * possible, and likely that the caller will need to use multiple calls of this
- * function to achieve the appropriate allocation.
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
-				     struct i915_page_directory *pd,
-				     u64 start, u64 length)
+static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
+			       struct i915_page_directory *pd,
+			       u64 start, u64 length)
 {
 	struct i915_page_table *pt;
 	u64 from = start;
-	uint32_t pde;
+	unsigned int pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		/* Don't reallocate page tables */
-		if (!test_bit(pde, pd->used_pdes)) {
+		if (pt == vm->scratch_pt) {
 			pt = alloc_pt(vm);
 			if (IS_ERR(pt))
 				goto unwind;
 
 			gen8_initialize_pt(vm, pt);
-			pd->page_table[pde] = pt;
+
+			gen8_ppgtt_set_pde(vm, pd, pt, pde);
+			pd->used_pdes++;
 		}
+
 		pt->used_ptes += gen8_pte_count(start, length);
-		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
 	}
-
 	return 0;
 
 unwind:
@@ -1201,7 +1156,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
  *		caller to free on error.
  *
  * Allocate the required number of page directory pointers. Extremely similar to
- * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd().
  * The main difference is here we are limited by the pml4 boundary (instead of
  * the page directory pointer).
  *
@@ -1253,9 +1208,8 @@ free_gen8_temp_bitmaps(unsigned long *new_pds)
 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both
  * of these are based on the number of PDPEs in the system.
  */
-static
-int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 uint32_t pdpes)
+static int __must_check
+alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
 {
 	unsigned long *pds;
 
@@ -1275,8 +1229,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
-	const uint64_t orig_start = start;
-	const uint64_t orig_length = length;
 	uint32_t pdpe;
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 	int ret;
@@ -1295,51 +1247,16 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
+		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
 		if (ret)
 			goto err_out;
-	}
 
-	start = orig_start;
-	length = orig_length;
+		if (test_and_set_bit(pdpe, pdp->used_pdpes))
+			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+	}
 
 	/* Allocations have completed successfully, so set the bitmaps, and do
 	 * the mappings. */
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		gen8_pde_t *const page_directory = kmap_atomic_px(pd);
-		struct i915_page_table *pt;
-		uint64_t pd_len = length;
-		uint64_t pd_start = start;
-		uint32_t pde;
-
-		/* Every pd should be allocated, we just did that above. */
-		WARN_ON(!pd);
-
-		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
-			/* Same reasoning as pd */
-			WARN_ON(!pt);
-			WARN_ON(!pd_len);
-			WARN_ON(!gen8_pte_count(pd_start, pd_len));
-
-			/* Our pde is now pointing to the pagetable, pt */
-			__set_bit(pde, pd->used_pdes);
-
-			/* Map the PDE to the page table */
-			page_directory[pde] = gen8_pde_encode(px_dma(pt),
-							      I915_CACHE_LLC);
-			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
-							gen8_pte_index(start),
-							gen8_pte_count(start, length));
-
-			/* NB: We haven't yet mapped ptes to pages. At this
-			 * point we're still relying on insert_entries() */
-		}
-
-		kunmap_atomic(page_directory);
-		__set_bit(pdpe, pdp->used_pdpes);
-		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
-	}
-
 	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
@@ -1382,7 +1299,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 		if (ret)
 			goto err_out;
 
-		gen8_setup_pml4e(pml4, pdp, pml4e);
+		gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
 	}
 
 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1431,7 +1348,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 			uint32_t  pte;
 			gen8_pte_t *pt_vaddr;
 
-			if (!test_bit(pde, pd->used_pdes))
+			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
 				continue;
 
 			pt_vaddr = kmap_atomic_px(pt);
@@ -1534,7 +1451,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->debug_dump = gen8_dump_ppgtt;
@@ -1556,8 +1472,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->switch_mm = gen8_48b_mm_switch;
 
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
+		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
-		ret = __pdp_init(dev_priv, &ppgtt->pdp);
+		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
 		if (ret)
 			goto free_scratch;
 
@@ -1574,6 +1491,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		}
 
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
+		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
 	}
 
 	if (intel_vgpu_active(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9ba98f0da89f..72656cc5ecf1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -225,8 +225,8 @@ struct i915_page_table {
 struct i915_page_directory {
 	struct i915_page_dma base;
 
-	unsigned long *used_pdes;
 	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
+	unsigned int used_pdes;
 };
 
 struct i915_page_directory_pointer {
@@ -517,9 +517,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
 {
-	return test_bit(n, ppgtt->pdp.used_pdpes) ?
-		px_dma(ppgtt->pdp.page_directory[n]) :
-		px_dma(ppgtt->base.scratch_pd);
+	return px_dma(ppgtt->pdp.page_directory[n]);
 }
 
 static inline struct i915_ggtt *
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (11 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-08 17:42   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
                   ` (6 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We only operate on known extents (both for alloc/clear) and so we can use
both the knowledge of the bind/unbind range and the knowledge of the
existing pagetable to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 275 +++++++++++-------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   3 +-
 2 files changed, 84 insertions(+), 194 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 15e95904931f..99319461f86c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -526,24 +526,13 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 static int __pdp_init(struct i915_address_space *vm,
 		      struct i915_page_directory_pointer *pdp)
 {
-	size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
-	int i;
-
-	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
-				  sizeof(unsigned long),
-				  GFP_KERNEL);
-	if (!pdp->used_pdpes)
-		return -ENOMEM;
+	const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915);
+	unsigned int i;
 
 	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
-					    GFP_KERNEL);
-	if (!pdp->page_directory) {
-		kfree(pdp->used_pdpes);
-		/* the PDP might be the statically allocated top level. Keep it
-		 * as clean as possible */
-		pdp->used_pdpes = NULL;
+					    GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pdp->page_directory))
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < pdpes; i++)
 		pdp->page_directory[i] = vm->scratch_pd;
@@ -553,7 +542,6 @@ static int __pdp_init(struct i915_address_space *vm,
 
 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 {
-	kfree(pdp->used_pdpes);
 	kfree(pdp->page_directory);
 	pdp->page_directory = NULL;
 }
@@ -611,23 +599,12 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
 static void gen8_initialize_pml4(struct i915_address_space *vm,
 				 struct i915_pml4 *pml4)
 {
-	gen8_ppgtt_pml4e_t scratch_pml4e;
-
-	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
-					  I915_CACHE_LLC);
-
-	fill_px(vm, pml4, scratch_pml4e);
-}
-
-static void
-gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
-		     struct i915_page_directory_pointer *pdp,
-		     int index)
-{
-	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
+	unsigned int i;
 
-	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
-	kunmap_atomic(pagemap);
+	fill_px(vm, pml4,
+		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
+	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
+		pml4->pdps[i] = vm->scratch_pdp;
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -781,15 +758,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 			continue;
 
 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
-		__clear_bit(pdpe, pdp->used_pdpes);
+		pdp->used_pdpes--;
 
 		free_pd(vm, pd);
 	}
 
-	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
-		return true;
-
-	return false;
+	return !pdp->used_pdpes;
 }
 
 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
@@ -798,6 +772,19 @@ static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
 	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
 }
 
+static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
+				 struct i915_page_directory_pointer *pdp,
+				 unsigned int pml4e)
+{
+	gen8_ppgtt_pml4e_t *vaddr;
+
+	pml4->pdps[pml4e] = pdp;
+
+	vaddr = kmap_atomic_px(pml4);
+	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
+}
+
 /* Removes entries from a single pml4.
  * This is the top-level structure in 4-level page tables used on gen8+.
  * Empty entries are always scratch pml4e.
@@ -808,19 +795,18 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_pml4 *pml4 = &ppgtt->pml4;
 	struct i915_page_directory_pointer *pdp;
-	uint64_t pml4e;
+	unsigned int pml4e;
 
 	GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (WARN_ON(!pml4->pdps[pml4e]))
-			break;
+		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
-			__clear_bit(pml4e, pml4->used_pml4es);
-			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-			free_pdp(vm, pdp);
-		}
+		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+		__clear_bit(pml4e, pml4->used_pml4es);
+
+		free_pdp(vm, pdp);
 	}
 }
 
@@ -1017,7 +1003,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 {
 	int i;
 
-	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
+	for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) {
 		if (pdp->page_directory[i] == vm->scratch_pd)
 			continue;
 
@@ -1088,65 +1074,6 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 }
 
 /**
- * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
- * @vm:	Master vm structure.
- * @pdp:	Page directory pointer for this address range.
- * @start:	Starting virtual address to begin allocations.
- * @length:	Size of the allocations.
- * @new_pds:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
- *
- * Allocate the required number of page directories starting at the pde index of
- * @start, and ending at the pde index @start + @length. This function will skip
- * over already allocated page directories within the range, and only allocate
- * new ones, setting the appropriate pointer within the pdp as well as the
- * correct position in the bitmap @new_pds.
- *
- * The function will only allocate the pages within the range for a give page
- * directory pointer. In other words, if @start + @length straddles a virtually
- * addressed PDP boundary (512GB for 4k pages), there will be more allocations
- * required by the caller, This is not currently possible, and the BUG in the
- * code will prevent it.
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int
-gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
-				  struct i915_page_directory_pointer *pdp,
-				  uint64_t start,
-				  uint64_t length,
-				  unsigned long *new_pds)
-{
-	struct i915_page_directory *pd;
-	uint32_t pdpe;
-	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
-
-	WARN_ON(!bitmap_empty(new_pds, pdpes));
-
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (test_bit(pdpe, pdp->used_pdpes))
-			continue;
-
-		pd = alloc_pd(vm);
-		if (IS_ERR(pd))
-			goto unwind_out;
-
-		gen8_initialize_pd(vm, pd);
-		pdp->page_directory[pdpe] = pd;
-		__set_bit(pdpe, new_pds);
-		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
-	}
-
-	return 0;
-
-unwind_out:
-	for_each_set_bit(pdpe, new_pds, pdpes)
-		free_pd(vm, pdp->page_directory[pdpe]);
-
-	return -ENOMEM;
-}
-
-/**
  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
  * @vm:	Master vm structure.
  * @pml4:	Page map level 4 for this address range.
@@ -1166,23 +1093,19 @@ static int
 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 				  struct i915_pml4 *pml4,
 				  uint64_t start,
-				  uint64_t length,
-				  unsigned long *new_pdps)
+				  uint64_t length)
 {
 	struct i915_page_directory_pointer *pdp;
 	uint32_t pml4e;
 
-	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
-
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (!test_bit(pml4e, pml4->used_pml4es)) {
 			pdp = alloc_pdp(vm);
 			if (IS_ERR(pdp))
-				goto unwind_out;
+				return PTR_ERR(pdp);
 
 			gen8_initialize_pdp(vm, pdp);
 			pml4->pdps[pml4e] = pdp;
-			__set_bit(pml4e, new_pdps);
 			trace_i915_page_directory_pointer_entry_alloc(vm,
 								      pml4e,
 								      start,
@@ -1191,34 +1114,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 	}
 
 	return 0;
-
-unwind_out:
-	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		free_pdp(vm, pml4->pdps[pml4e]);
-
-	return -ENOMEM;
-}
-
-static void
-free_gen8_temp_bitmaps(unsigned long *new_pds)
-{
-	kfree(new_pds);
-}
-
-/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
- * of these are based on the number of PDPEs in the system.
- */
-static int __must_check
-alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
-{
-	unsigned long *pds;
-
-	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
-	if (!pds)
-		return -ENOMEM;
-
-	*new_pds = pds;
-	return 0;
 }
 
 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
@@ -1227,47 +1122,37 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
-	uint32_t pdpe;
-	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
+	u64 from = start;
+	unsigned int pdpe;
 	int ret;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
-	if (ret)
-		return ret;
-
-	/* Do the allocations first so we can easily bail out */
-	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
-						new_page_dirs);
-	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs);
-		return ret;
-	}
-
-	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
-		if (ret)
-			goto err_out;
+		if (pd == vm->scratch_pd) {
+			pd = alloc_pd(vm);
+			if (IS_ERR(pd))
+				goto unwind;
 
-		if (test_and_set_bit(pdpe, pdp->used_pdpes))
+			gen8_initialize_pd(vm, pd);
 			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+			pdp->used_pdpes++;
+		}
+
+		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
+		if (unlikely(ret)) {
+			gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+			pdp->used_pdpes--;
+			free_pd(vm, pd);
+			goto unwind;
+		}
 	}
 
-	/* Allocations have completed successfully, so set the bitmaps, and do
-	 * the mappings. */
-	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
 
-err_out:
-	for_each_set_bit(pdpe, new_page_dirs, pdpes)
-		free_pd(vm, pdp->page_directory[pdpe]);
-
-	free_gen8_temp_bitmaps(new_page_dirs);
-	mark_tlbs_dirty(ppgtt);
-	return ret;
+unwind:
+	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
+	return -ENOMEM;
 }
 
 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
@@ -1287,8 +1172,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 	/* The pagedirectory and pagetable allocations are done in the shared 3
 	 * and 4 level code. Just allocate the pdps.
 	 */
-	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
-						new_pdps);
+	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
 	if (ret)
 		return ret;
 
@@ -1340,7 +1224,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 		uint64_t pd_start = start;
 		uint32_t pde;
 
-		if (!test_bit(pdpe, pdp->used_pdpes))
+		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
 			continue;
 
 		seq_printf(m, "\tPDPE #%d\n", pdpe);
@@ -1407,29 +1291,34 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 
 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 {
-	unsigned long *new_page_dirs;
-	uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev));
-	int ret;
+	struct i915_address_space *vm = &ppgtt->base;
+	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
+	struct i915_page_directory *pd;
+	u64 start = 0, length = ppgtt->base.total;
+	u64 from = start;
+	unsigned int pdpe;
 
-	/* We allocate temp bitmap for page tables for no gain
-	 * but as this is for init only, lets keep the things simple
-	 */
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
-	if (ret)
-		return ret;
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+		pd = alloc_pd(vm);
+		if (IS_ERR(pd))
+			goto unwind;
 
-	/* Allocate for all pdps regardless of how the ppgtt
-	 * was defined.
-	 */
-	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
-						0, 1ULL << 32,
-						new_page_dirs);
-	if (!ret)
-		*ppgtt->pdp.used_pdpes = *new_page_dirs;
+		gen8_initialize_pd(vm, pd);
+		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+		pdp->used_pdpes++;
+	}
 
-	free_gen8_temp_bitmaps(new_page_dirs);
+	pdp->used_pdpes++; /* never remove */
+	return 0;
 
-	return ret;
+unwind:
+	start -= from;
+	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
+		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+		free_pd(vm, pd);
+	}
+	pdp->used_pdpes = 0;
+	return -ENOMEM;
 }
 
 /*
@@ -1486,8 +1375,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 		if (intel_vgpu_active(dev_priv)) {
 			ret = gen8_preallocate_top_level_pdps(ppgtt);
-			if (ret)
+			if (ret) {
+				__pdp_fini(&ppgtt->pdp);
 				goto free_scratch;
+			}
 		}
 
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 72656cc5ecf1..063e062fad2b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -231,9 +231,8 @@ struct i915_page_directory {
 
 struct i915_page_directory_pointer {
 	struct i915_page_dma base;
-
-	unsigned long *used_pdpes;
 	struct i915_page_directory **page_directory;
+	unsigned int used_pdpes;
 };
 
 struct i915_pml4 {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (12 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-08 17:47   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
                   ` (5 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We only operate on known extents (both for alloc/clear) and so we can use
both the knowledge of the bind/unbind range and the knowledge of the
existing pagetable to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 132 +++++++++++-------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   2 -
 2 files changed, 38 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 99319461f86c..5964b83df4e2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -804,7 +804,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 			continue;
 
 		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-		__clear_bit(pml4e, pml4->used_pml4es);
 
 		free_pdp(vm, pdp);
 	}
@@ -1018,8 +1017,8 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
 {
 	int i;
 
-	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
-		if (WARN_ON(!ppgtt->pml4.pdps[i]))
+	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
+		if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
 			continue;
 
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
@@ -1073,53 +1072,9 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	return -ENOMEM;
 }
 
-/**
- * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
- * @vm:	Master vm structure.
- * @pml4:	Page map level 4 for this address range.
- * @start:	Starting virtual address to begin allocations.
- * @length:	Size of the allocations.
- * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
- *
- * Allocate the required number of page directory pointers. Extremely similar to
- * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd().
- * The main difference is here we are limited by the pml4 boundary (instead of
- * the page directory pointer).
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int
-gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
-				  struct i915_pml4 *pml4,
-				  uint64_t start,
-				  uint64_t length)
-{
-	struct i915_page_directory_pointer *pdp;
-	uint32_t pml4e;
-
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (!test_bit(pml4e, pml4->used_pml4es)) {
-			pdp = alloc_pdp(vm);
-			if (IS_ERR(pdp))
-				return PTR_ERR(pdp);
-
-			gen8_initialize_pdp(vm, pdp);
-			pml4->pdps[pml4e] = pdp;
-			trace_i915_page_directory_pointer_entry_alloc(vm,
-								      pml4e,
-								      start,
-								      GEN8_PML4E_SHIFT);
-		}
-	}
-
-	return 0;
-}
-
-static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
-				    struct i915_page_directory_pointer *pdp,
-				    uint64_t start,
-				    uint64_t length)
+static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
+				struct i915_page_directory_pointer *pdp,
+				u64 start, u64 length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
@@ -1155,58 +1110,46 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	return -ENOMEM;
 }
 
-static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
-				    struct i915_pml4 *pml4,
-				    uint64_t start,
-				    uint64_t length)
+static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
+				 u64 start, u64 length)
 {
-	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
-	struct i915_page_directory_pointer *pdp;
-	uint64_t pml4e;
-	int ret = 0;
-
-	/* Do the pml4 allocations first, so we don't need to track the newly
-	 * allocated tables below the pdp */
-	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+	return gen8_ppgtt_alloc_pdp(vm,
+				    &i915_vm_to_ppgtt(vm)->pdp, start, length);
+}
 
-	/* The pagedirectory and pagetable allocations are done in the shared 3
-	 * and 4 level code. Just allocate the pdps.
-	 */
-	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
-	if (ret)
-		return ret;
+static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
+				 u64 start, u64 length)
+{
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_pml4 *pml4 = &ppgtt->pml4;
+	struct i915_page_directory_pointer *pdp;
+	u64 from = start;
+	u32 pml4e;
+	int ret;
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		WARN_ON(!pdp);
+		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
+			pdp = alloc_pdp(vm);
+			if (IS_ERR(pdp))
+				goto unwind;
 
-		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
-		if (ret)
-			goto err_out;
+			gen8_initialize_pdp(vm, pdp);
+			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+		}
 
-		gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
+		if (unlikely(ret)) {
+			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+			free_pdp(vm, pdp);
+			goto unwind;
+		}
 	}
 
-	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
-		  GEN8_PML4ES_PER_PML4);
-
 	return 0;
 
-err_out:
-	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
-
-	return ret;
-}
-
-static int gen8_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start, uint64_t length)
-{
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (USES_FULL_48BIT_PPGTT(vm->i915))
-		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
-	else
-		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+unwind:
+	gen8_ppgtt_clear_4lvl(vm, from, start - from);
+	return -ENOMEM;
 }
 
 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
@@ -1280,7 +1223,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 		struct i915_page_directory_pointer *pdp;
 
 		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-			if (!test_bit(pml4e, pml4->used_pml4es))
+			if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
 				continue;
 
 			seq_printf(m, "    PML4E #%llu\n", pml4e);
@@ -1339,7 +1282,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
-	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->debug_dump = gen8_dump_ppgtt;
@@ -1360,6 +1302,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->base.total = 1ULL << 48;
 		ppgtt->switch_mm = gen8_48b_mm_switch;
 
+		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
 		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
@@ -1381,6 +1324,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			}
 		}
 
+		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
 		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 063e062fad2b..5af5a5d53d93 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -237,8 +237,6 @@ struct i915_page_directory_pointer {
 
 struct i915_pml4 {
 	struct i915_page_dma base;
-
-	DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
 	struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (13 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-08 17:48   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
                   ` (4 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The barrier here is not required - we apply the barrier before the range
is ever reused by the GPU instead.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5964b83df4e2..04636760fb38 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2166,7 +2166,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
-	readl(gtt_base);
 }
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
@@ -2191,7 +2190,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
-	readl(gtt_base);
 }
 
 static void i915_ggtt_insert_page(struct i915_address_space *vm,
@@ -2215,7 +2213,6 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
-
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (14 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-08 17:53   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints Chris Wilson
                   ` (3 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We want to reload the PDP (and flush the TLB) when the addresses are
changed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 04636760fb38..0a28913b06af 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1076,7 +1076,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 				struct i915_page_directory_pointer *pdp,
 				u64 start, u64 length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
 	u64 from = start;
 	unsigned int pdpe;
@@ -1091,6 +1090,8 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 			gen8_initialize_pd(vm, pd);
 			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
 			pdp->used_pdpes++;
+
+			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
 		}
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
@@ -1102,7 +1103,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 		}
 	}
 
-	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 unwind:
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (15 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-08 18:01   ` Matthew Auld
  2017-02-02 15:02 ` [PATCH 19/19] drm/i915: Remove unused ppgtt->enable() Chris Wilson
                   ` (2 subsequent siblings)
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

The tracepoints are now entirely synonymous with binding and unbinding the
VMA (and the tracepoints there).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  4 --
 drivers/gpu/drm/i915/i915_trace.h   | 97 -------------------------------------
 2 files changed, 101 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0a28913b06af..46170b3acc2a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -190,7 +190,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int ret;
 
-	trace_i915_va_alloc(vma);
 	ret = vma->vm->allocate_va_range(vma->vm,
 					 vma->node.start, vma->size);
 	if (ret)
@@ -1312,9 +1311,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 		ppgtt->base.total = 1ULL << 32;
 		ppgtt->switch_mm = gen8_legacy_mm_switch;
-		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
-							      0, 0,
-							      GEN8_PML4E_SHIFT);
 
 		if (intel_vgpu_active(dev_priv)) {
 			ret = gen8_preallocate_top_level_pdps(ppgtt);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index de31c49781d3..7a547cdfc381 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -175,103 +175,6 @@ TRACE_EVENT(i915_vma_unbind,
 		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
-TRACE_EVENT(i915_va_alloc,
-	TP_PROTO(struct i915_vma *vma),
-	TP_ARGS(vma),
-
-	TP_STRUCT__entry(
-		__field(struct i915_address_space *, vm)
-		__field(u64, start)
-		__field(u64, end)
-	),
-
-	TP_fast_assign(
-		__entry->vm = vma->vm;
-		__entry->start = vma->node.start;
-		__entry->end = vma->node.start + vma->node.size - 1;
-	),
-
-	TP_printk("vm=%p (%c), 0x%llx-0x%llx",
-		  __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P',  __entry->start, __entry->end)
-);
-
-DECLARE_EVENT_CLASS(i915_px_entry,
-	TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift),
-	TP_ARGS(vm, px, start, px_shift),
-
-	TP_STRUCT__entry(
-		__field(struct i915_address_space *, vm)
-		__field(u32, px)
-		__field(u64, start)
-		__field(u64, end)
-	),
-
-	TP_fast_assign(
-		__entry->vm = vm;
-		__entry->px = px;
-		__entry->start = start;
-		__entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1;
-	),
-
-	TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)",
-		  __entry->vm, __entry->px, __entry->start, __entry->end)
-);
-
-DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc,
-	     TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift),
-	     TP_ARGS(vm, pde, start, pde_shift)
-);
-
-DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc,
-		   TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift),
-		   TP_ARGS(vm, pdpe, start, pdpe_shift),
-
-		   TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)",
-			     __entry->vm, __entry->px, __entry->start, __entry->end)
-);
-
-DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
-		   TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift),
-		   TP_ARGS(vm, pml4e, start, pml4e_shift),
-
-		   TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)",
-			     __entry->vm, __entry->px, __entry->start, __entry->end)
-);
-
-/* Avoid extra math because we only support two sizes. The format is defined by
- * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
-#define TRACE_PT_SIZE(bits) \
-	((((bits) == 1024) ? 288 : 144) + 1)
-
-DECLARE_EVENT_CLASS(i915_page_table_entry_update,
-	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count),
-	TP_ARGS(vm, pde, pt, first, count),
-
-	TP_STRUCT__entry(
-		__field(struct i915_address_space *, vm)
-		__field(u32, pde)
-		__field(u32, first)
-		__field(u32, last)
-	),
-
-	TP_fast_assign(
-		__entry->vm = vm;
-		__entry->pde = pde;
-		__entry->first = first;
-		__entry->last = first + count - 1;
-	),
-
-	TP_printk("vm=%p, pde=%d, updating %u:%u",
-		  __entry->vm, __entry->pde, __entry->last, __entry->first)
-);
-
-DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
-	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count),
-	TP_ARGS(vm, pde, pt, first, count)
-);
-
 TRACE_EVENT(i915_gem_object_change_domain,
 	    TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write),
 	    TP_ARGS(obj, old_read, old_write),
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* [PATCH 19/19] drm/i915: Remove unused ppgtt->enable()
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (16 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints Chris Wilson
@ 2017-02-02 15:02 ` Chris Wilson
  2017-02-03 13:04   ` Joonas Lahtinen
  2017-02-02 17:54 ` ✓ Fi.CI.BAT: success for series starting with [01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Patchwork
  2017-02-07 15:30 ` [PATCH 01/19] " Mika Kuoppala
  19 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

We never assign or use the ppgtt->enable() callback, so remove it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5af5a5d53d93..caaa2c5c23be 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -382,7 +382,6 @@ struct i915_hw_ppgtt {
 
 	gen6_pte_t __iomem *pd_addr;
 
-	int (*enable)(struct i915_hw_ppgtt *ppgtt);
 	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
 			 struct drm_i915_gem_request *req);
 	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 46+ messages in thread

* Re: [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects
  2017-02-02 15:02 ` [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects Chris Wilson
@ 2017-02-02 15:07   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-02 15:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Very large objects are expected, so don't WARN the user if they are
> using them!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Let's just go with yours then :)

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 15:02 ` [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
@ 2017-02-02 15:32   ` Chris Wilson
  2017-02-02 15:57     ` Tvrtko Ursulin
  0 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 15:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: mika.kuoppala

On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
> Improve the sg iteration and in the process eliminate a bug in
> miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
> full length of the sg table.
> 

which fixes a corner case of 0c40ce130e38
Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 15:32   ` Chris Wilson
@ 2017-02-02 15:57     ` Tvrtko Ursulin
  2017-02-02 16:10       ` Chris Wilson
  0 siblings, 1 reply; 46+ messages in thread
From: Tvrtko Ursulin @ 2017-02-02 15:57 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, michal.winiarski, michel.thierry,
	joonas.lahtinen, mika.kuoppala


On 02/02/2017 15:32, Chris Wilson wrote:
> On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
>> Improve the sg iteration and in hte process eliminate a bug in
>> miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
>> full length of the sg table.
>>
>
> which fixes a corner case of 0c40ce130e38
> Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")

What do you mean? orig_nents is definitely the full length of the sg 
table, especially after i915_sg_trim. Before it orig_nents was often 
larger than the real length of the sg table.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 15:57     ` Tvrtko Ursulin
@ 2017-02-02 16:10       ` Chris Wilson
  2017-02-02 16:39         ` Tvrtko Ursulin
  0 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 16:10 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, mika.kuoppala

On Thu, Feb 02, 2017 at 03:57:43PM +0000, Tvrtko Ursulin wrote:
> 
> On 02/02/2017 15:32, Chris Wilson wrote:
> >On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
> >>Improve the sg iteration and in hte process eliminate a bug in
> >>miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
> >>full length of the sg table.
> >>
> >
> >which fixes a corner case of 0c40ce130e38
> >Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")
> 
> What do you mean? oring_nents is definitely the full length of the
> sg table, especially after i915_sg_trim. Before it orig_nents was
> often larger than the real length of the sg table.

The code is using orig_nents as obj->base.size/vma->size (a page count,
not the sg count), if I read it correctly as it is computing the address
range.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 16:10       ` Chris Wilson
@ 2017-02-02 16:39         ` Tvrtko Ursulin
  2017-02-02 17:05           ` Chris Wilson
  0 siblings, 1 reply; 46+ messages in thread
From: Tvrtko Ursulin @ 2017-02-02 16:39 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, michal.winiarski, michel.thierry,
	joonas.lahtinen, mika.kuoppala


On 02/02/2017 16:10, Chris Wilson wrote:
> On Thu, Feb 02, 2017 at 03:57:43PM +0000, Tvrtko Ursulin wrote:
>>
>> On 02/02/2017 15:32, Chris Wilson wrote:
>>> On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
>>>> Improve the sg iteration and in hte process eliminate a bug in
>>>> miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
>>>> full length of the sg table.
>>>>
>>>
>>> which fixes a corner case of 0c40ce130e38
>>> Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")
>>
>> What do you mean? oring_nents is definitely the full length of the
>> sg table, especially after i915_sg_trim. Before it orig_nents was
>> often larger than the real length of the sg table.
>
> The code is using orig_nents as obj->base.size/vma->size (a page count,
> not the sg count), if I read it correctly as it is computing the address
> range.

Oh right, I was misled by the commit message ("is no longer the full 
length of the sg table"). Nasty. That means it was broken for userptr 
objects as well.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 16:39         ` Tvrtko Ursulin
@ 2017-02-02 17:05           ` Chris Wilson
  2017-02-02 17:17             ` Tvrtko Ursulin
  0 siblings, 1 reply; 46+ messages in thread
From: Chris Wilson @ 2017-02-02 17:05 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, mika.kuoppala

On Thu, Feb 02, 2017 at 04:39:49PM +0000, Tvrtko Ursulin wrote:
> 
> On 02/02/2017 16:10, Chris Wilson wrote:
> >On Thu, Feb 02, 2017 at 03:57:43PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 02/02/2017 15:32, Chris Wilson wrote:
> >>>On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
> >>>>Improve the sg iteration and in hte process eliminate a bug in
> >>>>miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
> >>>>full length of the sg table.
> >>>>
> >>>
> >>>which fixes a corner case of 0c40ce130e38
> >>>Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")
> >>
> >>What do you mean? oring_nents is definitely the full length of the
> >>sg table, especially after i915_sg_trim. Before it orig_nents was
> >>often larger than the real length of the sg table.
> >
> >The code is using orig_nents as obj->base.size/vma->size (a page count,
> >not the sg count), if I read it correctly as it is computing the address
> >range.
> 
> Oh right, I was misled by the commit message ("is not longer the
> full length of the sg table"). Nasty. That means it was broken for
> userptr objects as well.

And partial, and... pretty everything at some point.

Hmm, maybe it is the recent spat of NULL deref for gen8 ppgtt. I'd been
assuming that they had a similar unknown cause to the gen6 which have
been around for yonks.

Tart up the commit message and this could 4.10-rc material.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 17:05           ` Chris Wilson
@ 2017-02-02 17:17             ` Tvrtko Ursulin
  2017-02-03  8:34               ` Chris Wilson
  0 siblings, 1 reply; 46+ messages in thread
From: Tvrtko Ursulin @ 2017-02-02 17:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, michal.winiarski, michel.thierry,
	joonas.lahtinen, mika.kuoppala


On 02/02/2017 17:05, Chris Wilson wrote:
> On Thu, Feb 02, 2017 at 04:39:49PM +0000, Tvrtko Ursulin wrote:
>>
>> On 02/02/2017 16:10, Chris Wilson wrote:
>>> On Thu, Feb 02, 2017 at 03:57:43PM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 02/02/2017 15:32, Chris Wilson wrote:
>>>>> On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
>>>>>> Improve the sg iteration and in hte process eliminate a bug in
>>>>>> miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
>>>>>> full length of the sg table.
>>>>>>
>>>>>
>>>>> which fixes a corner case of 0c40ce130e38
>>>>> Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")
>>>>
>>>> What do you mean? oring_nents is definitely the full length of the
>>>> sg table, especially after i915_sg_trim. Before it orig_nents was
>>>> often larger than the real length of the sg table.
>>>
>>> The code is using orig_nents as obj->base.size/vma->size (a page count,
>>> not the sg count), if I read it correctly as it is computing the address
>>> range.
>>
>> Oh right, I was misled by the commit message ("is not longer the
>> full length of the sg table"). Nasty. That means it was broken for
>> userptr objects as well.
>
> And partial, and... pretty everything at some point.

Partials won't be in ppgtt. Neither the stolen ones, and internal ones 
are only gen7 right?

> Hmm, maybe it is the recent spat of NULL deref for gen8 ppgtt. I'd been
> assuming that they had a similar unknown cause to the gen6 which have
> been around for yonks.
>
> Tart up the commit message and this could 4.10-rc material.

Yeah, but how does this actually manifest? It should be blowing up left 
right and center because sg trim in my testing managed to trim 
impressively. I don't get it. I'll leave the thinking for tomorrow.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages()
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (17 preceding siblings ...)
  2017-02-02 15:02 ` [PATCH 19/19] drm/i915: Remove unused ppgtt->enable() Chris Wilson
@ 2017-02-02 17:54 ` Patchwork
  2017-02-07 15:30 ` [PATCH 01/19] " Mika Kuoppala
  19 siblings, 0 replies; 46+ messages in thread
From: Patchwork @ 2017-02-02 17:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages()
URL   : https://patchwork.freedesktop.org/series/18997/
State : success

== Summary ==

Series 18997v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/18997/revisions/1/mbox/


fi-bdw-5557u     total:247  pass:233  dwarn:0   dfail:0   fail:0   skip:14 
fi-bsw-n3050     total:247  pass:208  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:247  pass:225  dwarn:0   dfail:0   fail:0   skip:22 
fi-bxt-t5700     total:78   pass:65   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:247  pass:220  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:247  pass:216  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:247  pass:228  dwarn:0   dfail:0   fail:0   skip:19 
fi-hsw-4770r     total:247  pass:228  dwarn:0   dfail:0   fail:0   skip:19 
fi-ivb-3520m     total:247  pass:226  dwarn:0   dfail:0   fail:0   skip:21 
fi-ivb-3770      total:247  pass:226  dwarn:0   dfail:0   fail:0   skip:21 
fi-kbl-7500u     total:247  pass:224  dwarn:0   dfail:0   fail:2   skip:21 
fi-skl-6260u     total:247  pass:234  dwarn:0   dfail:0   fail:0   skip:13 
fi-skl-6700hq    total:247  pass:227  dwarn:0   dfail:0   fail:0   skip:20 
fi-skl-6700k     total:247  pass:222  dwarn:4   dfail:0   fail:0   skip:21 
fi-skl-6770hq    total:247  pass:234  dwarn:0   dfail:0   fail:0   skip:13 
fi-snb-2520m     total:247  pass:216  dwarn:0   dfail:0   fail:0   skip:31 
fi-snb-2600      total:247  pass:215  dwarn:0   dfail:0   fail:0   skip:32 

11190c4421e0076a9f37205b0d53b2b3f2733711 drm-tip: 2017y-02m-02d-15h-01m-15s UTC integration manifest
0d8618c drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
98f31b3 drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
2724b29 drm/i915: Micro-optimise i915_get_ggtt_vma_pages()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3680/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-02 17:17             ` Tvrtko Ursulin
@ 2017-02-03  8:34               ` Chris Wilson
  0 siblings, 0 replies; 46+ messages in thread
From: Chris Wilson @ 2017-02-03  8:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, mika.kuoppala

On Thu, Feb 02, 2017 at 05:17:57PM +0000, Tvrtko Ursulin wrote:
> 
> On 02/02/2017 17:05, Chris Wilson wrote:
> >On Thu, Feb 02, 2017 at 04:39:49PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 02/02/2017 16:10, Chris Wilson wrote:
> >>>On Thu, Feb 02, 2017 at 03:57:43PM +0000, Tvrtko Ursulin wrote:
> >>>>
> >>>>On 02/02/2017 15:32, Chris Wilson wrote:
> >>>>>On Thu, Feb 02, 2017 at 03:02:32PM +0000, Chris Wilson wrote:
> >>>>>>Improve the sg iteration and in the process eliminate a bug in
> >>>>>>miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
> >>>>>>full length of the sg table.
> >>>>>>
> >>>>>
> >>>>>which fixes a corner case of 0c40ce130e38
> >>>>>Fixes: 0c40ce130e38 ("drm/i915: Trim the object sg table")
> >>>>
> >>>>What do you mean? orig_nents is definitely the full length of the
> >>>>sg table, especially after i915_sg_trim. Before it orig_nents was
> >>>>often larger than the real length of the sg table.
> >>>
> >>>The code is using orig_nents as obj->base.size/vma->size (a page count,
> >>>not the sg count), if I read it correctly as it is computing the address
> >>>range.
> >>
> >>Oh right, I was misled by the commit message ("is no longer the
> >>full length of the sg table"). Nasty. That means it was broken for
> >>userptr objects as well.
> >
> >And partial, and... pretty everything at some point.
> 
> Partials won't be in ppgtt. Neither the stolen ones, and internal
> ones are only gen7 right?

Yup. So far it looks like just get_pages_gtt and get_pages_userptr,
get_pages_dmabuf would be susceptible.

> >Hmm, maybe it is the recent spat of NULL deref for gen8 ppgtt. I'd been
> >assuming that they had a similar unknown cause to the gen6 which have
> >been around for yonks.
> >
> >Tart up the commit message and this could 4.10-rc material.
> 
> Yeah, but how this actually manifests? It should be blowing up left
> right and center because sg trim in my testing managed to trim
> impressively. I don't get it. I'll leave the thinking for tomorrow.

Hmm. I was thinking that we didn't allocate our bookkeeping for the full
range and so would find a NULL in the tree - but we do that
allocation pass in allocate_va_range() beforehand, and that has the full
node size. So it would appear that we might stop early (leaving scratch
pages for the GPU to read) but I can't see an oops.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 19/19] drm/i915: Remove unused ppgtt->enable()
  2017-02-02 15:02 ` [PATCH 19/19] drm/i915: Remove unused ppgtt->enable() Chris Wilson
@ 2017-02-03 13:04   ` Joonas Lahtinen
  0 siblings, 0 replies; 46+ messages in thread
From: Joonas Lahtinen @ 2017-02-03 13:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: mika.kuoppala

On to, 2017-02-02 at 15:02 +0000, Chris Wilson wrote:
> We never assign or use the ppgtt->enable() callback, so remove it.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt
  2017-02-02 15:02 ` [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
@ 2017-02-06 14:21   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-06 14:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> The aliasing_ppgtt is a regular ppgtt, and we can use the regular
> i915_ppgtt_put() to properly tear it down.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma
  2017-02-02 15:02 ` [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
@ 2017-02-06 15:07   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-06 15:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Similar to how we already split the bind_vma for ggtt/aliasing_gtt, also
> split up the unbind for symmetry.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes
  2017-02-02 15:02 ` [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
@ 2017-02-06 20:32   ` Michał Winiarski
  0 siblings, 0 replies; 46+ messages in thread
From: Michał Winiarski @ 2017-02-06 20:32 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, mika.kuoppala

On Thu, Feb 02, 2017 at 03:02:41PM +0000, Chris Wilson wrote:
> We only operate on known extents (both for alloc/clear) and so we can use
> both the knowledge of the bind/unbind range along with the knowledge of
> the existing pagetable to avoid having to allocate temporary and
> auxiliary bitmaps.

We're losing the ability to effectively track the entries though (as in we can
no longer do 'for each !empty entry do stuff' without iterating through the
whole table), which is why we need to drop the info from the tracepoint.

It would be nice if we could find that in the commit message. Maybe with a
different wording :)

Either way:
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>

-Michał
 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 338 ++++++++++++------------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +-
>  drivers/gpu/drm/i915/i915_trace.h   |  19 +-
>  3 files changed, 121 insertions(+), 241 deletions(-)
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 09/19] drm/i915: Move allocate_va_range to GTT
  2017-02-02 15:02 ` [PATCH 09/19] drm/i915: Move allocate_va_range to GTT Chris Wilson
@ 2017-02-07 10:01   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-07 10:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> In the future, we need to call allocate_va_range on the aliasing-ppgtt
> which means moving the call down from the vma into the vm (which is
> more appropriate for calling the vm function).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 10/19] drm/i915: Remove redundant clear of appgtt
  2017-02-02 15:02 ` [PATCH 10/19] drm/i915: Remove redundant clear of appgtt Chris Wilson
@ 2017-02-07 10:06   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-07 10:06 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Upon creation of the va range, it is initialised to point at scratch.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 11/19] drm/i915: Tidy gen6_write_pde()
  2017-02-02 15:02 ` [PATCH 11/19] drm/i915: Tidy gen6_write_pde() Chris Wilson
@ 2017-02-07 10:18   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-07 10:18 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 34 ++++++++++++----------------------
>  1 file changed, 12 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 991a15efe478..66c3540eb387 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1699,36 +1699,29 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  }
>
>  /* Write pde (index) from the page directory @pd to the page table @pt */
> -static void gen6_write_pde(struct i915_page_directory *pd,
> -                           const int pde, struct i915_page_table *pt)
> +static void gen6_write_pde(struct i915_hw_ppgtt *ppgtt,
> +                          const int pde, struct i915_page_table *pt)
Why doesn't this just take an unsigned int...

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages()
  2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (18 preceding siblings ...)
  2017-02-02 17:54 ` ✓ Fi.CI.BAT: success for series starting with [01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Patchwork
@ 2017-02-07 15:30 ` Mika Kuoppala
  19 siblings, 0 replies; 46+ messages in thread
From: Mika Kuoppala @ 2017-02-07 15:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The predominant VMA class is normal GTT, so allow gcc to emphasize that
> path and avoid unnecessary stack movement.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++++++------------------
>  1 file changed, 32 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ec360ab939b8..f8cef51cf24c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2615,14 +2615,16 @@ static int ggtt_bind_vma(struct i915_vma *vma,
>  {
>  	struct drm_i915_private *i915 = vma->vm->i915;
>  	struct drm_i915_gem_object *obj = vma->obj;
> -	u32 pte_flags = 0;
> -	int ret;
> +	u32 pte_flags;
>  
> -	ret = i915_get_ggtt_vma_pages(vma);
> -	if (ret)
> -		return ret;
> +	if (unlikely(!vma->pages)) {
> +		int ret = i915_get_ggtt_vma_pages(vma);
> +		if (ret)
> +			return ret;
> +	}
>  
>  	/* Currently applicable only to VLV */
> +	pte_flags = 0;
>  	if (obj->gt_ro)
>  		pte_flags |= PTE_READ_ONLY;
>  
> @@ -2647,18 +2649,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>  {
>  	struct drm_i915_private *i915 = vma->vm->i915;
>  	u32 pte_flags;
> -	int ret;
>  
> -	ret = i915_get_ggtt_vma_pages(vma);
> -	if (ret)
> -		return ret;
> +	if (unlikely(!vma->pages)) {
> +		int ret = i915_get_ggtt_vma_pages(vma);
> +		if (ret)
> +			return ret;
> +	}
>  
>  	/* Currently applicable only to VLV */
>  	pte_flags = 0;
>  	if (vma->obj->gt_ro)
>  		pte_flags |= PTE_READ_ONLY;
>  
> -
>  	if (flags & I915_VMA_GLOBAL_BIND) {
>  		intel_runtime_pm_get(i915);
>  		vma->vm->insert_entries(vma->vm,
> @@ -3397,9 +3399,9 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
>  	return sg;
>  }
>  
> -static struct sg_table *
> -intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
> -			  struct drm_i915_gem_object *obj)
> +static noinline struct sg_table *
> +intel_rotate_pages(struct intel_rotation_info *rot_info,
> +		   struct drm_i915_gem_object *obj)
>  {
>  	const size_t n_pages = obj->base.size / PAGE_SIZE;
>  	unsigned int size = intel_rotation_info_size(rot_info);
> @@ -3460,7 +3462,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
>  	return ERR_PTR(ret);
>  }
>  
> -static struct sg_table *
> +static noinline struct sg_table *
>  intel_partial_pages(const struct i915_ggtt_view *view,
>  		    struct drm_i915_gem_object *obj)
>  {
> @@ -3514,7 +3516,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
>  static int
>  i915_get_ggtt_vma_pages(struct i915_vma *vma)
>  {
> -	int ret = 0;
> +	int ret;
>  
>  	/* The vma->pages are only valid within the lifespan of the borrowed
>  	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
> @@ -3523,32 +3525,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>  	 */
>  	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
>  
> -	if (vma->pages)
> +	switch (vma->ggtt_view.type) {
> +	case I915_GGTT_VIEW_NORMAL:
> +		vma->pages = vma->obj->mm.pages;
>  		return 0;
>  
> -	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
> -		vma->pages = vma->obj->mm.pages;
> -	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
> +	case I915_GGTT_VIEW_ROTATED:
>  		vma->pages =
> -			intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated,
> -						  vma->obj);
> -	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
> +			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
> +		break;
> +
> +	case I915_GGTT_VIEW_PARTIAL:
>  		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
> -	else
> +		break;
> +
> +	default:
>  		WARN_ONCE(1, "GGTT view %u not implemented!\n",
>  			  vma->ggtt_view.type);
> +		return -EINVAL;
> +	}
>  
> -	if (!vma->pages) {
> -		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
> -			  vma->ggtt_view.type);
> -		ret = -EINVAL;
> -	} else if (IS_ERR(vma->pages)) {
> +	ret = 0;
> +	if (unlikely(IS_ERR(vma->pages))) {
>  		ret = PTR_ERR(vma->pages);
>  		vma->pages = NULL;
>  		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
>  			  vma->ggtt_view.type, ret);
>  	}
> -
>  	return ret;
>  }
>  
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes
  2017-02-02 15:02 ` [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
@ 2017-02-08 16:30   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-08 16:30 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> We only operate on known extents (both for alloc/clear) and so we can use
> both the knowledge of the bind/unbind range along with the knowledge of
> the existing pagetable to avoid having to allocate temporary and
> auxiliary bitmaps.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes
  2017-02-02 15:02 ` [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
@ 2017-02-08 17:42   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-08 17:42 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> We only operate on known extents (both for alloc/clear) and so we can use
> both the knowledge of the bind/unbind range along with the knowledge of
> the existing pagetable to avoid having to allocate temporary and
> auxiliary bitmaps.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 275 +++++++++++-------------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   3 +-
>  2 files changed, 84 insertions(+), 194 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 15e95904931f..99319461f86c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -526,24 +526,13 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>  static int __pdp_init(struct i915_address_space *vm,
>                       struct i915_page_directory_pointer *pdp)
>  {
> -       size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
> -       int i;
> -
> -       pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
> -                                 sizeof(unsigned long),
> -                                 GFP_KERNEL);
> -       if (!pdp->used_pdpes)
> -               return -ENOMEM;
> +       const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915);
> +       unsigned int i;
>
>         pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
> -                                           GFP_KERNEL);
> -       if (!pdp->page_directory) {
> -               kfree(pdp->used_pdpes);
> -               /* the PDP might be the statically allocated top level. Keep it
> -                * as clean as possible */
> -               pdp->used_pdpes = NULL;
> +                                           GFP_KERNEL | __GFP_NOWARN);
> +       if (unlikely(!pdp->page_directory))
>                 return -ENOMEM;
> -       }
>
>         for (i = 0; i < pdpes; i++)
>                 pdp->page_directory[i] = vm->scratch_pd;
> @@ -553,7 +542,6 @@ static int __pdp_init(struct i915_address_space *vm,
>
>  static void __pdp_fini(struct i915_page_directory_pointer *pdp)
>  {
> -       kfree(pdp->used_pdpes);
>         kfree(pdp->page_directory);
>         pdp->page_directory = NULL;
>  }
> @@ -611,23 +599,12 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>  static void gen8_initialize_pml4(struct i915_address_space *vm,
>                                  struct i915_pml4 *pml4)
>  {
> -       gen8_ppgtt_pml4e_t scratch_pml4e;
> -
> -       scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
> -                                         I915_CACHE_LLC);
> -
> -       fill_px(vm, pml4, scratch_pml4e);
> -}
> -
> -static void
> -gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
> -                    struct i915_page_directory_pointer *pdp,
> -                    int index)
> -{
> -       gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
> +       unsigned int i;
>
> -       pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> -       kunmap_atomic(pagemap);
> +       fill_px(vm, pml4,
> +               gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
> +       for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
> +               pml4->pdps[i] = vm->scratch_pdp;
>  }
>
>  /* Broadwell Page Directory Pointer Descriptors */
> @@ -781,15 +758,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>                         continue;
>
>                 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
> -               __clear_bit(pdpe, pdp->used_pdpes);
> +               pdp->used_pdpes--;
>
>                 free_pd(vm, pd);
>         }
>
> -       if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
> -               return true;
> -
> -       return false;
> +       return !pdp->used_pdpes;
>  }
>
>  static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
> @@ -798,6 +772,19 @@ static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
>         gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
>  }
>
> +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
> +                                struct i915_page_directory_pointer *pdp,
> +                                unsigned int pml4e)
> +{
> +       gen8_ppgtt_pml4e_t *vaddr;
> +
> +       pml4->pdps[pml4e] = pdp;
> +
> +       vaddr = kmap_atomic_px(pml4);
> +       vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> +       kunmap_atomic(vaddr);
> +}
> +
>  /* Removes entries from a single pml4.
>   * This is the top-level structure in 4-level page tables used on gen8+.
>   * Empty entries are always scratch pml4e.
> @@ -808,19 +795,18 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>         struct i915_pml4 *pml4 = &ppgtt->pml4;
>         struct i915_page_directory_pointer *pdp;
> -       uint64_t pml4e;
> +       unsigned int pml4e;
>
>         GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
>
>         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -               if (WARN_ON(!pml4->pdps[pml4e]))
> -                       break;
> +               if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
> +                       continue;
>
> -               if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
> -                       __clear_bit(pml4e, pml4->used_pml4es);
> -                       gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
> -                       free_pdp(vm, pdp);
> -               }
> +               gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
> +               __clear_bit(pml4e, pml4->used_pml4es);
> +
> +               free_pdp(vm, pdp);
>         }
>  }
>
> @@ -1017,7 +1003,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  {
>         int i;
>
> -       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
> +       for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) {
>                 if (pdp->page_directory[i] == vm->scratch_pd)
>                         continue;
>
> @@ -1088,65 +1074,6 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  }
>
>  /**
> - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
> - * @vm:        Master vm structure.
> - * @pdp:       Page directory pointer for this address range.
> - * @start:     Starting virtual address to begin allocations.
> - * @length:    Size of the allocations.
> - * @new_pds:   Bitmap set by function with new allocations. Likely used by the
> - *             caller to free on error.
> - *
> - * Allocate the required number of page directories starting at the pde index of
> - * @start, and ending at the pde index @start + @length. This function will skip
> - * over already allocated page directories within the range, and only allocate
> - * new ones, setting the appropriate pointer within the pdp as well as the
> - * correct position in the bitmap @new_pds.
> - *
> - * The function will only allocate the pages within the range for a give page
> - * directory pointer. In other words, if @start + @length straddles a virtually
> - * addressed PDP boundary (512GB for 4k pages), there will be more allocations
> - * required by the caller, This is not currently possible, and the BUG in the
> - * code will prevent it.
> - *
> - * Return: 0 if success; negative error code otherwise.
> - */
> -static int
> -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
> -                                 struct i915_page_directory_pointer *pdp,
> -                                 uint64_t start,
> -                                 uint64_t length,
> -                                 unsigned long *new_pds)
> -{
> -       struct i915_page_directory *pd;
> -       uint32_t pdpe;
> -       uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> -
> -       WARN_ON(!bitmap_empty(new_pds, pdpes));
> -
> -       gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -               if (test_bit(pdpe, pdp->used_pdpes))
> -                       continue;
> -
> -               pd = alloc_pd(vm);
> -               if (IS_ERR(pd))
> -                       goto unwind_out;
> -
> -               gen8_initialize_pd(vm, pd);
> -               pdp->page_directory[pdpe] = pd;
> -               __set_bit(pdpe, new_pds);
> -               trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
> -       }
> -
> -       return 0;
> -
> -unwind_out:
> -       for_each_set_bit(pdpe, new_pds, pdpes)
> -               free_pd(vm, pdp->page_directory[pdpe]);
> -
> -       return -ENOMEM;
> -}
> -
> -/**
>   * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
>   * @vm:        Master vm structure.
>   * @pml4:      Page map level 4 for this address range.
> @@ -1166,23 +1093,19 @@ static int
>  gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>                                   struct i915_pml4 *pml4,
>                                   uint64_t start,
> -                                 uint64_t length,
> -                                 unsigned long *new_pdps)
> +                                 uint64_t length)
>  {
>         struct i915_page_directory_pointer *pdp;
>         uint32_t pml4e;
>
> -       WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
> -
>         gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>                 if (!test_bit(pml4e, pml4->used_pml4es)) {
>                         pdp = alloc_pdp(vm);
>                         if (IS_ERR(pdp))
> -                               goto unwind_out;
> +                               return PTR_ERR(pdp);
>
>                         gen8_initialize_pdp(vm, pdp);
>                         pml4->pdps[pml4e] = pdp;
> -                       __set_bit(pml4e, new_pdps);
>                         trace_i915_page_directory_pointer_entry_alloc(vm,
>                                                                       pml4e,
>                                                                       start,
> @@ -1191,34 +1114,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>         }
>
>         return 0;
> -
> -unwind_out:
> -       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -               free_pdp(vm, pml4->pdps[pml4e]);
> -
> -       return -ENOMEM;
> -}
> -
> -static void
> -free_gen8_temp_bitmaps(unsigned long *new_pds)
> -{
> -       kfree(new_pds);
> -}
> -
> -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
> - * of these are based on the number of PDPEs in the system.
> - */
> -static int __must_check
> -alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
> -{
> -       unsigned long *pds;
> -
> -       pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
> -       if (!pds)
> -               return -ENOMEM;
> -
> -       *new_pds = pds;
> -       return 0;
>  }
>
>  static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
> @@ -1227,47 +1122,37 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>                                     uint64_t length)
>  {
>         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -       unsigned long *new_page_dirs;
>         struct i915_page_directory *pd;
> -       uint32_t pdpe;
> -       uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> +       u64 from = start;
> +       unsigned int pdpe;
>         int ret;
>
> -       ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
> -       if (ret)
> -               return ret;
> -
> -       /* Do the allocations first so we can easily bail out */
> -       ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
> -                                               new_page_dirs);
> -       if (ret) {
> -               free_gen8_temp_bitmaps(new_page_dirs);
> -               return ret;
> -       }
> -
> -       /* For every page directory referenced, allocate page tables */
>         gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -               ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> -               if (ret)
> -                       goto err_out;
> +               if (pd == vm->scratch_pd) {
> +                       pd = alloc_pd(vm);
> +                       if (IS_ERR(pd))
> +                               goto unwind;
>
> -               if (test_and_set_bit(pdpe, pdp->used_pdpes))
> +                       gen8_initialize_pd(vm, pd);
>                         gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
> +                       pdp->used_pdpes++;
> +               }
> +
> +               ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
> +               if (unlikely(ret)) {
> +                       gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
> +                       pdp->used_pdpes--;
> +                       free_pd(vm, pd);
> +                       goto unwind;
> +               }
>         }
>
> -       /* Allocations have completed successfully, so set the bitmaps, and do
> -        * the mappings. */
> -       free_gen8_temp_bitmaps(new_page_dirs);
>         mark_tlbs_dirty(ppgtt);
>         return 0;
>
> -err_out:
> -       for_each_set_bit(pdpe, new_page_dirs, pdpes)
> -               free_pd(vm, pdp->page_directory[pdpe]);
> -
> -       free_gen8_temp_bitmaps(new_page_dirs);
> -       mark_tlbs_dirty(ppgtt);
> -       return ret;
> +unwind:
> +       gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
> +       return -ENOMEM;
>  }
>
>  static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> @@ -1287,8 +1172,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>         /* The pagedirectory and pagetable allocations are done in the shared 3
>          * and 4 level code. Just allocate the pdps.
>          */
> -       ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
> -                                               new_pdps);
> +       ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
>         if (ret)
>                 return ret;
>
> @@ -1340,7 +1224,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
>                 uint64_t pd_start = start;
>                 uint32_t pde;
>
> -               if (!test_bit(pdpe, pdp->used_pdpes))
> +               if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
>                         continue;
>
>                 seq_printf(m, "\tPDPE #%d\n", pdpe);
> @@ -1407,29 +1291,34 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>
>  static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
>  {
s/gen8_preallocate_top_level_pdps/gen8_preallocate_top_level_pdp/ ?

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4
  2017-02-02 15:02 ` [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
@ 2017-02-08 17:47   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-08 17:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> We only operate on known extents (both for alloc/clear) and so we can use
> both the knowledge of the bind/unbind range along with the knowledge of
> the existing pagetable to avoid having to allocate temporary and
> auxiliary bitmaps.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT
  2017-02-02 15:02 ` [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
@ 2017-02-08 17:48   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-08 17:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> The barrier here is not required - we apply the barrier before the range
> is ever reused by the GPU instead.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered
  2017-02-02 15:02 ` [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
@ 2017-02-08 17:53   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-08 17:53 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> We want to reload the PDP (and flush the TLB) when the addresses are
> changed.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints
  2017-02-02 15:02 ` [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints Chris Wilson
@ 2017-02-08 18:01   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-08 18:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> The tracepoints are now entirely synonymous with binding and unbinding the
> VMA (and the tracepoints there).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
  2017-02-02 15:02 ` [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
@ 2017-02-09 11:34   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-09 11:34 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Inline the address computation to avoid the vfunc call for every page.
> We still have to pay the high overhead of sg_page_iter_next(), but now
> at least GCC can optimise the inner most loop, giving a significant
> boost to some thrashing Unreal Engine workloads.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps
  2017-02-02 15:02 ` [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
@ 2017-02-09 15:08   ` Mika Kuoppala
  2017-02-09 15:11     ` Mika Kuoppala
  0 siblings, 1 reply; 46+ messages in thread
From: Mika Kuoppala @ 2017-02-09 15:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We flush the entire page every time we update a few bytes, making the
> update of a page table many, many times slower than is required. If we
> create a WC map of the page for our updates, we can avoid the clflush
> but incur additional cost for creating the pagetable. We amortize that
> cost by reusing page vmappings, and only changing the page protection in
> batches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 329 ++++++++++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +
>  2 files changed, 172 insertions(+), 162 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 45bab7b7b026..302aee193ce5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -340,43 +340,69 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>  	return pte;
>  }
>  
> -static int __setup_page_dma(struct drm_i915_private *dev_priv,
> -			    struct i915_page_dma *p, gfp_t flags)
> +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -	struct device *kdev = &dev_priv->drm.pdev->dev;
> +	struct page *page;
>  
> -	p->page = alloc_page(flags);
> -	if (!p->page)
> -		return -ENOMEM;
> +	if (vm->free_pages.nr)
> +		return vm->free_pages.pages[--vm->free_pages.nr];
>  
> -	p->daddr = dma_map_page(kdev,
> -				p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +	page = alloc_page(gfp);
> +	if (!page)
> +		return NULL;
>  
> -	if (dma_mapping_error(kdev, p->daddr)) {
> -		__free_page(p->page);
> -		return -EINVAL;
> +	if (vm->pt_kmap_wc)
> +		set_pages_array_wc(&page, 1);
> +
> +	return page;
> +}
> +
> +static void vm_free_pages_release(struct i915_address_space *vm)
> +{
> +	GEM_BUG_ON(!pagevec_count(&vm->free_pages));
> +
> +	if (vm->pt_kmap_wc)
> +		set_pages_array_wb(vm->free_pages.pages,
> +				   pagevec_count(&vm->free_pages));
> +
> +	__pagevec_release(&vm->free_pages);
> +}
> +
> +static void vm_free_page(struct i915_address_space *vm, struct page *page)
> +{
> +	if (!pagevec_add(&vm->free_pages, page))
> +		vm_free_pages_release(vm);

What about the page you failed to push to vec?

It seems to me that you are missing a retry after
the vec cleanup.

-Mika

> +}
> +
> +static int __setup_page_dma(struct i915_address_space *vm,
> +			    struct i915_page_dma *p,
> +			    gfp_t gfp)
> +{
> +	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
> +	if (unlikely(!p->page))
> +		return -ENOMEM;
> +
> +	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
> +				PCI_DMA_BIDIRECTIONAL);
> +	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
> +		vm_free_page(vm, p->page);
> +		return -ENOMEM;
>  	}
>  
>  	return 0;
>  }
>  
> -static int setup_page_dma(struct drm_i915_private *dev_priv,
> +static int setup_page_dma(struct i915_address_space *vm,
>  			  struct i915_page_dma *p)
>  {
> -	return __setup_page_dma(dev_priv, p, I915_GFP_DMA);
> +	return __setup_page_dma(vm, p, I915_GFP_DMA);
>  }
>  
> -static void cleanup_page_dma(struct drm_i915_private *dev_priv,
> +static void cleanup_page_dma(struct i915_address_space *vm,
>  			     struct i915_page_dma *p)
>  {
> -	struct pci_dev *pdev = dev_priv->drm.pdev;
> -
> -	if (WARN_ON(!p->page))
> -		return;
> -
> -	dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> -	__free_page(p->page);
> -	memset(p, 0, sizeof(*p));
> +	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +	vm_free_page(vm, p->page);
>  }
>  
>  static void *kmap_page_dma(struct i915_page_dma *p)
> @@ -387,67 +413,54 @@ static void *kmap_page_dma(struct i915_page_dma *p)
>  /* We use the flushing unmap only with ppgtt structures:
>   * page directories, page tables and scratch pages.
>   */
> -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
> +static void kunmap_page_dma(void *vaddr)
>  {
> -	/* There are only few exceptions for gen >=6. chv and bxt.
> -	 * And we are not sure about the latter so play safe for now.
> -	 */
> -	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> -		drm_clflush_virt_range(vaddr, PAGE_SIZE);
> -
>  	kunmap_atomic(vaddr);
>  }
>  
>  #define kmap_px(px) kmap_page_dma(px_base(px))
> -#define kunmap_px(ppgtt, vaddr) \
> -		kunmap_page_dma((ppgtt)->base.i915, (vaddr))
> +#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
>  
> -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px))
> -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px))
> -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
> -#define fill32_px(dev_priv, px, v) \
> -		fill_page_dma_32((dev_priv), px_base(px), (v))
> +#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
> +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
> +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
> +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))
>  
> -static void fill_page_dma(struct drm_i915_private *dev_priv,
> -			  struct i915_page_dma *p, const uint64_t val)
> +static void fill_page_dma(struct i915_address_space *vm,
> +			  struct i915_page_dma *p,
> +			  const u64 val)
>  {
> +	u64 * const vaddr = kmap_page_dma(p);
>  	int i;
> -	uint64_t * const vaddr = kmap_page_dma(p);
>  
>  	for (i = 0; i < 512; i++)
>  		vaddr[i] = val;
>  
> -	kunmap_page_dma(dev_priv, vaddr);
> +	kunmap_page_dma(vaddr);
>  }
>  
> -static void fill_page_dma_32(struct drm_i915_private *dev_priv,
> -			     struct i915_page_dma *p, const uint32_t val32)
> +static void fill_page_dma_32(struct i915_address_space *vm,
> +			     struct i915_page_dma *p,
> +			     const u32 v)
>  {
> -	uint64_t v = val32;
> -
> -	v = v << 32 | val32;
> -
> -	fill_page_dma(dev_priv, p, v);
> +	fill_page_dma(vm, p, (u64)v << 32 | v);
>  }
>  
>  static int
> -setup_scratch_page(struct drm_i915_private *dev_priv,
> -		   struct i915_page_dma *scratch,
> -		   gfp_t gfp)
> +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -	return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO);
> +	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
>  }
>  
> -static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
> -				 struct i915_page_dma *scratch)
> +static void cleanup_scratch_page(struct i915_address_space *vm)
>  {
> -	cleanup_page_dma(dev_priv, scratch);
> +	cleanup_page_dma(vm, &vm->scratch_page);
>  }
>  
> -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
> +static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  {
>  	struct i915_page_table *pt;
> -	const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
> +	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
>  	int ret = -ENOMEM;
>  
>  	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
> @@ -460,7 +473,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>  	if (!pt->used_ptes)
>  		goto fail_bitmap;
>  
> -	ret = setup_px(dev_priv, pt);
> +	ret = setup_px(vm, pt);
>  	if (ret)
>  		goto fail_page_m;
>  
> @@ -474,10 +487,9 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>  	return ERR_PTR(ret);
>  }
>  
> -static void free_pt(struct drm_i915_private *dev_priv,
> -		    struct i915_page_table *pt)
> +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
>  {
> -	cleanup_px(dev_priv, pt);
> +	cleanup_px(vm, pt);
>  	kfree(pt->used_ptes);
>  	kfree(pt);
>  }
> @@ -490,7 +502,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
>  	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
>  				      I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pt, scratch_pte);
> +	fill_px(vm, pt, scratch_pte);
>  }
>  
>  static void gen6_initialize_pt(struct i915_address_space *vm,
> @@ -503,10 +515,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
>  				     I915_CACHE_LLC, 0);
>  
> -	fill32_px(vm->i915, pt, scratch_pte);
> +	fill32_px(vm, pt, scratch_pte);
>  }
>  
> -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
> +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>  {
>  	struct i915_page_directory *pd;
>  	int ret = -ENOMEM;
> @@ -520,7 +532,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>  	if (!pd->used_pdes)
>  		goto fail_bitmap;
>  
> -	ret = setup_px(dev_priv, pd);
> +	ret = setup_px(vm, pd);
>  	if (ret)
>  		goto fail_page_m;
>  
> @@ -534,11 +546,11 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>  	return ERR_PTR(ret);
>  }
>  
> -static void free_pd(struct drm_i915_private *dev_priv,
> +static void free_pd(struct i915_address_space *vm,
>  		    struct i915_page_directory *pd)
>  {
>  	if (px_page(pd)) {
> -		cleanup_px(dev_priv, pd);
> +		cleanup_px(vm, pd);
>  		kfree(pd->used_pdes);
>  		kfree(pd);
>  	}
> @@ -551,7 +563,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>  
>  	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pd, scratch_pde);
> +	fill_px(vm, pd, scratch_pde);
>  }
>  
>  static int __pdp_init(struct drm_i915_private *dev_priv,
> @@ -585,23 +597,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
>  	pdp->page_directory = NULL;
>  }
>  
> -static struct
> -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
> +static struct i915_page_directory_pointer *
> +alloc_pdp(struct i915_address_space *vm)
>  {
>  	struct i915_page_directory_pointer *pdp;
>  	int ret = -ENOMEM;
>  
> -	WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv));
> +	WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
>  
>  	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
>  	if (!pdp)
>  		return ERR_PTR(-ENOMEM);
>  
> -	ret = __pdp_init(dev_priv, pdp);
> +	ret = __pdp_init(vm->i915, pdp);
>  	if (ret)
>  		goto fail_bitmap;
>  
> -	ret = setup_px(dev_priv, pdp);
> +	ret = setup_px(vm, pdp);
>  	if (ret)
>  		goto fail_page_m;
>  
> @@ -615,12 +627,12 @@ i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
>  	return ERR_PTR(ret);
>  }
>  
> -static void free_pdp(struct drm_i915_private *dev_priv,
> +static void free_pdp(struct i915_address_space *vm,
>  		     struct i915_page_directory_pointer *pdp)
>  {
>  	__pdp_fini(pdp);
> -	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -		cleanup_px(dev_priv, pdp);
> +	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
> +		cleanup_px(vm, pdp);
>  		kfree(pdp);
>  	}
>  }
> @@ -632,7 +644,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>  
>  	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pdp, scratch_pdpe);
> +	fill_px(vm, pdp, scratch_pdpe);
>  }
>  
>  static void gen8_initialize_pml4(struct i915_address_space *vm,
> @@ -643,7 +655,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
>  	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
>  					  I915_CACHE_LLC);
>  
> -	fill_px(vm->i915, pml4, scratch_pml4e);
> +	fill_px(vm, pml4, scratch_pml4e);
>  }
>  
>  static void
> @@ -659,20 +671,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
>  
>  	page_directorypo = kmap_px(pdp);
>  	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
> -	kunmap_px(ppgtt, page_directorypo);
> +	kunmap_px(page_directorypo);
>  }
>  
>  static void
> -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt,
> -		 struct i915_pml4 *pml4,
> +gen8_setup_pml4e(struct i915_pml4 *pml4,
>  		 struct i915_page_directory_pointer *pdp,
>  		 int index)
>  {
>  	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
>  
> -	WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)));
>  	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
> -	kunmap_px(ppgtt, pagemap);
> +	kunmap_px(pagemap);
>  }
>  
>  /* Broadwell Page Directory Pointer Descriptors */
> @@ -741,7 +751,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  				uint64_t start,
>  				uint64_t length)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	unsigned int num_entries = gen8_pte_count(start, length);
>  	unsigned int pte = gen8_pte_index(start);
>  	unsigned int pte_end = pte + num_entries;
> @@ -762,7 +771,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  	vaddr = kmap_px(pt);
>  	while (pte < pte_end)
>  		vaddr[pte++] = scratch_pte;
> -	kunmap_px(ppgtt, vaddr);
> +	kunmap_px(vaddr);
>  
>  	return false;
>  }
> @@ -775,7 +784,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  				uint64_t start,
>  				uint64_t length)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_table *pt;
>  	uint64_t pde;
>  	gen8_pde_t *pde_vaddr;
> @@ -790,8 +798,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  			__clear_bit(pde, pd->used_pdes);
>  			pde_vaddr = kmap_px(pd);
>  			pde_vaddr[pde] = scratch_pde;
> -			kunmap_px(ppgtt, pde_vaddr);
> -			free_pt(vm->i915, pt);
> +			kunmap_px(pde_vaddr);
> +			free_pt(vm, pt);
>  		}
>  	}
>  
> @@ -820,7 +828,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
>  			__clear_bit(pdpe, pdp->used_pdpes);
>  			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
> -			free_pd(vm->i915, pd);
> +			free_pd(vm, pd);
>  		}
>  	}
>  
> @@ -841,7 +849,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  				  uint64_t start,
>  				  uint64_t length)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory_pointer *pdp;
>  	uint64_t pml4e;
>  
> @@ -853,8 +860,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  
>  		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
>  			__clear_bit(pml4e, pml4->used_pml4es);
> -			gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e);
> -			free_pdp(vm->i915, pdp);
> +			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
> +			free_pdp(vm, pdp);
>  		}
>  	}
>  }
> @@ -912,12 +919,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
>  				pde = 0;
>  			}
>  
> -			kunmap_px(ppgtt, vaddr);
> +			kunmap_px(vaddr);
>  			vaddr = kmap_px(pd->page_table[pde]);
>  			pte = 0;
>  		}
>  	} while (1);
> -	kunmap_px(ppgtt, vaddr);
> +	kunmap_px(vaddr);
>  
>  	return ret;
>  }
> @@ -959,7 +966,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>  		;
>  }
>  
> -static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
> +static void gen8_free_page_tables(struct i915_address_space *vm,
>  				  struct i915_page_directory *pd)
>  {
>  	int i;
> @@ -971,34 +978,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
>  		if (WARN_ON(!pd->page_table[i]))
>  			continue;
>  
> -		free_pt(dev_priv, pd->page_table[i]);
> +		free_pt(vm, pd->page_table[i]);
>  		pd->page_table[i] = NULL;
>  	}
>  }
>  
>  static int gen8_init_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	int ret;
>  
> -	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
> +	ret = setup_scratch_page(vm, I915_GFP_DMA);
>  	if (ret)
>  		return ret;
>  
> -	vm->scratch_pt = alloc_pt(dev_priv);
> +	vm->scratch_pt = alloc_pt(vm);
>  	if (IS_ERR(vm->scratch_pt)) {
>  		ret = PTR_ERR(vm->scratch_pt);
>  		goto free_scratch_page;
>  	}
>  
> -	vm->scratch_pd = alloc_pd(dev_priv);
> +	vm->scratch_pd = alloc_pd(vm);
>  	if (IS_ERR(vm->scratch_pd)) {
>  		ret = PTR_ERR(vm->scratch_pd);
>  		goto free_pt;
>  	}
>  
> -	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -		vm->scratch_pdp = alloc_pdp(dev_priv);
> +	if (USES_FULL_48BIT_PPGTT(dev)) {
> +		vm->scratch_pdp = alloc_pdp(vm);
>  		if (IS_ERR(vm->scratch_pdp)) {
>  			ret = PTR_ERR(vm->scratch_pdp);
>  			goto free_pd;
> @@ -1013,11 +1019,11 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>  	return 0;
>  
>  free_pd:
> -	free_pd(dev_priv, vm->scratch_pd);
> +	free_pd(vm, vm->scratch_pd);
>  free_pt:
> -	free_pt(dev_priv, vm->scratch_pt);
> +	free_pt(vm, vm->scratch_pt);
>  free_scratch_page:
> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +	cleanup_scratch_page(vm);
>  
>  	return ret;
>  }
> @@ -1055,44 +1061,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
>  
>  static void gen8_free_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
> -
> -	if (USES_FULL_48BIT_PPGTT(dev_priv))
> -		free_pdp(dev_priv, vm->scratch_pdp);
> -	free_pd(dev_priv, vm->scratch_pd);
> -	free_pt(dev_priv, vm->scratch_pt);
> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +	if (USES_FULL_48BIT_PPGTT(vm->i915))
> +		free_pdp(vm, vm->scratch_pdp);
> +	free_pd(vm, vm->scratch_pd);
> +	free_pt(vm, vm->scratch_pt);
> +	cleanup_scratch_page(vm);
>  }
>  
> -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
> +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  				    struct i915_page_directory_pointer *pdp)
>  {
>  	int i;
>  
> -	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
> +	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
>  		if (WARN_ON(!pdp->page_directory[i]))
>  			continue;
>  
> -		gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
> -		free_pd(dev_priv, pdp->page_directory[i]);
> +		gen8_free_page_tables(vm, pdp->page_directory[i]);
> +		free_pd(vm, pdp->page_directory[i]);
>  	}
>  
> -	free_pdp(dev_priv, pdp);
> +	free_pdp(vm, pdp);
>  }
>  
>  static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
>  {
> -	struct drm_i915_private *dev_priv = ppgtt->base.i915;
>  	int i;
>  
>  	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
>  		if (WARN_ON(!ppgtt->pml4.pdps[i]))
>  			continue;
>  
> -		gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]);
> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
>  	}
>  
> -	cleanup_px(dev_priv, &ppgtt->pml4);
> +	cleanup_px(&ppgtt->base, &ppgtt->pml4);
>  }
>  
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
> @@ -1103,8 +1106,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  	if (intel_vgpu_active(dev_priv))
>  		gen8_ppgtt_notify_vgt(ppgtt, false);
>  
> -	if (!USES_FULL_48BIT_PPGTT(dev_priv))
> -		gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp);
> +	if (!USES_FULL_48BIT_PPGTT(vm->i915))
> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
>  	else
>  		gen8_ppgtt_cleanup_4lvl(ppgtt);
>  
> @@ -1135,7 +1138,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>  				     uint64_t length,
>  				     unsigned long *new_pts)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_table *pt;
>  	uint32_t pde;
>  
> @@ -1147,7 +1149,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>  			continue;
>  		}
>  
> -		pt = alloc_pt(dev_priv);
> +		pt = alloc_pt(vm);
>  		if (IS_ERR(pt))
>  			goto unwind_out;
>  
> @@ -1161,7 +1163,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>  
>  unwind_out:
>  	for_each_set_bit(pde, new_pts, I915_PDES)
> -		free_pt(dev_priv, pd->page_table[pde]);
> +		free_pt(vm, pd->page_table[pde]);
>  
>  	return -ENOMEM;
>  }
> @@ -1196,7 +1198,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>  				  uint64_t length,
>  				  unsigned long *new_pds)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_directory *pd;
>  	uint32_t pdpe;
>  	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
> @@ -1207,7 +1208,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>  		if (test_bit(pdpe, pdp->used_pdpes))
>  			continue;
>  
> -		pd = alloc_pd(dev_priv);
> +		pd = alloc_pd(vm);
>  		if (IS_ERR(pd))
>  			goto unwind_out;
>  
> @@ -1221,7 +1222,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>  
>  unwind_out:
>  	for_each_set_bit(pdpe, new_pds, pdpes)
> -		free_pd(dev_priv, pdp->page_directory[pdpe]);
> +		free_pd(vm, pdp->page_directory[pdpe]);
>  
>  	return -ENOMEM;
>  }
> @@ -1249,7 +1250,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>  				  uint64_t length,
>  				  unsigned long *new_pdps)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_directory_pointer *pdp;
>  	uint32_t pml4e;
>  
> @@ -1257,7 +1257,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>  		if (!test_bit(pml4e, pml4->used_pml4es)) {
> -			pdp = alloc_pdp(dev_priv);
> +			pdp = alloc_pdp(vm);
>  			if (IS_ERR(pdp))
>  				goto unwind_out;
>  
> @@ -1275,7 +1275,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>  
>  unwind_out:
>  	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -		free_pdp(dev_priv, pml4->pdps[pml4e]);
> +		free_pdp(vm, pml4->pdps[pml4e]);
>  
>  	return -ENOMEM;
>  }
> @@ -1324,7 +1324,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	unsigned long *new_page_dirs, *new_page_tables;
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_directory *pd;
>  	const uint64_t orig_start = start;
>  	const uint64_t orig_length = length;
> @@ -1393,7 +1392,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  			 * point we're still relying on insert_entries() */
>  		}
>  
> -		kunmap_px(ppgtt, page_directory);
> +		kunmap_px(page_directory);
>  		__set_bit(pdpe, pdp->used_pdpes);
>  		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
>  	}
> @@ -1408,12 +1407,11 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  
>  		for_each_set_bit(temp, new_page_tables + pdpe *
>  				BITS_TO_LONGS(I915_PDES), I915_PDES)
> -			free_pt(dev_priv,
> -				pdp->page_directory[pdpe]->page_table[temp]);
> +			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
>  	}
>  
>  	for_each_set_bit(pdpe, new_page_dirs, pdpes)
> -		free_pd(dev_priv, pdp->page_directory[pdpe]);
> +		free_pd(vm, pdp->page_directory[pdpe]);
>  
>  	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>  	mark_tlbs_dirty(ppgtt);
> @@ -1426,7 +1424,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>  				    uint64_t length)
>  {
>  	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory_pointer *pdp;
>  	uint64_t pml4e;
>  	int ret = 0;
> @@ -1454,7 +1451,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>  		if (ret)
>  			goto err_out;
>  
> -		gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e);
> +		gen8_setup_pml4e(pml4, pdp, pml4e);
>  	}
>  
>  	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
> @@ -1464,7 +1461,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>  
>  err_out:
>  	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> -		gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]);
> +		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
>  
>  	return ret;
>  }
> @@ -1480,7 +1477,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>  		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>  }
>  
> -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
> +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
> +			  struct i915_page_directory_pointer *pdp,
>  			  uint64_t start, uint64_t length,
>  			  gen8_pte_t scratch_pte,
>  			  struct seq_file *m)
> @@ -1546,7 +1544,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  						 I915_CACHE_LLC);
>  
>  	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
> -		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
> +		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
>  	} else {
>  		uint64_t pml4e;
>  		struct i915_pml4 *pml4 = &ppgtt->pml4;
> @@ -1557,7 +1555,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  				continue;
>  
>  			seq_printf(m, "    PML4E #%llu\n", pml4e);
> -			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
> +			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
>  		}
>  	}
>  }
> @@ -1613,8 +1611,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.bind_vma = ppgtt_bind_vma;
>  	ppgtt->debug_dump = gen8_dump_ppgtt;
>  
> +	/* There are only few exceptions for gen >=6. chv and bxt.
> +	 * And we are not sure about the latter so play safe for now.
> +	 */
> +	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
> +		ppgtt->base.pt_kmap_wc = true;
> +
>  	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
> -		ret = setup_px(dev_priv, &ppgtt->pml4);
> +		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
>  		if (ret)
>  			goto free_scratch;
>  
> @@ -1703,7 +1707,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  			}
>  			seq_puts(m, "\n");
>  		}
> -		kunmap_px(ppgtt, pt_vaddr);
> +		kunmap_px(pt_vaddr);
>  	}
>  }
>  
> @@ -1900,7 +1904,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  		for (i = first_pte; i < last_pte; i++)
>  			pt_vaddr[i] = scratch_pte;
>  
> -		kunmap_px(ppgtt, pt_vaddr);
> +		kunmap_px(pt_vaddr);
>  
>  		num_entries -= last_pte - first_pte;
>  		first_pte = 0;
> @@ -1939,12 +1943,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  		}
>  
>  		if (++act_pte == GEN6_PTES) {
> -			kunmap_px(ppgtt, vaddr);
> +			kunmap_px(vaddr);
>  			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
>  			act_pte = 0;
>  		}
>  	} while (1);
> -	kunmap_px(ppgtt, vaddr);
> +	kunmap_px(vaddr);
>  }
>  
>  static int gen6_alloc_va_range(struct i915_address_space *vm,
> @@ -1978,7 +1982,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		/* We've already allocated a page table */
>  		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
>  
> -		pt = alloc_pt(dev_priv);
> +		pt = alloc_pt(vm);
>  		if (IS_ERR(pt)) {
>  			ret = PTR_ERR(pt);
>  			goto unwind_out;
> @@ -2026,7 +2030,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
>  
>  		ppgtt->pd.page_table[pde] = vm->scratch_pt;
> -		free_pt(dev_priv, pt);
> +		free_pt(vm, pt);
>  	}
>  
>  	mark_tlbs_dirty(ppgtt);
> @@ -2035,16 +2039,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  
>  static int gen6_init_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	int ret;
>  
> -	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
> +	ret = setup_scratch_page(vm, I915_GFP_DMA);
>  	if (ret)
>  		return ret;
>  
> -	vm->scratch_pt = alloc_pt(dev_priv);
> +	vm->scratch_pt = alloc_pt(vm);
>  	if (IS_ERR(vm->scratch_pt)) {
> -		cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +		cleanup_scratch_page(vm);
>  		return PTR_ERR(vm->scratch_pt);
>  	}
>  
> @@ -2055,17 +2058,14 @@ static int gen6_init_scratch(struct i915_address_space *vm)
>  
>  static void gen6_free_scratch(struct i915_address_space *vm)
>  {
> -	struct drm_i915_private *dev_priv = vm->i915;
> -
> -	free_pt(dev_priv, vm->scratch_pt);
> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
> +	free_pt(vm, vm->scratch_pt);
> +	cleanup_scratch_page(vm);
>  }
>  
>  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	struct i915_page_directory *pd = &ppgtt->pd;
> -	struct drm_i915_private *dev_priv = vm->i915;
>  	struct i915_page_table *pt;
>  	uint32_t pde;
>  
> @@ -2073,7 +2073,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>  
>  	gen6_for_all_pdes(pt, pd, pde)
>  		if (pt != vm->scratch_pt)
> -			free_pt(dev_priv, pt);
> +			free_pt(vm, pt);
>  
>  	gen6_free_scratch(vm);
>  }
> @@ -2182,6 +2182,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
>  			   struct drm_i915_private *dev_priv)
>  {
>  	ppgtt->base.i915 = dev_priv;
> +	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
>  
>  	if (INTEL_INFO(dev_priv)->gen < 8)
>  		return gen6_ppgtt_init(ppgtt);
> @@ -2199,10 +2200,14 @@ static void i915_address_space_init(struct i915_address_space *vm,
>  	INIT_LIST_HEAD(&vm->inactive_list);
>  	INIT_LIST_HEAD(&vm->unbound_list);
>  	list_add_tail(&vm->global_link, &dev_priv->vm_list);
> +	pagevec_init(&vm->free_pages, false);
>  }
>  
>  static void i915_address_space_fini(struct i915_address_space *vm)
>  {
> +	if (pagevec_count(&vm->free_pages))
> +		vm_free_pages_release(vm);
> +
>  	i915_gem_timeline_fini(&vm->timeline);
>  	drm_mm_takedown(&vm->mm);
>  	list_del(&vm->global_link);
> @@ -2310,9 +2315,8 @@ void i915_ppgtt_release(struct kref *kref)
>  	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
>  	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
>  
> -	i915_address_space_fini(&ppgtt->base);
> -
>  	ppgtt->base.cleanup(&ppgtt->base);
> +	i915_address_space_fini(&ppgtt->base);
>  	kfree(ppgtt);
>  }
>  
> @@ -2947,7 +2951,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>  		return -ENOMEM;
>  	}
>  
> -	ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32);
> +	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
>  	if (ret) {
>  		DRM_ERROR("Scratch setup failed\n");
>  		/* iounmap will also get called at remove, but meh */
> @@ -3036,7 +3040,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  
>  	iounmap(ggtt->gsm);
> -	cleanup_scratch_page(vm->i915, &vm->scratch_page);
> +	cleanup_scratch_page(vm);
>  }
>  
>  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> @@ -3187,6 +3191,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
>  	int ret;
>  
>  	ggtt->base.i915 = dev_priv;
> +	ggtt->base.dma = &dev_priv->drm.pdev->dev;
>  
>  	if (INTEL_GEN(dev_priv) <= 5)
>  		ret = i915_gmch_probe(ggtt);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 3c5ef5358cef..c59a7687ed6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -36,6 +36,7 @@
>  
>  #include <linux/io-mapping.h>
>  #include <linux/mm.h>
> +#include <linux/pagevec.h>
>  
>  #include "i915_gem_timeline.h"
>  #include "i915_gem_request.h"
> @@ -247,6 +248,7 @@ struct i915_address_space {
>  	struct drm_mm mm;
>  	struct i915_gem_timeline timeline;
>  	struct drm_i915_private *i915;
> +	struct device *dma;
>  	/* Every address space belongs to a struct file - except for the global
>  	 * GTT that is owned by the driver (and so @file is set to NULL). In
>  	 * principle, no information should leak from one context to another
> @@ -297,6 +299,9 @@ struct i915_address_space {
>  	 */
>  	struct list_head unbound_list;
>  
> +	struct pagevec free_pages;
> +	bool pt_kmap_wc;
> +
>  	/* FIXME: Need a more generic return type */
>  	gen6_pte_t (*pte_encode)(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps
  2017-02-09 15:08   ` Mika Kuoppala
@ 2017-02-09 15:11     ` Mika Kuoppala
  0 siblings, 0 replies; 46+ messages in thread
From: Mika Kuoppala @ 2017-02-09 15:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Mika Kuoppala <mika.kuoppala@intel.com> writes:

> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
>> We flush the entire page every time we update a few bytes, making the
>> update of a page table many, many times slower than is required. If we
>> create a WC map of the page for our updates, we can avoid the clflush
>> but incur additional cost for creating the pagetable. We amortize that
>> cost by reusing page vmappings, and only changing the page protection in
>> batches.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_gtt.c | 329 ++++++++++++++++++------------------
>>  drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +
>>  2 files changed, 172 insertions(+), 162 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 45bab7b7b026..302aee193ce5 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -340,43 +340,69 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>>  	return pte;
>>  }
>>  
>> -static int __setup_page_dma(struct drm_i915_private *dev_priv,
>> -			    struct i915_page_dma *p, gfp_t flags)
>> +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
>>  {
>> -	struct device *kdev = &dev_priv->drm.pdev->dev;
>> +	struct page *page;
>>  
>> -	p->page = alloc_page(flags);
>> -	if (!p->page)
>> -		return -ENOMEM;
>> +	if (vm->free_pages.nr)
>> +		return vm->free_pages.pages[--vm->free_pages.nr];
>>  
>> -	p->daddr = dma_map_page(kdev,
>> -				p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
>> +	page = alloc_page(gfp);
>> +	if (!page)
>> +		return NULL;
>>  
>> -	if (dma_mapping_error(kdev, p->daddr)) {
>> -		__free_page(p->page);
>> -		return -EINVAL;
>> +	if (vm->pt_kmap_wc)
>> +		set_pages_array_wc(&page, 1);
>> +
>> +	return page;
>> +}
>> +
>> +static void vm_free_pages_release(struct i915_address_space *vm)
>> +{
>> +	GEM_BUG_ON(!pagevec_count(&vm->free_pages));
>> +
>> +	if (vm->pt_kmap_wc)
>> +		set_pages_array_wb(vm->free_pages.pages,
>> +				   pagevec_count(&vm->free_pages));
>> +
>> +	__pagevec_release(&vm->free_pages);
>> +}
>> +
>> +static void vm_free_page(struct i915_address_space *vm, struct page *page)
>> +{
>> +	if (!pagevec_add(&vm->free_pages, page))
>> +		vm_free_pages_release(vm);
>
> What about the page you failed to push to vec?
>
> For me it seems that you are missing a retry after
> vec cleanup.

No, you are not missing it. The page was pushed.

Quite a lot of changes, but mostly just pointer changes,

Matthew is also looking at this, but two stamps are
better than one.

Patch is,
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

>
> -Mika
>
>> +}
>> +
>> +static int __setup_page_dma(struct i915_address_space *vm,
>> +			    struct i915_page_dma *p,
>> +			    gfp_t gfp)
>> +{
>> +	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
>> +	if (unlikely(!p->page))
>> +		return -ENOMEM;
>> +
>> +	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
>> +				PCI_DMA_BIDIRECTIONAL);
>> +	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
>> +		vm_free_page(vm, p->page);
>> +		return -ENOMEM;
>>  	}
>>  
>>  	return 0;
>>  }
>>  
>> -static int setup_page_dma(struct drm_i915_private *dev_priv,
>> +static int setup_page_dma(struct i915_address_space *vm,
>>  			  struct i915_page_dma *p)
>>  {
>> -	return __setup_page_dma(dev_priv, p, I915_GFP_DMA);
>> +	return __setup_page_dma(vm, p, I915_GFP_DMA);
>>  }
>>  
>> -static void cleanup_page_dma(struct drm_i915_private *dev_priv,
>> +static void cleanup_page_dma(struct i915_address_space *vm,
>>  			     struct i915_page_dma *p)
>>  {
>> -	struct pci_dev *pdev = dev_priv->drm.pdev;
>> -
>> -	if (WARN_ON(!p->page))
>> -		return;
>> -
>> -	dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
>> -	__free_page(p->page);
>> -	memset(p, 0, sizeof(*p));
>> +	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
>> +	vm_free_page(vm, p->page);
>>  }
>>  
>>  static void *kmap_page_dma(struct i915_page_dma *p)
>> @@ -387,67 +413,54 @@ static void *kmap_page_dma(struct i915_page_dma *p)
>>  /* We use the flushing unmap only with ppgtt structures:
>>   * page directories, page tables and scratch pages.
>>   */
>> -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
>> +static void kunmap_page_dma(void *vaddr)
>>  {
>> -	/* There are only few exceptions for gen >=6. chv and bxt.
>> -	 * And we are not sure about the latter so play safe for now.
>> -	 */
>> -	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
>> -		drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> -
>>  	kunmap_atomic(vaddr);
>>  }
>>  
>>  #define kmap_px(px) kmap_page_dma(px_base(px))
>> -#define kunmap_px(ppgtt, vaddr) \
>> -		kunmap_page_dma((ppgtt)->base.i915, (vaddr))
>> +#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
>>  
>> -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px))
>> -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px))
>> -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
>> -#define fill32_px(dev_priv, px, v) \
>> -		fill_page_dma_32((dev_priv), px_base(px), (v))
>> +#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
>> +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
>> +#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
>> +#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
>>  
>> -static void fill_page_dma(struct drm_i915_private *dev_priv,
>> -			  struct i915_page_dma *p, const uint64_t val)
>> +static void fill_page_dma(struct i915_address_space *vm,
>> +			  struct i915_page_dma *p,
>> +			  const u64 val)
>>  {
>> +	u64 * const vaddr = kmap_page_dma(p);
>>  	int i;
>> -	uint64_t * const vaddr = kmap_page_dma(p);
>>  
>>  	for (i = 0; i < 512; i++)
>>  		vaddr[i] = val;
>>  
>> -	kunmap_page_dma(dev_priv, vaddr);
>> +	kunmap_page_dma(vaddr);
>>  }
>>  
>> -static void fill_page_dma_32(struct drm_i915_private *dev_priv,
>> -			     struct i915_page_dma *p, const uint32_t val32)
>> +static void fill_page_dma_32(struct i915_address_space *vm,
>> +			     struct i915_page_dma *p,
>> +			     const u32 v)
>>  {
>> -	uint64_t v = val32;
>> -
>> -	v = v << 32 | val32;
>> -
>> -	fill_page_dma(dev_priv, p, v);
>> +	fill_page_dma(vm, p, (u64)v << 32 | v);
>>  }
>>  
>>  static int
>> -setup_scratch_page(struct drm_i915_private *dev_priv,
>> -		   struct i915_page_dma *scratch,
>> -		   gfp_t gfp)
>> +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>>  {
>> -	return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO);
>> +	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
>>  }
>>  
>> -static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
>> -				 struct i915_page_dma *scratch)
>> +static void cleanup_scratch_page(struct i915_address_space *vm)
>>  {
>> -	cleanup_page_dma(dev_priv, scratch);
>> +	cleanup_page_dma(vm, &vm->scratch_page);
>>  }
>>  
>> -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>> +static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>>  {
>>  	struct i915_page_table *pt;
>> -	const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
>> +	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
>>  	int ret = -ENOMEM;
>>  
>>  	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
>> @@ -460,7 +473,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>>  	if (!pt->used_ptes)
>>  		goto fail_bitmap;
>>  
>> -	ret = setup_px(dev_priv, pt);
>> +	ret = setup_px(vm, pt);
>>  	if (ret)
>>  		goto fail_page_m;
>>  
>> @@ -474,10 +487,9 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
>>  	return ERR_PTR(ret);
>>  }
>>  
>> -static void free_pt(struct drm_i915_private *dev_priv,
>> -		    struct i915_page_table *pt)
>> +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
>>  {
>> -	cleanup_px(dev_priv, pt);
>> +	cleanup_px(vm, pt);
>>  	kfree(pt->used_ptes);
>>  	kfree(pt);
>>  }
>> @@ -490,7 +502,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
>>  	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
>>  				      I915_CACHE_LLC);
>>  
>> -	fill_px(vm->i915, pt, scratch_pte);
>> +	fill_px(vm, pt, scratch_pte);
>>  }
>>  
>>  static void gen6_initialize_pt(struct i915_address_space *vm,
>> @@ -503,10 +515,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
>>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
>>  				     I915_CACHE_LLC, 0);
>>  
>> -	fill32_px(vm->i915, pt, scratch_pte);
>> +	fill32_px(vm, pt, scratch_pte);
>>  }
>>  
>> -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>> +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
>>  {
>>  	struct i915_page_directory *pd;
>>  	int ret = -ENOMEM;
>> @@ -520,7 +532,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>>  	if (!pd->used_pdes)
>>  		goto fail_bitmap;
>>  
>> -	ret = setup_px(dev_priv, pd);
>> +	ret = setup_px(vm, pd);
>>  	if (ret)
>>  		goto fail_page_m;
>>  
>> @@ -534,11 +546,11 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
>>  	return ERR_PTR(ret);
>>  }
>>  
>> -static void free_pd(struct drm_i915_private *dev_priv,
>> +static void free_pd(struct i915_address_space *vm,
>>  		    struct i915_page_directory *pd)
>>  {
>>  	if (px_page(pd)) {
>> -		cleanup_px(dev_priv, pd);
>> +		cleanup_px(vm, pd);
>>  		kfree(pd->used_pdes);
>>  		kfree(pd);
>>  	}
>> @@ -551,7 +563,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>>  
>>  	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
>>  
>> -	fill_px(vm->i915, pd, scratch_pde);
>> +	fill_px(vm, pd, scratch_pde);
>>  }
>>  
>>  static int __pdp_init(struct drm_i915_private *dev_priv,
>> @@ -585,23 +597,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
>>  	pdp->page_directory = NULL;
>>  }
>>  
>> -static struct
>> -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
>> +static struct i915_page_directory_pointer *
>> +alloc_pdp(struct i915_address_space *vm)
>>  {
>>  	struct i915_page_directory_pointer *pdp;
>>  	int ret = -ENOMEM;
>>  
>> -	WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv));
>> +	WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
>>  
>>  	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
>>  	if (!pdp)
>>  		return ERR_PTR(-ENOMEM);
>>  
>> -	ret = __pdp_init(dev_priv, pdp);
>> +	ret = __pdp_init(vm->i915, pdp);
>>  	if (ret)
>>  		goto fail_bitmap;
>>  
>> -	ret = setup_px(dev_priv, pdp);
>> +	ret = setup_px(vm, pdp);
>>  	if (ret)
>>  		goto fail_page_m;
>>  
>> @@ -615,12 +627,12 @@ i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
>>  	return ERR_PTR(ret);
>>  }
>>  
>> -static void free_pdp(struct drm_i915_private *dev_priv,
>> +static void free_pdp(struct i915_address_space *vm,
>>  		     struct i915_page_directory_pointer *pdp)
>>  {
>>  	__pdp_fini(pdp);
>> -	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
>> -		cleanup_px(dev_priv, pdp);
>> +	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
>> +		cleanup_px(vm, pdp);
>>  		kfree(pdp);
>>  	}
>>  }
>> @@ -632,7 +644,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>>  
>>  	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>>  
>> -	fill_px(vm->i915, pdp, scratch_pdpe);
>> +	fill_px(vm, pdp, scratch_pdpe);
>>  }
>>  
>>  static void gen8_initialize_pml4(struct i915_address_space *vm,
>> @@ -643,7 +655,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
>>  	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
>>  					  I915_CACHE_LLC);
>>  
>> -	fill_px(vm->i915, pml4, scratch_pml4e);
>> +	fill_px(vm, pml4, scratch_pml4e);
>>  }
>>  
>>  static void
>> @@ -659,20 +671,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
>>  
>>  	page_directorypo = kmap_px(pdp);
>>  	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
>> -	kunmap_px(ppgtt, page_directorypo);
>> +	kunmap_px(page_directorypo);
>>  }
>>  
>>  static void
>> -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt,
>> -		 struct i915_pml4 *pml4,
>> +gen8_setup_pml4e(struct i915_pml4 *pml4,
>>  		 struct i915_page_directory_pointer *pdp,
>>  		 int index)
>>  {
>>  	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
>>  
>> -	WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)));
>>  	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
>> -	kunmap_px(ppgtt, pagemap);
>> +	kunmap_px(pagemap);
>>  }
>>  
>>  /* Broadwell Page Directory Pointer Descriptors */
>> @@ -741,7 +751,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>>  				uint64_t start,
>>  				uint64_t length)
>>  {
>> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>  	unsigned int num_entries = gen8_pte_count(start, length);
>>  	unsigned int pte = gen8_pte_index(start);
>>  	unsigned int pte_end = pte + num_entries;
>> @@ -762,7 +771,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>>  	vaddr = kmap_px(pt);
>>  	while (pte < pte_end)
>>  		vaddr[pte++] = scratch_pte;
>> -	kunmap_px(ppgtt, vaddr);
>> +	kunmap_px(vaddr);
>>  
>>  	return false;
>>  }
>> @@ -775,7 +784,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>>  				uint64_t start,
>>  				uint64_t length)
>>  {
>> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>  	struct i915_page_table *pt;
>>  	uint64_t pde;
>>  	gen8_pde_t *pde_vaddr;
>> @@ -790,8 +798,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>>  			__clear_bit(pde, pd->used_pdes);
>>  			pde_vaddr = kmap_px(pd);
>>  			pde_vaddr[pde] = scratch_pde;
>> -			kunmap_px(ppgtt, pde_vaddr);
>> -			free_pt(vm->i915, pt);
>> +			kunmap_px(pde_vaddr);
>> +			free_pt(vm, pt);
>>  		}
>>  	}
>>  
>> @@ -820,7 +828,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>>  		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
>>  			__clear_bit(pdpe, pdp->used_pdpes);
>>  			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
>> -			free_pd(vm->i915, pd);
>> +			free_pd(vm, pd);
>>  		}
>>  	}
>>  
>> @@ -841,7 +849,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>>  				  uint64_t start,
>>  				  uint64_t length)
>>  {
>> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>  	struct i915_page_directory_pointer *pdp;
>>  	uint64_t pml4e;
>>  
>> @@ -853,8 +860,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>>  
>>  		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
>>  			__clear_bit(pml4e, pml4->used_pml4es);
>> -			gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e);
>> -			free_pdp(vm->i915, pdp);
>> +			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
>> +			free_pdp(vm, pdp);
>>  		}
>>  	}
>>  }
>> @@ -912,12 +919,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
>>  				pde = 0;
>>  			}
>>  
>> -			kunmap_px(ppgtt, vaddr);
>> +			kunmap_px(vaddr);
>>  			vaddr = kmap_px(pd->page_table[pde]);
>>  			pte = 0;
>>  		}
>>  	} while (1);
>> -	kunmap_px(ppgtt, vaddr);
>> +	kunmap_px(vaddr);
>>  
>>  	return ret;
>>  }
>> @@ -959,7 +966,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
>>  		;
>>  }
>>  
>> -static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
>> +static void gen8_free_page_tables(struct i915_address_space *vm,
>>  				  struct i915_page_directory *pd)
>>  {
>>  	int i;
>> @@ -971,34 +978,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
>>  		if (WARN_ON(!pd->page_table[i]))
>>  			continue;
>>  
>> -		free_pt(dev_priv, pd->page_table[i]);
>> +		free_pt(vm, pd->page_table[i]);
>>  		pd->page_table[i] = NULL;
>>  	}
>>  }
>>  
>>  static int gen8_init_scratch(struct i915_address_space *vm)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	int ret;
>>  
>> -	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
>> +	ret = setup_scratch_page(vm, I915_GFP_DMA);
>>  	if (ret)
>>  		return ret;
>>  
>> -	vm->scratch_pt = alloc_pt(dev_priv);
>> +	vm->scratch_pt = alloc_pt(vm);
>>  	if (IS_ERR(vm->scratch_pt)) {
>>  		ret = PTR_ERR(vm->scratch_pt);
>>  		goto free_scratch_page;
>>  	}
>>  
>> -	vm->scratch_pd = alloc_pd(dev_priv);
>> +	vm->scratch_pd = alloc_pd(vm);
>>  	if (IS_ERR(vm->scratch_pd)) {
>>  		ret = PTR_ERR(vm->scratch_pd);
>>  		goto free_pt;
>>  	}
>>  
>> -	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
>> -		vm->scratch_pdp = alloc_pdp(dev_priv);
>> +	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
>> +		vm->scratch_pdp = alloc_pdp(vm);
>>  		if (IS_ERR(vm->scratch_pdp)) {
>>  			ret = PTR_ERR(vm->scratch_pdp);
>>  			goto free_pd;
>> @@ -1013,11 +1019,11 @@ static int gen8_init_scratch(struct i915_address_space *vm)
>>  	return 0;
>>  
>>  free_pd:
>> -	free_pd(dev_priv, vm->scratch_pd);
>> +	free_pd(vm, vm->scratch_pd);
>>  free_pt:
>> -	free_pt(dev_priv, vm->scratch_pt);
>> +	free_pt(vm, vm->scratch_pt);
>>  free_scratch_page:
>> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
>> +	cleanup_scratch_page(vm);
>>  
>>  	return ret;
>>  }
>> @@ -1055,44 +1061,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
>>  
>>  static void gen8_free_scratch(struct i915_address_space *vm)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>> -
>> -	if (USES_FULL_48BIT_PPGTT(dev_priv))
>> -		free_pdp(dev_priv, vm->scratch_pdp);
>> -	free_pd(dev_priv, vm->scratch_pd);
>> -	free_pt(dev_priv, vm->scratch_pt);
>> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
>> +	if (USES_FULL_48BIT_PPGTT(vm->i915))
>> +		free_pdp(vm, vm->scratch_pdp);
>> +	free_pd(vm, vm->scratch_pd);
>> +	free_pt(vm, vm->scratch_pt);
>> +	cleanup_scratch_page(vm);
>>  }
>>  
>> -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
>> +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>>  				    struct i915_page_directory_pointer *pdp)
>>  {
>>  	int i;
>>  
>> -	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
>> +	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
>>  		if (WARN_ON(!pdp->page_directory[i]))
>>  			continue;
>>  
>> -		gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
>> -		free_pd(dev_priv, pdp->page_directory[i]);
>> +		gen8_free_page_tables(vm, pdp->page_directory[i]);
>> +		free_pd(vm, pdp->page_directory[i]);
>>  	}
>>  
>> -	free_pdp(dev_priv, pdp);
>> +	free_pdp(vm, pdp);
>>  }
>>  
>>  static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
>>  {
>> -	struct drm_i915_private *dev_priv = ppgtt->base.i915;
>>  	int i;
>>  
>>  	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
>>  		if (WARN_ON(!ppgtt->pml4.pdps[i]))
>>  			continue;
>>  
>> -		gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]);
>> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
>>  	}
>>  
>> -	cleanup_px(dev_priv, &ppgtt->pml4);
>> +	cleanup_px(&ppgtt->base, &ppgtt->pml4);
>>  }
>>  
>>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>> @@ -1103,8 +1106,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>>  	if (intel_vgpu_active(dev_priv))
>>  		gen8_ppgtt_notify_vgt(ppgtt, false);
>>  
>> -	if (!USES_FULL_48BIT_PPGTT(dev_priv))
>> -		gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp);
>> +	if (!USES_FULL_48BIT_PPGTT(vm->i915))
>> +		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
>>  	else
>>  		gen8_ppgtt_cleanup_4lvl(ppgtt);
>>  
>> @@ -1135,7 +1138,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>>  				     uint64_t length,
>>  				     unsigned long *new_pts)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	struct i915_page_table *pt;
>>  	uint32_t pde;
>>  
>> @@ -1147,7 +1149,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>>  			continue;
>>  		}
>>  
>> -		pt = alloc_pt(dev_priv);
>> +		pt = alloc_pt(vm);
>>  		if (IS_ERR(pt))
>>  			goto unwind_out;
>>  
>> @@ -1161,7 +1163,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
>>  
>>  unwind_out:
>>  	for_each_set_bit(pde, new_pts, I915_PDES)
>> -		free_pt(dev_priv, pd->page_table[pde]);
>> +		free_pt(vm, pd->page_table[pde]);
>>  
>>  	return -ENOMEM;
>>  }
>> @@ -1196,7 +1198,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>>  				  uint64_t length,
>>  				  unsigned long *new_pds)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	struct i915_page_directory *pd;
>>  	uint32_t pdpe;
>>  	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
>> @@ -1207,7 +1208,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>>  		if (test_bit(pdpe, pdp->used_pdpes))
>>  			continue;
>>  
>> -		pd = alloc_pd(dev_priv);
>> +		pd = alloc_pd(vm);
>>  		if (IS_ERR(pd))
>>  			goto unwind_out;
>>  
>> @@ -1221,7 +1222,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
>>  
>>  unwind_out:
>>  	for_each_set_bit(pdpe, new_pds, pdpes)
>> -		free_pd(dev_priv, pdp->page_directory[pdpe]);
>> +		free_pd(vm, pdp->page_directory[pdpe]);
>>  
>>  	return -ENOMEM;
>>  }
>> @@ -1249,7 +1250,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>>  				  uint64_t length,
>>  				  unsigned long *new_pdps)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	struct i915_page_directory_pointer *pdp;
>>  	uint32_t pml4e;
>>  
>> @@ -1257,7 +1257,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>>  
>>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>>  		if (!test_bit(pml4e, pml4->used_pml4es)) {
>> -			pdp = alloc_pdp(dev_priv);
>> +			pdp = alloc_pdp(vm);
>>  			if (IS_ERR(pdp))
>>  				goto unwind_out;
>>  
>> @@ -1275,7 +1275,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
>>  
>>  unwind_out:
>>  	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
>> -		free_pdp(dev_priv, pml4->pdps[pml4e]);
>> +		free_pdp(vm, pml4->pdps[pml4e]);
>>  
>>  	return -ENOMEM;
>>  }
>> @@ -1324,7 +1324,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>>  {
>>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>  	unsigned long *new_page_dirs, *new_page_tables;
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	struct i915_page_directory *pd;
>>  	const uint64_t orig_start = start;
>>  	const uint64_t orig_length = length;
>> @@ -1393,7 +1392,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>>  			 * point we're still relying on insert_entries() */
>>  		}
>>  
>> -		kunmap_px(ppgtt, page_directory);
>> +		kunmap_px(page_directory);
>>  		__set_bit(pdpe, pdp->used_pdpes);
>>  		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
>>  	}
>> @@ -1408,12 +1407,11 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>>  
>>  		for_each_set_bit(temp, new_page_tables + pdpe *
>>  				BITS_TO_LONGS(I915_PDES), I915_PDES)
>> -			free_pt(dev_priv,
>> -				pdp->page_directory[pdpe]->page_table[temp]);
>> +			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
>>  	}
>>  
>>  	for_each_set_bit(pdpe, new_page_dirs, pdpes)
>> -		free_pd(dev_priv, pdp->page_directory[pdpe]);
>> +		free_pd(vm, pdp->page_directory[pdpe]);
>>  
>>  	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
>>  	mark_tlbs_dirty(ppgtt);
>> @@ -1426,7 +1424,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>>  				    uint64_t length)
>>  {
>>  	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
>> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>  	struct i915_page_directory_pointer *pdp;
>>  	uint64_t pml4e;
>>  	int ret = 0;
>> @@ -1454,7 +1451,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>>  		if (ret)
>>  			goto err_out;
>>  
>> -		gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e);
>> +		gen8_setup_pml4e(pml4, pdp, pml4e);
>>  	}
>>  
>>  	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
>> @@ -1464,7 +1461,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>>  
>>  err_out:
>>  	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
>> -		gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]);
>> +		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
>>  
>>  	return ret;
>>  }
>> @@ -1480,7 +1477,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>>  		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>>  }
>>  
>> -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
>> +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
>> +			  struct i915_page_directory_pointer *pdp,
>>  			  uint64_t start, uint64_t length,
>>  			  gen8_pte_t scratch_pte,
>>  			  struct seq_file *m)
>> @@ -1546,7 +1544,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>>  						 I915_CACHE_LLC);
>>  
>>  	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
>> -		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
>> +		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
>>  	} else {
>>  		uint64_t pml4e;
>>  		struct i915_pml4 *pml4 = &ppgtt->pml4;
>> @@ -1557,7 +1555,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>>  				continue;
>>  
>>  			seq_printf(m, "    PML4E #%llu\n", pml4e);
>> -			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
>> +			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
>>  		}
>>  	}
>>  }
>> @@ -1613,8 +1611,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>>  	ppgtt->base.bind_vma = ppgtt_bind_vma;
>>  	ppgtt->debug_dump = gen8_dump_ppgtt;
>>  
>> +	/* There are only few exceptions for gen >=6. chv and bxt.
>> +	 * And we are not sure about the latter so play safe for now.
>> +	 */
>> +	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
>> +		ppgtt->base.pt_kmap_wc = true;
>> +
>>  	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
>> -		ret = setup_px(dev_priv, &ppgtt->pml4);
>> +		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
>>  		if (ret)
>>  			goto free_scratch;
>>  
>> @@ -1703,7 +1707,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>>  			}
>>  			seq_puts(m, "\n");
>>  		}
>> -		kunmap_px(ppgtt, pt_vaddr);
>> +		kunmap_px(pt_vaddr);
>>  	}
>>  }
>>  
>> @@ -1900,7 +1904,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>>  		for (i = first_pte; i < last_pte; i++)
>>  			pt_vaddr[i] = scratch_pte;
>>  
>> -		kunmap_px(ppgtt, pt_vaddr);
>> +		kunmap_px(pt_vaddr);
>>  
>>  		num_entries -= last_pte - first_pte;
>>  		first_pte = 0;
>> @@ -1939,12 +1943,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>>  		}
>>  
>>  		if (++act_pte == GEN6_PTES) {
>> -			kunmap_px(ppgtt, vaddr);
>> +			kunmap_px(vaddr);
>>  			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
>>  			act_pte = 0;
>>  		}
>>  	} while (1);
>> -	kunmap_px(ppgtt, vaddr);
>> +	kunmap_px(vaddr);
>>  }
>>  
>>  static int gen6_alloc_va_range(struct i915_address_space *vm,
>> @@ -1978,7 +1982,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>>  		/* We've already allocated a page table */
>>  		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
>>  
>> -		pt = alloc_pt(dev_priv);
>> +		pt = alloc_pt(vm);
>>  		if (IS_ERR(pt)) {
>>  			ret = PTR_ERR(pt);
>>  			goto unwind_out;
>> @@ -2026,7 +2030,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>>  		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
>>  
>>  		ppgtt->pd.page_table[pde] = vm->scratch_pt;
>> -		free_pt(dev_priv, pt);
>> +		free_pt(vm, pt);
>>  	}
>>  
>>  	mark_tlbs_dirty(ppgtt);
>> @@ -2035,16 +2039,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>>  
>>  static int gen6_init_scratch(struct i915_address_space *vm)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	int ret;
>>  
>> -	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
>> +	ret = setup_scratch_page(vm, I915_GFP_DMA);
>>  	if (ret)
>>  		return ret;
>>  
>> -	vm->scratch_pt = alloc_pt(dev_priv);
>> +	vm->scratch_pt = alloc_pt(vm);
>>  	if (IS_ERR(vm->scratch_pt)) {
>> -		cleanup_scratch_page(dev_priv, &vm->scratch_page);
>> +		cleanup_scratch_page(vm);
>>  		return PTR_ERR(vm->scratch_pt);
>>  	}
>>  
>> @@ -2055,17 +2058,14 @@ static int gen6_init_scratch(struct i915_address_space *vm)
>>  
>>  static void gen6_free_scratch(struct i915_address_space *vm)
>>  {
>> -	struct drm_i915_private *dev_priv = vm->i915;
>> -
>> -	free_pt(dev_priv, vm->scratch_pt);
>> -	cleanup_scratch_page(dev_priv, &vm->scratch_page);
>> +	free_pt(vm, vm->scratch_pt);
>> +	cleanup_scratch_page(vm);
>>  }
>>  
>>  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>>  {
>>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>>  	struct i915_page_directory *pd = &ppgtt->pd;
>> -	struct drm_i915_private *dev_priv = vm->i915;
>>  	struct i915_page_table *pt;
>>  	uint32_t pde;
>>  
>> @@ -2073,7 +2073,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
>>  
>>  	gen6_for_all_pdes(pt, pd, pde)
>>  		if (pt != vm->scratch_pt)
>> -			free_pt(dev_priv, pt);
>> +			free_pt(vm, pt);
>>  
>>  	gen6_free_scratch(vm);
>>  }
>> @@ -2182,6 +2182,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
>>  			   struct drm_i915_private *dev_priv)
>>  {
>>  	ppgtt->base.i915 = dev_priv;
>> +	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
>>  
>>  	if (INTEL_INFO(dev_priv)->gen < 8)
>>  		return gen6_ppgtt_init(ppgtt);
>> @@ -2199,10 +2200,14 @@ static void i915_address_space_init(struct i915_address_space *vm,
>>  	INIT_LIST_HEAD(&vm->inactive_list);
>>  	INIT_LIST_HEAD(&vm->unbound_list);
>>  	list_add_tail(&vm->global_link, &dev_priv->vm_list);
>> +	pagevec_init(&vm->free_pages, false);
>>  }
>>  
>>  static void i915_address_space_fini(struct i915_address_space *vm)
>>  {
>> +	if (pagevec_count(&vm->free_pages))
>> +		vm_free_pages_release(vm);
>> +
>>  	i915_gem_timeline_fini(&vm->timeline);
>>  	drm_mm_takedown(&vm->mm);
>>  	list_del(&vm->global_link);
>> @@ -2310,9 +2315,8 @@ void i915_ppgtt_release(struct kref *kref)
>>  	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
>>  	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
>>  
>> -	i915_address_space_fini(&ppgtt->base);
>> -
>>  	ppgtt->base.cleanup(&ppgtt->base);
>> +	i915_address_space_fini(&ppgtt->base);
>>  	kfree(ppgtt);
>>  }
>>  
>> @@ -2947,7 +2951,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>>  		return -ENOMEM;
>>  	}
>>  
>> -	ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32);
>> +	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
>>  	if (ret) {
>>  		DRM_ERROR("Scratch setup failed\n");
>>  		/* iounmap will also get called at remove, but meh */
>> @@ -3036,7 +3040,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
>>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>  
>>  	iounmap(ggtt->gsm);
>> -	cleanup_scratch_page(vm->i915, &vm->scratch_page);
>> +	cleanup_scratch_page(vm);
>>  }
>>  
>>  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>> @@ -3187,6 +3191,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
>>  	int ret;
>>  
>>  	ggtt->base.i915 = dev_priv;
>> +	ggtt->base.dma = &dev_priv->drm.pdev->dev;
>>  
>>  	if (INTEL_GEN(dev_priv) <= 5)
>>  		ret = i915_gmch_probe(ggtt);
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> index 3c5ef5358cef..c59a7687ed6f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> @@ -36,6 +36,7 @@
>>  
>>  #include <linux/io-mapping.h>
>>  #include <linux/mm.h>
>> +#include <linux/pagevec.h>
>>  
>>  #include "i915_gem_timeline.h"
>>  #include "i915_gem_request.h"
>> @@ -247,6 +248,7 @@ struct i915_address_space {
>>  	struct drm_mm mm;
>>  	struct i915_gem_timeline timeline;
>>  	struct drm_i915_private *i915;
>> +	struct device *dma;
>>  	/* Every address space belongs to a struct file - except for the global
>>  	 * GTT that is owned by the driver (and so @file is set to NULL). In
>>  	 * principle, no information should leak from one context to another
>> @@ -297,6 +299,9 @@ struct i915_address_space {
>>  	 */
>>  	struct list_head unbound_list;
>>  
>> +	struct pagevec free_pages;
>> +	bool pt_kmap_wc;
>> +
>>  	/* FIXME: Need a more generic return type */
>>  	gen6_pte_t (*pte_encode)(dma_addr_t addr,
>>  				 enum i915_cache_level level,
>> -- 
>> 2.11.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

* Re: [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers
  2017-02-02 15:02 ` [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
@ 2017-02-10 11:25   ` Matthew Auld
  0 siblings, 0 replies; 46+ messages in thread
From: Matthew Auld @ 2017-02-10 11:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Mika Kuoppala

On 2 February 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> As these are now both plain and simple kmap_atomic/kunmap_atomic pairs,
> we can remove the wrappers for a small gain of clarity (in particular,
> not hiding the atomic critical sections!).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 46+ messages in thread

end of thread, other threads:[~2017-02-10 11:26 UTC | newest]

Thread overview: 46+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
2017-02-02 15:02 ` [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
2017-02-09 11:34   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
2017-02-02 15:32   ` Chris Wilson
2017-02-02 15:57     ` Tvrtko Ursulin
2017-02-02 16:10       ` Chris Wilson
2017-02-02 16:39         ` Tvrtko Ursulin
2017-02-02 17:05           ` Chris Wilson
2017-02-02 17:17             ` Tvrtko Ursulin
2017-02-03  8:34               ` Chris Wilson
2017-02-02 15:02 ` [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
2017-02-06 14:21   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
2017-02-06 15:07   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
2017-02-09 15:08   ` Mika Kuoppala
2017-02-09 15:11     ` Mika Kuoppala
2017-02-02 15:02 ` [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
2017-02-10 11:25   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects Chris Wilson
2017-02-02 15:07   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 09/19] drm/i915: Move allocate_va_range to GTT Chris Wilson
2017-02-07 10:01   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 10/19] drm/i915: Remove redundant clear of appgtt Chris Wilson
2017-02-07 10:06   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 11/19] drm/i915: Tidy gen6_write_pde() Chris Wilson
2017-02-07 10:18   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
2017-02-06 20:32   ` Michał Winiarski
2017-02-02 15:02 ` [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
2017-02-08 16:30   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
2017-02-08 17:42   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
2017-02-08 17:47   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
2017-02-08 17:48   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
2017-02-08 17:53   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints Chris Wilson
2017-02-08 18:01   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 19/19] drm/i915: Remove unused ppgtt->enable() Chris Wilson
2017-02-03 13:04   ` Joonas Lahtinen
2017-02-02 17:54 ` ✓ Fi.CI.BAT: success for series starting with [01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Patchwork
2017-02-07 15:30 ` [PATCH 01/19] " Mika Kuoppala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.