All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages()
@ 2017-02-10 19:38 Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 02/22] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
                   ` (21 more replies)
  0 siblings, 22 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

The predominant VMA class is normal GTT, so allow gcc to emphasize that
path and avoid unnecessary stack movement.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index eebbffdb9a0b..68169694d268 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2622,14 +2622,16 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	struct drm_i915_gem_object *obj = vma->obj;
-	u32 pte_flags = 0;
-	int ret;
+	u32 pte_flags;
 
-	ret = i915_get_ggtt_vma_pages(vma);
-	if (ret)
-		return ret;
+	if (unlikely(!vma->pages)) {
+		int ret = i915_get_ggtt_vma_pages(vma);
+		if (ret)
+			return ret;
+	}
 
 	/* Currently applicable only to VLV */
+	pte_flags = 0;
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
@@ -2654,18 +2656,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	u32 pte_flags;
-	int ret;
 
-	ret = i915_get_ggtt_vma_pages(vma);
-	if (ret)
-		return ret;
+	if (unlikely(!vma->pages)) {
+		int ret = i915_get_ggtt_vma_pages(vma);
+		if (ret)
+			return ret;
+	}
 
 	/* Currently applicable only to VLV */
 	pte_flags = 0;
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
-
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm,
@@ -3431,9 +3433,9 @@ rotate_pages(const dma_addr_t *in, unsigned int offset,
 	return sg;
 }
 
-static struct sg_table *
-intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
-			  struct drm_i915_gem_object *obj)
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+		   struct drm_i915_gem_object *obj)
 {
 	const size_t n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size = intel_rotation_info_size(rot_info);
@@ -3494,7 +3496,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
 	return ERR_PTR(ret);
 }
 
-static struct sg_table *
+static noinline struct sg_table *
 intel_partial_pages(const struct i915_ggtt_view *view,
 		    struct drm_i915_gem_object *obj)
 {
@@ -3548,7 +3550,7 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 static int
 i915_get_ggtt_vma_pages(struct i915_vma *vma)
 {
-	int ret = 0;
+	int ret;
 
 	/* The vma->pages are only valid within the lifespan of the borrowed
 	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
@@ -3557,32 +3559,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	 */
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
 
-	if (vma->pages)
+	switch (vma->ggtt_view.type) {
+	case I915_GGTT_VIEW_NORMAL:
+		vma->pages = vma->obj->mm.pages;
 		return 0;
 
-	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
-		vma->pages = vma->obj->mm.pages;
-	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
+	case I915_GGTT_VIEW_ROTATED:
 		vma->pages =
-			intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated,
-						  vma->obj);
-	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
+			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
+		break;
+
+	case I915_GGTT_VIEW_PARTIAL:
 		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
-	else
+		break;
+
+	default:
 		WARN_ONCE(1, "GGTT view %u not implemented!\n",
 			  vma->ggtt_view.type);
+		return -EINVAL;
+	}
 
-	if (!vma->pages) {
-		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
-			  vma->ggtt_view.type);
-		ret = -EINVAL;
-	} else if (IS_ERR(vma->pages)) {
+	ret = 0;
+	if (unlikely(IS_ERR(vma->pages))) {
 		ret = PTR_ERR(vma->pages);
 		vma->pages = NULL;
 		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
 			  vma->ggtt_view.type, ret);
 	}
-
 	return ret;
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 02/22] drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
                   ` (20 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Inline the address computation to avoid the vfunc call for every page.
We still have to pay the high overhead of sg_page_iter_next(), but now
at least GCC can optimise the innermost loop, giving a significant
boost to some thrashing Unreal Engine workloads.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 68169694d268..ca1f5fa6984f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1888,6 +1888,11 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
+struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+};
+
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
@@ -1897,27 +1902,34 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / GEN6_PTES;
 	unsigned act_pte = first_entry % GEN6_PTES;
-	gen6_pte_t *pt_vaddr = NULL;
-	struct sgt_iter sgt_iter;
-	dma_addr_t addr;
+	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+	struct sgt_dma iter;
+	gen6_pte_t *vaddr;
+
+	vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+	iter.sg = pages->sgl;
+	iter.dma = sg_dma_address(iter.sg);
+	iter.max = iter.dma + iter.sg->length;
+	do {
+		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
 
-	for_each_sgt_dma(addr, sgt_iter, pages) {
-		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+		iter.dma += PAGE_SIZE;
+		if (iter.dma == iter.max) {
+			iter.sg = __sg_next(iter.sg);
+			if (!iter.sg)
+				break;
 
-		pt_vaddr[act_pte] =
-			vm->pte_encode(addr, cache_level, flags);
+			iter.dma = sg_dma_address(iter.sg);
+			iter.max = iter.dma + iter.sg->length;
+		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
-			act_pt++;
+			kunmap_px(ppgtt, vaddr);
+			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
-	}
-
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	} while (1);
+	kunmap_px(ppgtt, vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -2503,27 +2515,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level, u32 flags)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	struct sgt_iter sgt_iter;
-	gen6_pte_t __iomem *gtt_entries;
-	gen6_pte_t gtt_entry;
+	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+	unsigned int i = start >> PAGE_SHIFT;
+	struct sgt_iter iter;
 	dma_addr_t addr;
-	int i = 0;
-
-	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
-
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = vm->pte_encode(addr, level, flags);
-		iowrite32(gtt_entry, &gtt_entries[i++]);
-	}
-
-	/* XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
+	for_each_sgt_dma(addr, iter, st)
+		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 02/22] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-13 14:58   ` Mika Kuoppala
  2017-02-13 15:44   ` [PATCH v3] " Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 04/22] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
                   ` (19 subsequent siblings)
  21 siblings, 2 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Improve the sg iteration and in the process eliminate a bug in
miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
full length of the sg table.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 157 +++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ca1f5fa6984f..fcb8d635aec0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -751,9 +751,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
 	unsigned int pte_end = pte + num_entries;
-	gen8_pte_t *pt_vaddr;
-	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC);
+	gen8_pte_t scratch_pte =
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+	gen8_pte_t *vaddr;
 
 	if (WARN_ON(!px_page(pt)))
 		return false;
@@ -766,12 +766,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 			return true;
 	}
 
-	pt_vaddr = kmap_px(pt);
-
+	vaddr = kmap_px(pt);
 	while (pte < pte_end)
-		pt_vaddr[pte++] = scratch_pte;
-
-	kunmap_px(ppgtt, pt_vaddr);
+		vaddr[pte++] = scratch_pte;
+	kunmap_px(ppgtt, vaddr);
 
 	return false;
 }
@@ -879,71 +877,93 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
 }
 
-static void
-gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+};
+
+static __always_inline bool
+gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 			      struct i915_page_directory_pointer *pdp,
-			      struct sg_page_iter *sg_iter,
-			      uint64_t start,
+			      struct sgt_dma *iter,
+			      u64 start,
 			      enum i915_cache_level cache_level)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = gen8_pdpe_index(start);
-	unsigned pde = gen8_pde_index(start);
-	unsigned pte = gen8_pte_index(start);
+	unsigned int pdpe = gen8_pdpe_index(start);
+	unsigned int pde = gen8_pde_index(start);
+	unsigned int pte = gen8_pte_index(start);
+	struct i915_page_directory *pd;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+	gen8_pte_t *vaddr;
+	bool ret = true;
 
-	pt_vaddr = NULL;
+	pd = pdp->page_directory[pdpe];
+	vaddr = kmap_px(pd->page_table[pde]);
+	do {
+		vaddr[pte] = pte_encode | iter->dma;
+		iter->dma += PAGE_SIZE;
+		if (iter->dma >= iter->max) {
+			iter->sg = __sg_next(iter->sg);
+			if (!iter->sg) {
+				ret = false;
+				break;
+			}
 
-	while (__sg_page_iter_next(sg_iter)) {
-		if (pt_vaddr == NULL) {
-			struct i915_page_directory *pd = pdp->page_directory[pdpe];
-			struct i915_page_table *pt = pd->page_table[pde];
-			pt_vaddr = kmap_px(pt);
+			iter->dma = sg_dma_address(iter->sg);
+			iter->max = iter->dma + iter->sg->length;
 		}
 
-		pt_vaddr[pte] =
-			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level);
 		if (++pte == GEN8_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
 			if (++pde == I915_PDES) {
-				if (++pdpe == I915_PDPES_PER_PDP(vm->i915))
-					break;
+				pd = pdp->page_directory[++pdpe];
 				pde = 0;
 			}
+
+			kunmap_px(ppgtt, vaddr);
+			vaddr = kmap_px(pd->page_table[pde]);
 			pte = 0;
 		}
-	}
+	} while (1);
+	kunmap_px(ppgtt, vaddr);
 
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	return ret;
 }
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct sg_table *pages,
-				      uint64_t start,
-				      enum i915_cache_level cache_level,
-				      u32 unused)
+static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
+				   struct sg_table *pages,
+				   uint64_t start,
+				   enum i915_cache_level cache_level,
+				   u32 unused)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct sg_page_iter sg_iter;
+	struct sgt_dma iter = {
+		.sg = pages->sgl,
+		.dma = sg_dma_address(iter.sg),
+		.max = iter.dma + iter.sg->length,
+	};
 
-	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter,
+				      start, cache_level);
+}
 
-	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
-		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
-					      cache_level);
-	} else {
-		struct i915_page_directory_pointer *pdp;
-		uint64_t pml4e;
-		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
+				   struct sg_table *pages,
+				   uint64_t start,
+				   enum i915_cache_level cache_level,
+				   u32 unused)
+{
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct sgt_dma iter = {
+		.sg = pages->sgl,
+		.dma = sg_dma_address(iter.sg),
+		.max = iter.dma + iter.sg->length,
+	};
+	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
+	unsigned int pml4e = gen8_pml4e_index(start);
 
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
-			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
-						      start, cache_level);
-		}
-	}
+	while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter,
+					     start, cache_level))
+		;
 }
 
 static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
@@ -1591,7 +1611,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
@@ -1606,6 +1625,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 		ppgtt->base.total = 1ULL << 48;
 		ppgtt->switch_mm = gen8_48b_mm_switch;
+
+		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
 	} else {
 		ret = __pdp_init(dev_priv, &ppgtt->pdp);
 		if (ret)
@@ -1622,6 +1643,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			if (ret)
 				goto free_scratch;
 		}
+
+		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
 	}
 
 	if (intel_vgpu_active(dev_priv))
@@ -1888,11 +1911,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
-struct sgt_dma {
-	struct scatterlist *sg;
-	dma_addr_t dma, max;
-};
-
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
@@ -2434,26 +2452,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
-	gen8_pte_t gtt_entry;
+	gen8_pte_t pte_encode = gen8_pte_encode(0, level);
 	dma_addr_t addr;
-	int i = 0;
-
-	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = gen8_pte_encode(addr, level);
-		gen8_set_pte(&gtt_entries[i++], gtt_entry);
-	}
+	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+	gtt_entries += start >> PAGE_SHIFT;
+	for_each_sgt_dma(addr, sgt_iter, st)
+		gen8_set_pte(gtt_entries++, pte_encode | addr);
 
-	/*
-	 * XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 04/22] drm/i915: Don't special case teardown of aliasing_ppgtt
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 02/22] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 05/22] drm/i915: Split ggtt/aliasing_gtt unbind_vma Chris Wilson
                   ` (18 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

The aliasing_ppgtt is a regular ppgtt, and we can use the regular
i915_ppgtt_put() to properly tear it down.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 50 +++++++++++--------------------------
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index fcb8d635aec0..c3ebc965386e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2232,23 +2232,6 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
 }
 
-static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
-			   struct drm_i915_private *dev_priv,
-			   struct drm_i915_file_private *file_priv,
-			   const char *name)
-{
-	int ret;
-
-	ret = __hw_ppgtt_init(ppgtt, dev_priv);
-	if (ret == 0) {
-		kref_init(&ppgtt->ref);
-		i915_address_space_init(&ppgtt->base, dev_priv, name);
-		ppgtt->base.file = file_priv;
-	}
-
-	return ret;
-}
-
 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
 {
 	gtt_write_workarounds(dev_priv);
@@ -2286,12 +2269,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv,
 	if (!ppgtt)
 		return ERR_PTR(-ENOMEM);
 
-	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name);
+	ret = __hw_ppgtt_init(ppgtt, dev_priv);
 	if (ret) {
 		kfree(ppgtt);
 		return ERR_PTR(ret);
 	}
 
+	kref_init(&ppgtt->ref);
+	i915_address_space_init(&ppgtt->base, dev_priv, name);
+	ppgtt->base.file = fpriv;
+
 	trace_i915_ppgtt_create(&ppgtt->base);
 
 	return ppgtt;
@@ -2751,19 +2738,15 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 	struct i915_hw_ppgtt *ppgtt;
 	int err;
 
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return -ENOMEM;
-
-	err = __hw_ppgtt_init(ppgtt, i915);
-	if (err)
-		goto err_ppgtt;
+	ppgtt = i915_ppgtt_create(i915, NULL, "[alias]");
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
 
 	if (ppgtt->base.allocate_va_range) {
 		err = ppgtt->base.allocate_va_range(&ppgtt->base,
 						    0, ppgtt->base.total);
 		if (err)
-			goto err_ppgtt_cleanup;
+			goto err_ppgtt;
 	}
 
 	ppgtt->base.clear_range(&ppgtt->base,
@@ -2776,10 +2759,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 
 	return 0;
 
-err_ppgtt_cleanup:
-	ppgtt->base.cleanup(&ppgtt->base);
 err_ppgtt:
-	kfree(ppgtt);
+	i915_ppgtt_put(ppgtt);
 	return err;
 }
 
@@ -2792,8 +2773,7 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
 	if (!ppgtt)
 		return;
 
-	ppgtt->base.cleanup(&ppgtt->base);
-	kfree(ppgtt);
+	i915_ppgtt_put(ppgtt);
 
 	ggtt->base.bind_vma = ggtt_bind_vma;
 }
@@ -2868,21 +2848,21 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 		WARN_ON(i915_vma_unbind(vma));
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	i915_gem_fini_aliasing_ppgtt(dev_priv);
 	i915_gem_cleanup_stolen(&dev_priv->drm);
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_fini_aliasing_ppgtt(dev_priv);
+
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
 
 	if (drm_mm_initialized(&ggtt->base.mm)) {
 		intel_vgt_deballoon(dev_priv);
-
-		mutex_lock(&dev_priv->drm.struct_mutex);
 		i915_address_space_fini(&ggtt->base);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
 	ggtt->base.cleanup(&ggtt->base);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	arch_phys_wc_del(ggtt->mtrr);
 	io_mapping_fini(&ggtt->mappable);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 05/22] drm/i915: Split ggtt/aliasing_gtt unbind_vma
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (2 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 04/22] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 06/22] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
                   ` (17 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Similar to how we already split the bind_vma for ggtt/aliasing_gtt, also
split up the unbind for symmetry.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c3ebc965386e..2b86ec376480 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2642,6 +2642,15 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	return 0;
 }
 
+static void ggtt_unbind_vma(struct i915_vma *vma)
+{
+	struct drm_i915_private *i915 = vma->vm->i915;
+
+	intel_runtime_pm_get(i915);
+	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+	intel_runtime_pm_put(i915);
+}
+
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
@@ -2678,22 +2687,21 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	return 0;
 }
 
-static void ggtt_unbind_vma(struct i915_vma *vma)
+static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
-	struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
-	const u64 size = min(vma->size, vma->node.size);
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
-		vma->vm->clear_range(vma->vm,
-				     vma->node.start, size);
+		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 		intel_runtime_pm_put(i915);
 	}
 
-	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
-		appgtt->base.clear_range(&appgtt->base,
-					 vma->node.start, size);
+	if (vma->flags & I915_VMA_LOCAL_BIND) {
+		struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
+
+		vm->clear_range(vm, vma->node.start, vma->size);
+	}
 }
 
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
@@ -2742,6 +2750,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
+
 	if (ppgtt->base.allocate_va_range) {
 		err = ppgtt->base.allocate_va_range(&ppgtt->base,
 						    0, ppgtt->base.total);
@@ -2754,9 +2763,13 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 				ppgtt->base.total);
 
 	i915->mm.aliasing_ppgtt = ppgtt;
+
 	WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
 	ggtt->base.bind_vma = aliasing_gtt_bind_vma;
 
+	WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
+	ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
+
 	return 0;
 
 err_ppgtt:
@@ -2776,6 +2789,7 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
 	i915_ppgtt_put(ppgtt);
 
 	ggtt->base.bind_vma = ggtt_bind_vma;
+	ggtt->base.unbind_vma = ggtt_unbind_vma;
 }
 
 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 06/22] drm/i915: Convert clflushed pagetables over to WC maps
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (3 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 05/22] drm/i915: Split ggtt/aliasing_gtt unbind_vma Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 07/22] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
                   ` (16 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We flush the entire page every time we update a few bytes, making the
update of a page table many, many times slower than is required. If we
create a WC map of the page for our updates, we can avoid the clflush
but incur additional cost for creating the pagetable. We amortize that
cost by reusing page vmappings, and only changing the page protection in
batches.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h               |   2 -
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 333 +++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h           |   8 +
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |   8 +-
 4 files changed, 181 insertions(+), 170 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 80a4b348904b..251b2d66407e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2487,8 +2487,6 @@ struct drm_i915_private {
 		int	irq;
 	} lpe_audio;
 
-	I915_SELFTEST_DECLARE(struct fault_attr vm_fault);
-
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2b86ec376480..9465a3a93bad 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -343,46 +343,72 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
-static int __setup_page_dma(struct drm_i915_private *dev_priv,
-			    struct i915_page_dma *p, gfp_t flags)
+static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 {
-	struct device *kdev = &dev_priv->drm.pdev->dev;
+	struct page *page;
 
-	if (I915_SELFTEST_ONLY(should_fail(&dev_priv->vm_fault, 1)))
-		i915_gem_shrink_all(dev_priv);
+	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
+		i915_gem_shrink_all(vm->i915);
 
-	p->page = alloc_page(flags);
-	if (!p->page)
-		return -ENOMEM;
+	if (vm->free_pages.nr)
+		return vm->free_pages.pages[--vm->free_pages.nr];
 
-	p->daddr = dma_map_page(kdev,
-				p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	page = alloc_page(gfp);
+	if (!page)
+		return NULL;
 
-	if (dma_mapping_error(kdev, p->daddr)) {
-		__free_page(p->page);
-		return -EINVAL;
+	if (vm->pt_kmap_wc)
+		set_pages_array_wc(&page, 1);
+
+	return page;
+}
+
+static void vm_free_pages_release(struct i915_address_space *vm)
+{
+	GEM_BUG_ON(!pagevec_count(&vm->free_pages));
+
+	if (vm->pt_kmap_wc)
+		set_pages_array_wb(vm->free_pages.pages,
+				   pagevec_count(&vm->free_pages));
+
+	__pagevec_release(&vm->free_pages);
+}
+
+static void vm_free_page(struct i915_address_space *vm, struct page *page)
+{
+	if (!pagevec_add(&vm->free_pages, page))
+		vm_free_pages_release(vm);
+}
+
+static int __setup_page_dma(struct i915_address_space *vm,
+			    struct i915_page_dma *p,
+			    gfp_t gfp)
+{
+	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
+	if (unlikely(!p->page))
+		return -ENOMEM;
+
+	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
+				PCI_DMA_BIDIRECTIONAL);
+	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
+		vm_free_page(vm, p->page);
+		return -ENOMEM;
 	}
 
 	return 0;
 }
 
-static int setup_page_dma(struct drm_i915_private *dev_priv,
+static int setup_page_dma(struct i915_address_space *vm,
 			  struct i915_page_dma *p)
 {
-	return __setup_page_dma(dev_priv, p, I915_GFP_DMA);
+	return __setup_page_dma(vm, p, I915_GFP_DMA);
 }
 
-static void cleanup_page_dma(struct drm_i915_private *dev_priv,
+static void cleanup_page_dma(struct i915_address_space *vm,
 			     struct i915_page_dma *p)
 {
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	if (WARN_ON(!p->page))
-		return;
-
-	dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	__free_page(p->page);
-	memset(p, 0, sizeof(*p));
+	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	vm_free_page(vm, p->page);
 }
 
 static void *kmap_page_dma(struct i915_page_dma *p)
@@ -393,67 +419,54 @@ static void *kmap_page_dma(struct i915_page_dma *p)
 /* We use the flushing unmap only with ppgtt structures:
  * page directories, page tables and scratch pages.
  */
-static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr)
+static void kunmap_page_dma(void *vaddr)
 {
-	/* There are only few exceptions for gen >=6. chv and bxt.
-	 * And we are not sure about the latter so play safe for now.
-	 */
-	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
-
 	kunmap_atomic(vaddr);
 }
 
 #define kmap_px(px) kmap_page_dma(px_base(px))
-#define kunmap_px(ppgtt, vaddr) \
-		kunmap_page_dma((ppgtt)->base.i915, (vaddr))
+#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
 
-#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px))
-#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px))
-#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v))
-#define fill32_px(dev_priv, px, v) \
-		fill_page_dma_32((dev_priv), px_base(px), (v))
+#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
+#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
+#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
+#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
 
-static void fill_page_dma(struct drm_i915_private *dev_priv,
-			  struct i915_page_dma *p, const uint64_t val)
+static void fill_page_dma(struct i915_address_space *vm,
+			  struct i915_page_dma *p,
+			  const u64 val)
 {
+	u64 * const vaddr = kmap_page_dma(p);
 	int i;
-	uint64_t * const vaddr = kmap_page_dma(p);
 
 	for (i = 0; i < 512; i++)
 		vaddr[i] = val;
 
-	kunmap_page_dma(dev_priv, vaddr);
+	kunmap_page_dma(vaddr);
 }
 
-static void fill_page_dma_32(struct drm_i915_private *dev_priv,
-			     struct i915_page_dma *p, const uint32_t val32)
+static void fill_page_dma_32(struct i915_address_space *vm,
+			     struct i915_page_dma *p,
+			     const u32 v)
 {
-	uint64_t v = val32;
-
-	v = v << 32 | val32;
-
-	fill_page_dma(dev_priv, p, v);
+	fill_page_dma(vm, p, (u64)v << 32 | v);
 }
 
 static int
-setup_scratch_page(struct drm_i915_private *dev_priv,
-		   struct i915_page_dma *scratch,
-		   gfp_t gfp)
+setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 {
-	return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO);
+	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
 }
 
-static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
-				 struct i915_page_dma *scratch)
+static void cleanup_scratch_page(struct i915_address_space *vm)
 {
-	cleanup_page_dma(dev_priv, scratch);
+	cleanup_page_dma(vm, &vm->scratch_page);
 }
 
-static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
+static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 {
 	struct i915_page_table *pt;
-	const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
+	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
 	int ret = -ENOMEM;
 
 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
@@ -466,7 +479,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
 	if (!pt->used_ptes)
 		goto fail_bitmap;
 
-	ret = setup_px(dev_priv, pt);
+	ret = setup_px(vm, pt);
 	if (ret)
 		goto fail_page_m;
 
@@ -480,10 +493,9 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
 	return ERR_PTR(ret);
 }
 
-static void free_pt(struct drm_i915_private *dev_priv,
-		    struct i915_page_table *pt)
+static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 {
-	cleanup_px(dev_priv, pt);
+	cleanup_px(vm, pt);
 	kfree(pt->used_ptes);
 	kfree(pt);
 }
@@ -496,7 +508,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
 	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
 				      I915_CACHE_LLC);
 
-	fill_px(vm->i915, pt, scratch_pte);
+	fill_px(vm, pt, scratch_pte);
 }
 
 static void gen6_initialize_pt(struct i915_address_space *vm,
@@ -509,10 +521,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
 				     I915_CACHE_LLC, 0);
 
-	fill32_px(vm->i915, pt, scratch_pte);
+	fill32_px(vm, pt, scratch_pte);
 }
 
-static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
+static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
 	int ret = -ENOMEM;
@@ -526,7 +538,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
 	if (!pd->used_pdes)
 		goto fail_bitmap;
 
-	ret = setup_px(dev_priv, pd);
+	ret = setup_px(vm, pd);
 	if (ret)
 		goto fail_page_m;
 
@@ -540,11 +552,11 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
 	return ERR_PTR(ret);
 }
 
-static void free_pd(struct drm_i915_private *dev_priv,
+static void free_pd(struct i915_address_space *vm,
 		    struct i915_page_directory *pd)
 {
 	if (px_page(pd)) {
-		cleanup_px(dev_priv, pd);
+		cleanup_px(vm, pd);
 		kfree(pd->used_pdes);
 		kfree(pd);
 	}
@@ -557,7 +569,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 
 	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
 
-	fill_px(vm->i915, pd, scratch_pde);
+	fill_px(vm, pd, scratch_pde);
 }
 
 static int __pdp_init(struct drm_i915_private *dev_priv,
@@ -591,23 +603,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 	pdp->page_directory = NULL;
 }
 
-static struct
-i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
+static struct i915_page_directory_pointer *
+alloc_pdp(struct i915_address_space *vm)
 {
 	struct i915_page_directory_pointer *pdp;
 	int ret = -ENOMEM;
 
-	WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv));
+	WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
 
 	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
 	if (!pdp)
 		return ERR_PTR(-ENOMEM);
 
-	ret = __pdp_init(dev_priv, pdp);
+	ret = __pdp_init(vm->i915, pdp);
 	if (ret)
 		goto fail_bitmap;
 
-	ret = setup_px(dev_priv, pdp);
+	ret = setup_px(vm, pdp);
 	if (ret)
 		goto fail_page_m;
 
@@ -621,12 +633,12 @@ i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv)
 	return ERR_PTR(ret);
 }
 
-static void free_pdp(struct drm_i915_private *dev_priv,
+static void free_pdp(struct i915_address_space *vm,
 		     struct i915_page_directory_pointer *pdp)
 {
 	__pdp_fini(pdp);
-	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
-		cleanup_px(dev_priv, pdp);
+	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
+		cleanup_px(vm, pdp);
 		kfree(pdp);
 	}
 }
@@ -638,7 +650,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
 
 	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 
-	fill_px(vm->i915, pdp, scratch_pdpe);
+	fill_px(vm, pdp, scratch_pdpe);
 }
 
 static void gen8_initialize_pml4(struct i915_address_space *vm,
@@ -649,7 +661,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
 					  I915_CACHE_LLC);
 
-	fill_px(vm->i915, pml4, scratch_pml4e);
+	fill_px(vm, pml4, scratch_pml4e);
 }
 
 static void
@@ -665,20 +677,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
 
 	page_directorypo = kmap_px(pdp);
 	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
-	kunmap_px(ppgtt, page_directorypo);
+	kunmap_px(page_directorypo);
 }
 
 static void
-gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt,
-		 struct i915_pml4 *pml4,
+gen8_setup_pml4e(struct i915_pml4 *pml4,
 		 struct i915_page_directory_pointer *pdp,
 		 int index)
 {
 	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
 
-	WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)));
 	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
-	kunmap_px(ppgtt, pagemap);
+	kunmap_px(pagemap);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -747,7 +757,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				uint64_t start,
 				uint64_t length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
 	unsigned int pte_end = pte + num_entries;
@@ -769,7 +778,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	vaddr = kmap_px(pt);
 	while (pte < pte_end)
 		vaddr[pte++] = scratch_pte;
-	kunmap_px(ppgtt, vaddr);
+	kunmap_px(vaddr);
 
 	return false;
 }
@@ -782,7 +791,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				uint64_t start,
 				uint64_t length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_table *pt;
 	uint64_t pde;
 	gen8_pde_t *pde_vaddr;
@@ -797,8 +805,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 			__clear_bit(pde, pd->used_pdes);
 			pde_vaddr = kmap_px(pd);
 			pde_vaddr[pde] = scratch_pde;
-			kunmap_px(ppgtt, pde_vaddr);
-			free_pt(vm->i915, pt);
+			kunmap_px(pde_vaddr);
+			free_pt(vm, pt);
 		}
 	}
 
@@ -827,7 +835,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
 			__clear_bit(pdpe, pdp->used_pdpes);
 			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
-			free_pd(vm->i915, pd);
+			free_pd(vm, pd);
 		}
 	}
 
@@ -848,7 +856,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 				  uint64_t start,
 				  uint64_t length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 
@@ -860,8 +867,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 
 		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
 			__clear_bit(pml4e, pml4->used_pml4es);
-			gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e);
-			free_pdp(vm->i915, pdp);
+			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
+			free_pdp(vm, pdp);
 		}
 	}
 }
@@ -919,12 +926,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 				pde = 0;
 			}
 
-			kunmap_px(ppgtt, vaddr);
+			kunmap_px(vaddr);
 			vaddr = kmap_px(pd->page_table[pde]);
 			pte = 0;
 		}
 	} while (1);
-	kunmap_px(ppgtt, vaddr);
+	kunmap_px(vaddr);
 
 	return ret;
 }
@@ -966,7 +973,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 		;
 }
 
-static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
+static void gen8_free_page_tables(struct i915_address_space *vm,
 				  struct i915_page_directory *pd)
 {
 	int i;
@@ -978,34 +985,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
 		if (WARN_ON(!pd->page_table[i]))
 			continue;
 
-		free_pt(dev_priv, pd->page_table[i]);
+		free_pt(vm, pd->page_table[i]);
 		pd->page_table[i] = NULL;
 	}
 }
 
 static int gen8_init_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	int ret;
 
-	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
+	ret = setup_scratch_page(vm, I915_GFP_DMA);
 	if (ret)
 		return ret;
 
-	vm->scratch_pt = alloc_pt(dev_priv);
+	vm->scratch_pt = alloc_pt(vm);
 	if (IS_ERR(vm->scratch_pt)) {
 		ret = PTR_ERR(vm->scratch_pt);
 		goto free_scratch_page;
 	}
 
-	vm->scratch_pd = alloc_pd(dev_priv);
+	vm->scratch_pd = alloc_pd(vm);
 	if (IS_ERR(vm->scratch_pd)) {
 		ret = PTR_ERR(vm->scratch_pd);
 		goto free_pt;
 	}
 
-	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
-		vm->scratch_pdp = alloc_pdp(dev_priv);
+	if (USES_FULL_48BIT_PPGTT(dev)) {
+		vm->scratch_pdp = alloc_pdp(vm);
 		if (IS_ERR(vm->scratch_pdp)) {
 			ret = PTR_ERR(vm->scratch_pdp);
 			goto free_pd;
@@ -1020,11 +1026,11 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	return 0;
 
 free_pd:
-	free_pd(dev_priv, vm->scratch_pd);
+	free_pd(vm, vm->scratch_pd);
 free_pt:
-	free_pt(dev_priv, vm->scratch_pt);
+	free_pt(vm, vm->scratch_pt);
 free_scratch_page:
-	cleanup_scratch_page(dev_priv, &vm->scratch_page);
+	cleanup_scratch_page(vm);
 
 	return ret;
 }
@@ -1062,44 +1068,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
 
 static void gen8_free_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
-
-	if (USES_FULL_48BIT_PPGTT(dev_priv))
-		free_pdp(dev_priv, vm->scratch_pdp);
-	free_pd(dev_priv, vm->scratch_pd);
-	free_pt(dev_priv, vm->scratch_pt);
-	cleanup_scratch_page(dev_priv, &vm->scratch_page);
+	if (USES_FULL_48BIT_PPGTT(vm->i915))
+		free_pdp(vm, vm->scratch_pdp);
+	free_pd(vm, vm->scratch_pd);
+	free_pt(vm, vm->scratch_pt);
+	cleanup_scratch_page(vm);
 }
 
-static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
+static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 				    struct i915_page_directory_pointer *pdp)
 {
 	int i;
 
-	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
+	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
 		if (WARN_ON(!pdp->page_directory[i]))
 			continue;
 
-		gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
-		free_pd(dev_priv, pdp->page_directory[i]);
+		gen8_free_page_tables(vm, pdp->page_directory[i]);
+		free_pd(vm, pdp->page_directory[i]);
 	}
 
-	free_pdp(dev_priv, pdp);
+	free_pdp(vm, pdp);
 }
 
 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_i915_private *dev_priv = ppgtt->base.i915;
 	int i;
 
 	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
 		if (WARN_ON(!ppgtt->pml4.pdps[i]))
 			continue;
 
-		gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]);
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
 	}
 
-	cleanup_px(dev_priv, &ppgtt->pml4);
+	cleanup_px(&ppgtt->base, &ppgtt->pml4);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1110,8 +1113,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	if (intel_vgpu_active(dev_priv))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
-	if (!USES_FULL_48BIT_PPGTT(dev_priv))
-		gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp);
+	if (!USES_FULL_48BIT_PPGTT(vm->i915))
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
 	else
 		gen8_ppgtt_cleanup_4lvl(ppgtt);
 
@@ -1142,7 +1145,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 				     uint64_t length,
 				     unsigned long *new_pts)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_table *pt;
 	uint32_t pde;
 
@@ -1154,7 +1156,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 			continue;
 		}
 
-		pt = alloc_pt(dev_priv);
+		pt = alloc_pt(vm);
 		if (IS_ERR(pt))
 			goto unwind_out;
 
@@ -1168,7 +1170,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 
 unwind_out:
 	for_each_set_bit(pde, new_pts, I915_PDES)
-		free_pt(dev_priv, pd->page_table[pde]);
+		free_pt(vm, pd->page_table[pde]);
 
 	return -ENOMEM;
 }
@@ -1203,7 +1205,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 				  uint64_t length,
 				  unsigned long *new_pds)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_directory *pd;
 	uint32_t pdpe;
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
@@ -1214,7 +1215,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 		if (test_bit(pdpe, pdp->used_pdpes))
 			continue;
 
-		pd = alloc_pd(dev_priv);
+		pd = alloc_pd(vm);
 		if (IS_ERR(pd))
 			goto unwind_out;
 
@@ -1228,7 +1229,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 
 unwind_out:
 	for_each_set_bit(pdpe, new_pds, pdpes)
-		free_pd(dev_priv, pdp->page_directory[pdpe]);
+		free_pd(vm, pdp->page_directory[pdpe]);
 
 	return -ENOMEM;
 }
@@ -1256,7 +1257,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 				  uint64_t length,
 				  unsigned long *new_pdps)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_directory_pointer *pdp;
 	uint32_t pml4e;
 
@@ -1264,7 +1264,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (!test_bit(pml4e, pml4->used_pml4es)) {
-			pdp = alloc_pdp(dev_priv);
+			pdp = alloc_pdp(vm);
 			if (IS_ERR(pdp))
 				goto unwind_out;
 
@@ -1282,7 +1282,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 
 unwind_out:
 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		free_pdp(dev_priv, pml4->pdps[pml4e]);
+		free_pdp(vm, pml4->pdps[pml4e]);
 
 	return -ENOMEM;
 }
@@ -1331,7 +1331,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned long *new_page_dirs, *new_page_tables;
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -1400,7 +1399,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			 * point we're still relying on insert_entries() */
 		}
 
-		kunmap_px(ppgtt, page_directory);
+		kunmap_px(page_directory);
 		__set_bit(pdpe, pdp->used_pdpes);
 		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
 	}
@@ -1415,12 +1414,11 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 		for_each_set_bit(temp, new_page_tables + pdpe *
 				BITS_TO_LONGS(I915_PDES), I915_PDES)
-			free_pt(dev_priv,
-				pdp->page_directory[pdpe]->page_table[temp]);
+			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
 	}
 
 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
-		free_pd(dev_priv, pdp->page_directory[pdpe]);
+		free_pd(vm, pdp->page_directory[pdpe]);
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	mark_tlbs_dirty(ppgtt);
@@ -1433,7 +1431,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 	int ret = 0;
@@ -1457,7 +1454,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 		if (ret)
 			goto err_out;
 
-		gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e);
+		gen8_setup_pml4e(pml4, pdp, pml4e);
 	}
 
 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1467,7 +1464,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 
 err_out:
 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]);
+		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
 
 	return ret;
 }
@@ -1483,7 +1480,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
 }
 
-static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
+			  struct i915_page_directory_pointer *pdp,
 			  uint64_t start, uint64_t length,
 			  gen8_pte_t scratch_pte,
 			  struct seq_file *m)
@@ -1549,7 +1547,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 						 I915_CACHE_LLC);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
-		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
+		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
 	} else {
 		uint64_t pml4e;
 		struct i915_pml4 *pml4 = &ppgtt->pml4;
@@ -1560,7 +1558,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 				continue;
 
 			seq_printf(m, "    PML4E #%llu\n", pml4e);
-			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
+			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
 		}
 	}
 }
@@ -1616,8 +1614,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->debug_dump = gen8_dump_ppgtt;
 
+	/* There are only a few exceptions for gen >= 6: chv and bxt.
+	 * We are not sure about the latter, so play it safe for now.
+	 */
+	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
+		ppgtt->base.pt_kmap_wc = true;
+
 	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
-		ret = setup_px(dev_priv, &ppgtt->pml4);
+		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
 		if (ret)
 			goto free_scratch;
 
@@ -1706,7 +1710,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 			}
 			seq_puts(m, "\n");
 		}
-		kunmap_px(ppgtt, pt_vaddr);
+		kunmap_px(pt_vaddr);
 	}
 }
 
@@ -1903,7 +1907,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		for (i = first_pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
-		kunmap_px(ppgtt, pt_vaddr);
+		kunmap_px(pt_vaddr);
 
 		num_entries -= last_pte - first_pte;
 		first_pte = 0;
@@ -1942,12 +1946,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(ppgtt, vaddr);
+			kunmap_px(vaddr);
 			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
 	} while (1);
-	kunmap_px(ppgtt, vaddr);
+	kunmap_px(vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -1981,7 +1985,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 		/* We've already allocated a page table */
 		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
 
-		pt = alloc_pt(dev_priv);
+		pt = alloc_pt(vm);
 		if (IS_ERR(pt)) {
 			ret = PTR_ERR(pt);
 			goto unwind_out;
@@ -2029,7 +2033,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
 
 		ppgtt->pd.page_table[pde] = vm->scratch_pt;
-		free_pt(dev_priv, pt);
+		free_pt(vm, pt);
 	}
 
 	mark_tlbs_dirty(ppgtt);
@@ -2038,16 +2042,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 
 static int gen6_init_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
 	int ret;
 
-	ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA);
+	ret = setup_scratch_page(vm, I915_GFP_DMA);
 	if (ret)
 		return ret;
 
-	vm->scratch_pt = alloc_pt(dev_priv);
+	vm->scratch_pt = alloc_pt(vm);
 	if (IS_ERR(vm->scratch_pt)) {
-		cleanup_scratch_page(dev_priv, &vm->scratch_page);
+		cleanup_scratch_page(vm);
 		return PTR_ERR(vm->scratch_pt);
 	}
 
@@ -2058,17 +2061,14 @@ static int gen6_init_scratch(struct i915_address_space *vm)
 
 static void gen6_free_scratch(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
-
-	free_pt(dev_priv, vm->scratch_pt);
-	cleanup_scratch_page(dev_priv, &vm->scratch_page);
+	free_pt(vm, vm->scratch_pt);
+	cleanup_scratch_page(vm);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd = &ppgtt->pd;
-	struct drm_i915_private *dev_priv = vm->i915;
 	struct i915_page_table *pt;
 	uint32_t pde;
 
@@ -2076,7 +2076,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 
 	gen6_for_all_pdes(pt, pd, pde)
 		if (pt != vm->scratch_pt)
-			free_pt(dev_priv, pt);
+			free_pt(vm, pt);
 
 	gen6_free_scratch(vm);
 }
@@ -2185,6 +2185,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 			   struct drm_i915_private *dev_priv)
 {
 	ppgtt->base.i915 = dev_priv;
+	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
 
 	if (INTEL_INFO(dev_priv)->gen < 8)
 		return gen6_ppgtt_init(ppgtt);
@@ -2206,10 +2207,14 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	INIT_LIST_HEAD(&vm->unbound_list);
 
 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
+	pagevec_init(&vm->free_pages, false);
 }
 
 static void i915_address_space_fini(struct i915_address_space *vm)
 {
+	if (pagevec_count(&vm->free_pages))
+		vm_free_pages_release(vm);
+
 	i915_gem_timeline_fini(&vm->timeline);
 	drm_mm_takedown(&vm->mm);
 	list_del(&vm->global_link);
@@ -2317,9 +2322,8 @@ void i915_ppgtt_release(struct kref *kref)
 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
 	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
 
-	i915_address_space_fini(&ppgtt->base);
-
 	ppgtt->base.cleanup(&ppgtt->base);
+	i915_address_space_fini(&ppgtt->base);
 	kfree(ppgtt);
 }
 
@@ -2986,7 +2990,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 		return -ENOMEM;
 	}
 
-	ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32);
+	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
 	if (ret) {
 		DRM_ERROR("Scratch setup failed\n");
 		/* iounmap will also get called at remove, but meh */
@@ -3075,7 +3079,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 
 	iounmap(ggtt->gsm);
-	cleanup_scratch_page(vm->i915, &vm->scratch_page);
+	cleanup_scratch_page(vm);
 }
 
 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
@@ -3226,6 +3230,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
 	int ret;
 
 	ggtt->base.i915 = dev_priv;
+	ggtt->base.dma = &dev_priv->drm.pdev->dev;
 
 	if (INTEL_GEN(dev_priv) <= 5)
 		ret = i915_gmch_probe(ggtt);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index fe922059a412..6162bedc0811 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -36,9 +36,11 @@
 
 #include <linux/io-mapping.h>
 #include <linux/mm.h>
+#include <linux/pagevec.h>
 
 #include "i915_gem_timeline.h"
 #include "i915_gem_request.h"
+#include "i915_selftest.h"
 
 #define I915_GTT_PAGE_SIZE 4096UL
 #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
@@ -247,6 +249,7 @@ struct i915_address_space {
 	struct drm_mm mm;
 	struct i915_gem_timeline timeline;
 	struct drm_i915_private *i915;
+	struct device *dma;
 	/* Every address space belongs to a struct file - except for the global
 	 * GTT that is owned by the driver (and so @file is set to NULL). In
 	 * principle, no information should leak from one context to another
@@ -297,6 +300,9 @@ struct i915_address_space {
 	 */
 	struct list_head unbound_list;
 
+	struct pagevec free_pages;
+	bool pt_kmap_wc;
+
 	/* FIXME: Need a more generic return type */
 	gen6_pte_t (*pte_encode)(dma_addr_t addr,
 				 enum i915_cache_level level,
@@ -326,6 +332,8 @@ struct i915_address_space {
 	int (*bind_vma)(struct i915_vma *vma,
 			enum i915_cache_level cache_level,
 			u32 flags);
+
+	I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
 };
 
 #define i915_is_ggtt(V) (!(V)->file)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 8e3db5a432ff..9a449ef75089 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -759,17 +759,17 @@ static int shrink_hole(struct drm_i915_private *i915,
 	unsigned long prime;
 	int err;
 
-	i915->vm_fault.probability = 999;
-	atomic_set(&i915->vm_fault.times, -1);
+	vm->fault_attr.probability = 999;
+	atomic_set(&vm->fault_attr.times, -1);
 
 	for_each_prime_number_from(prime, 0, ULONG_MAX - 1) {
-		i915->vm_fault.interval = prime;
+		vm->fault_attr.interval = prime;
 		err = __shrink_hole(i915, vm, hole_start, hole_end, end_time);
 		if (err)
 			break;
 	}
 
-	memset(&i915->vm_fault, 0, sizeof(i915->vm_fault));
+	memset(&vm->fault_attr, 0, sizeof(vm->fault_attr));
 
 	return err;
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 07/22] drm/i915: Remove kmap/kunmap wrappers
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (4 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 06/22] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 08/22] drm/i915: Move allocate_va_range to GTT Chris Wilson
                   ` (15 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

As these are now both plain and simple kmap_atomic/kunmap_atomic pairs,
we can remove the wrappers for a small gain of clarity (in particular,
not hiding the atomic critical sections!).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 69 ++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9465a3a93bad..542ceb8da602 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -411,21 +411,7 @@ static void cleanup_page_dma(struct i915_address_space *vm,
 	vm_free_page(vm, p->page);
 }
 
-static void *kmap_page_dma(struct i915_page_dma *p)
-{
-	return kmap_atomic(p->page);
-}
-
-/* We use the flushing unmap only with ppgtt structures:
- * page directories, page tables and scratch pages.
- */
-static void kunmap_page_dma(void *vaddr)
-{
-	kunmap_atomic(vaddr);
-}
-
-#define kmap_px(px) kmap_page_dma(px_base(px))
-#define kunmap_px(vaddr) kunmap_page_dma((vaddr))
+#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
 
 #define setup_px(vm, px) setup_page_dma((vm), px_base(px))
 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
@@ -436,13 +422,13 @@ static void fill_page_dma(struct i915_address_space *vm,
 			  struct i915_page_dma *p,
 			  const u64 val)
 {
-	u64 * const vaddr = kmap_page_dma(p);
+	u64 * const vaddr = kmap_atomic(p->page);
 	int i;
 
 	for (i = 0; i < 512; i++)
 		vaddr[i] = val;
 
-	kunmap_page_dma(vaddr);
+	kunmap_atomic(vaddr);
 }
 
 static void fill_page_dma_32(struct i915_address_space *vm,
@@ -675,9 +661,9 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
 	if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)))
 		return;
 
-	page_directorypo = kmap_px(pdp);
+	page_directorypo = kmap_atomic_px(pdp);
 	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
-	kunmap_px(page_directorypo);
+	kunmap_atomic(page_directorypo);
 }
 
 static void
@@ -685,10 +671,10 @@ gen8_setup_pml4e(struct i915_pml4 *pml4,
 		 struct i915_page_directory_pointer *pdp,
 		 int index)
 {
-	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
 
 	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
-	kunmap_px(pagemap);
+	kunmap_atomic(pagemap);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -775,10 +761,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 			return true;
 	}
 
-	vaddr = kmap_px(pt);
+	vaddr = kmap_atomic_px(pt);
 	while (pte < pte_end)
 		vaddr[pte++] = scratch_pte;
-	kunmap_px(vaddr);
+	kunmap_atomic(vaddr);
 
 	return false;
 }
@@ -803,9 +789,9 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 
 		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
 			__clear_bit(pde, pd->used_pdes);
-			pde_vaddr = kmap_px(pd);
+			pde_vaddr = kmap_atomic_px(pd);
 			pde_vaddr[pde] = scratch_pde;
-			kunmap_px(pde_vaddr);
+			kunmap_atomic(pde_vaddr);
 			free_pt(vm, pt);
 		}
 	}
@@ -905,7 +891,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 	bool ret = true;
 
 	pd = pdp->page_directory[pdpe];
-	vaddr = kmap_px(pd->page_table[pde]);
+	vaddr = kmap_atomic_px(pd->page_table[pde]);
 	do {
 		vaddr[pte] = pte_encode | iter->dma;
 		iter->dma += PAGE_SIZE;
@@ -926,12 +912,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 				pde = 0;
 			}
 
-			kunmap_px(vaddr);
-			vaddr = kmap_px(pd->page_table[pde]);
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(pd->page_table[pde]);
 			pte = 0;
 		}
 	} while (1);
-	kunmap_px(vaddr);
+	kunmap_atomic(vaddr);
 
 	return ret;
 }
@@ -1364,7 +1350,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	/* Allocations have completed successfully, so set the bitmaps, and do
 	 * the mappings. */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		gen8_pde_t *const page_directory = kmap_px(pd);
+		gen8_pde_t *const page_directory = kmap_atomic_px(pd);
 		struct i915_page_table *pt;
 		uint64_t pd_len = length;
 		uint64_t pd_start = start;
@@ -1399,7 +1385,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			 * point we're still relying on insert_entries() */
 		}
 
-		kunmap_px(page_directory);
+		kunmap_atomic(page_directory);
 		__set_bit(pdpe, pdp->used_pdpes);
 		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
 	}
@@ -1506,7 +1492,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 			if (!test_bit(pde, pd->used_pdes))
 				continue;
 
-			pt_vaddr = kmap_px(pt);
+			pt_vaddr = kmap_atomic_px(pt);
 			for (pte = 0; pte < GEN8_PTES; pte += 4) {
 				uint64_t va =
 					(pdpe << GEN8_PDPE_SHIFT) |
@@ -1530,9 +1516,6 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 				}
 				seq_puts(m, "\n");
 			}
-			/* don't use kunmap_px, it could trigger
-			 * an unnecessary flush.
-			 */
 			kunmap_atomic(pt_vaddr);
 		}
 	}
@@ -1687,7 +1670,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 				   expected);
 		seq_printf(m, "\tPDE: %x\n", pd_entry);
 
-		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
+		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
 
 		for (pte = 0; pte < GEN6_PTES; pte+=4) {
 			unsigned long va =
@@ -1710,7 +1693,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 			}
 			seq_puts(m, "\n");
 		}
-		kunmap_px(pt_vaddr);
+		kunmap_atomic(pt_vaddr);
 	}
 }
 
@@ -1902,12 +1885,12 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		if (last_pte > GEN6_PTES)
 			last_pte = GEN6_PTES;
 
-		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
 
 		for (i = first_pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
-		kunmap_px(pt_vaddr);
+		kunmap_atomic(pt_vaddr);
 
 		num_entries -= last_pte - first_pte;
 		first_pte = 0;
@@ -1928,7 +1911,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sgt_dma iter;
 	gen6_pte_t *vaddr;
 
-	vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+	vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
 	iter.sg = pages->sgl;
 	iter.dma = sg_dma_address(iter.sg);
 	iter.max = iter.dma + iter.sg->length;
@@ -1946,12 +1929,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(vaddr);
-			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
 	} while (1);
-	kunmap_px(vaddr);
+	kunmap_atomic(vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 08/22] drm/i915: Move allocate_va_range to GTT
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (5 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 07/22] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 09/22] drm/i915: Always preallocate gen6/7 ppgtt Chris Wilson
                   ` (14 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

In the future, we need to call allocate_va_range on the aliasing-ppgtt,
which means moving the call down from the vma into the vm (which is
more appropriate for calling the vm function).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_vma.c     |  9 ---------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 542ceb8da602..1f887f1477c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -190,11 +190,18 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 			  enum i915_cache_level cache_level,
 			  u32 unused)
 {
-	u32 pte_flags = 0;
+	u32 pte_flags;
+	int ret;
+
+	trace_i915_va_alloc(vma);
+	ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
+	if (ret)
+		return ret;
 
 	vma->pages = vma->obj->mm.pages;
 
 	/* Currently applicable only to VLV */
+	pte_flags = 0;
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
@@ -206,9 +213,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	vma->vm->clear_range(vma->vm,
-			     vma->node.start,
-			     vma->size);
+	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
@@ -2644,9 +2649,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 {
 	struct drm_i915_private *i915 = vma->vm->i915;
 	u32 pte_flags;
+	int ret;
 
 	if (unlikely(!vma->pages)) {
-		int ret = i915_get_ggtt_vma_pages(vma);
+		ret = i915_get_ggtt_vma_pages(vma);
 		if (ret)
 			return ret;
 	}
@@ -2656,6 +2662,22 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
+	if (flags & I915_VMA_LOCAL_BIND) {
+		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
+
+		if (appgtt->base.allocate_va_range) {
+			ret = appgtt->base.allocate_va_range(&appgtt->base,
+							     vma->node.start,
+							     vma->node.size);
+			if (ret)
+				return ret;
+		}
+
+		appgtt->base.insert_entries(&appgtt->base,
+					    vma->pages, vma->node.start,
+					    cache_level, pte_flags);
+	}
+
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_runtime_pm_get(i915);
 		vma->vm->insert_entries(vma->vm,
@@ -2664,13 +2686,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 		intel_runtime_pm_put(i915);
 	}
 
-	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
-		appgtt->base.insert_entries(&appgtt->base,
-					    vma->pages, vma->node.start,
-					    cache_level, pte_flags);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 0dc994b76924..c1abfe7b48ea 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -263,15 +263,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 					vma->vm->total)))
 		return -ENODEV;
 
-	if (vma_flags == 0 && vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma);
-		ret = vma->vm->allocate_va_range(vma->vm,
-						 vma->node.start,
-						 vma->node.size);
-		if (ret)
-			return ret;
-	}
-
 	trace_i915_vma_bind(vma, bind_flags);
 	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
 	if (ret)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 09/22] drm/i915: Always preallocate gen6/7 ppgtt
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (6 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 08/22] drm/i915: Move allocate_va_range to GTT Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 10/22] drm/i915: Remove redundant clear of appgtt Chris Wilson
                   ` (13 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

The hardware does not cope very well with us changing the PD within an
active context (the context must be idle for it to re-read the PD). As
we only check whether the page is idle before changing the entry (and on
through the PD tree), we cannot reliably replace PD entries on
gen6/gen7. To fully avoid changing the tree at runtime, preallocate it
on init.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1f887f1477c9..9d3d06435262 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2097,6 +2097,12 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	if (ppgtt->node.start < ggtt->mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
+	ppgtt->pd.base.ggtt_offset =
+		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
+
+	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
+		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
+
 	return 0;
 
 err_out:
@@ -2139,7 +2145,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	if (ret)
 		return ret;
 
-	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
@@ -2149,22 +2154,21 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
-	ppgtt->pd.base.ggtt_offset =
-		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
-
-	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
-		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
-
 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
-
 	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
 
+	ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
+	if (ret) {
+		gen6_ppgtt_cleanup(&ppgtt->base);
+		return ret;
+	}
+
 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
 			 ppgtt->node.size >> 20,
 			 ppgtt->node.start / PAGE_SIZE);
 
-	DRM_DEBUG("Adding PPGTT at offset %x\n",
-		  ppgtt->pd.base.ggtt_offset << 10);
+	DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
+			 ppgtt->pd.base.ggtt_offset << 10);
 
 	return 0;
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 10/22] drm/i915: Remove redundant clear of appgtt
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (7 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 09/22] drm/i915: Always preallocate gen6/7 ppgtt Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 11/22] drm/i915: Tidy gen6_write_pde() Chris Wilson
                   ` (12 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Upon creation of the va range, it is initialised to point at scratch.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9d3d06435262..14e893cad375 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2756,7 +2756,6 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
-
 	if (ppgtt->base.allocate_va_range) {
 		err = ppgtt->base.allocate_va_range(&ppgtt->base,
 						    0, ppgtt->base.total);
@@ -2764,10 +2763,6 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 			goto err_ppgtt;
 	}
 
-	ppgtt->base.clear_range(&ppgtt->base,
-				ppgtt->base.start,
-				ppgtt->base.total);
-
 	i915->mm.aliasing_ppgtt = ppgtt;
 
 	WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 11/22] drm/i915: Tidy gen6_write_pde()
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (8 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 10/22] drm/i915: Remove redundant clear of appgtt Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 12/22] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
                   ` (11 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Stop passing around unused parameters, which makes the code more compact.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++------------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 14e893cad375..8b25c8b0c02f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1703,36 +1703,28 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static void gen6_write_pde(struct i915_page_directory *pd,
-			    const int pde, struct i915_page_table *pt)
+static inline void gen6_write_pde(struct i915_hw_ppgtt *ppgtt,
+				  const unsigned int pde,
+				  struct i915_page_table *pt)
 {
 	/* Caller needs to make sure the write completes if necessary */
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(pd, struct i915_hw_ppgtt, pd);
-	u32 pd_entry;
-
-	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
-	pd_entry |= GEN6_PDE_VALID;
-
-	writel(pd_entry, ppgtt->pd_addr + pde);
+	writel(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+	       ppgtt->pd_addr + pde);
 }
 
 /* Write all the page tables found in the ppgtt structure to incrementing page
  * directories. */
-static void gen6_write_page_range(struct drm_i915_private *dev_priv,
-				  struct i915_page_directory *pd,
+static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
 				  uint32_t start, uint32_t length)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct i915_page_table *pt;
-	uint32_t pde;
+	unsigned int pde;
 
-	gen6_for_each_pde(pt, pd, start, length, pde)
-		gen6_write_pde(pd, pde, pt);
+	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
+		gen6_write_pde(ppgtt, pde, pt);
+	wmb();
 
-	/* Make sure write is complete before other code can use this page
-	 * table. Also require for WC mapped PTEs */
-	readl(ggtt->gsm);
+	mark_tlbs_dirty(ppgtt);
 }
 
 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
@@ -1997,7 +1989,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 			   gen6_pte_count(start, length));
 
 		if (__test_and_clear_bit(pde, new_page_tables))
-			gen6_write_pde(&ppgtt->pd, pde, pt);
+			gen6_write_pde(ppgtt, pde, pt);
 
 		trace_i915_page_table_entry_map(vm, pde, pt,
 					 gen6_pte_index(start),
@@ -2155,7 +2147,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
-	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
+	gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
 
 	ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
 	if (ret) {
@@ -3389,8 +3381,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		struct i915_address_space *vm;
 
 		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-			/* TODO: Perhaps it shouldn't be gen6 specific */
-
 			struct i915_hw_ppgtt *ppgtt;
 
 			if (i915_is_ggtt(vm))
@@ -3398,8 +3388,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 			else
 				ppgtt = i915_vm_to_ppgtt(vm);
 
-			gen6_write_page_range(dev_priv, &ppgtt->pd,
-					      0, ppgtt->base.total);
+			gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
 		}
 	}
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 12/22] drm/i915: Remove bitmap tracking for used-ptes
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (9 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 11/22] drm/i915: Tidy gen6_write_pde() Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 13/22] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
                   ` (10 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We only operate on known extents (both for alloc/clear) and so we can use
the knowledge of the bind/unbind range along with the knowledge of the
existing pagetable to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 334 ++++++++++++------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +-
 drivers/gpu/drm/i915/i915_trace.h   |  19 +-
 3 files changed, 119 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8b25c8b0c02f..afacf722b8b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -457,62 +457,38 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 {
 	struct i915_page_table *pt;
-	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
-	int ret = -ENOMEM;
 
-	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
-	if (!pt)
+	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pt))
 		return ERR_PTR(-ENOMEM);
 
-	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
-				GFP_KERNEL);
-
-	if (!pt->used_ptes)
-		goto fail_bitmap;
-
-	ret = setup_px(vm, pt);
-	if (ret)
-		goto fail_page_m;
+	if (unlikely(setup_px(vm, pt))) {
+		kfree(pt);
+		return ERR_PTR(-ENOMEM);
+	}
 
+	pt->used_ptes = 0;
 	return pt;
-
-fail_page_m:
-	kfree(pt->used_ptes);
-fail_bitmap:
-	kfree(pt);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 {
 	cleanup_px(vm, pt);
-	kfree(pt->used_ptes);
 	kfree(pt);
 }
 
 static void gen8_initialize_pt(struct i915_address_space *vm,
 			       struct i915_page_table *pt)
 {
-	gen8_pte_t scratch_pte;
-
-	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-				      I915_CACHE_LLC);
-
-	fill_px(vm, pt, scratch_pte);
+	fill_px(vm, pt,
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
 }
 
 static void gen6_initialize_pt(struct i915_address_space *vm,
 			       struct i915_page_table *pt)
 {
-	gen6_pte_t scratch_pte;
-
-	WARN_ON(vm->scratch_page.daddr == 0);
-
-	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, 0);
-
-	fill32_px(vm, pt, scratch_pte);
+	fill32_px(vm, pt,
+		  vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
 }
 
 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
@@ -556,11 +532,12 @@ static void free_pd(struct i915_address_space *vm,
 static void gen8_initialize_pd(struct i915_address_space *vm,
 			       struct i915_page_directory *pd)
 {
-	gen8_pde_t scratch_pde;
-
-	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
+	unsigned int i;
 
-	fill_px(vm, pd, scratch_pde);
+	fill_px(vm, pd,
+		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
+	for (i = 0; i < I915_PDES; i++)
+		pd->page_table[i] = vm->scratch_pt;
 }
 
 static int __pdp_init(struct drm_i915_private *dev_priv,
@@ -745,8 +722,7 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
  */
 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				struct i915_page_table *pt,
-				uint64_t start,
-				uint64_t length)
+				u64 start, u64 length)
 {
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
@@ -755,16 +731,11 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
 	gen8_pte_t *vaddr;
 
-	if (WARN_ON(!px_page(pt)))
-		return false;
-
-	GEM_BUG_ON(pte_end > GEN8_PTES);
+	GEM_BUG_ON(num_entries > pt->used_ptes);
 
-	bitmap_clear(pt->used_ptes, pte, num_entries);
-	if (USES_FULL_PPGTT(vm->i915)) {
-		if (bitmap_empty(pt->used_ptes, GEN8_PTES))
-			return true;
-	}
+	pt->used_ptes -= num_entries;
+	if (!pt->used_ptes)
+		return true;
 
 	vaddr = kmap_atomic_px(pt);
 	while (pte < pte_end)
@@ -774,31 +745,38 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	return false;
 }
 
+static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
+			       struct i915_page_directory *pd,
+			       struct i915_page_table *pt,
+			       unsigned int pde)
+{
+	gen8_pde_t *vaddr;
+
+	pd->page_table[pde] = pt;
+
+	vaddr = kmap_atomic_px(pd);
+	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
+}
+
 /* Removes entries from a single page dir, releasing it if it's empty.
  * Caller can use the return value to update higher-level entries
  */
 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
-				uint64_t start,
-				uint64_t length)
+				u64 start, u64 length)
 {
 	struct i915_page_table *pt;
-	uint64_t pde;
-	gen8_pde_t *pde_vaddr;
-	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
-						 I915_CACHE_LLC);
+	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		if (WARN_ON(!pd->page_table[pde]))
-			break;
+		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
-			__clear_bit(pde, pd->used_pdes);
-			pde_vaddr = kmap_atomic_px(pd);
-			pde_vaddr[pde] = scratch_pde;
-			kunmap_atomic(pde_vaddr);
-			free_pt(vm, pt);
-		}
+		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+		__clear_bit(pde, pd->used_pdes);
+
+		free_pt(vm, pt);
 	}
 
 	if (bitmap_empty(pd->used_pdes, I915_PDES))
@@ -1118,8 +1096,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  * @pd:	Page directory for this address range.
  * @start:	Starting virtual address to begin allocations.
  * @length:	Size of the allocations.
- * @new_pts:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
  *
  * Allocate the required number of page tables. Extremely similar to
  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
@@ -1132,37 +1108,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  */
 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 				     struct i915_page_directory *pd,
-				     uint64_t start,
-				     uint64_t length,
-				     unsigned long *new_pts)
+				     u64 start, u64 length)
 {
 	struct i915_page_table *pt;
+	u64 from = start;
 	uint32_t pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		/* Don't reallocate page tables */
-		if (test_bit(pde, pd->used_pdes)) {
-			/* Scratch is never allocated this way */
-			WARN_ON(pt == vm->scratch_pt);
-			continue;
-		}
+		if (!test_bit(pde, pd->used_pdes)) {
+			pt = alloc_pt(vm);
+			if (IS_ERR(pt))
+				goto unwind;
 
-		pt = alloc_pt(vm);
-		if (IS_ERR(pt))
-			goto unwind_out;
-
-		gen8_initialize_pt(vm, pt);
-		pd->page_table[pde] = pt;
-		__set_bit(pde, new_pts);
+			gen8_initialize_pt(vm, pt);
+			pd->page_table[pde] = pt;
+		}
+		pt->used_ptes += gen8_pte_count(start, length);
 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
 	}
 
 	return 0;
 
-unwind_out:
-	for_each_set_bit(pde, new_pts, I915_PDES)
-		free_pt(vm, pd->page_table[pde]);
-
+unwind:
+	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
 	return -ENOMEM;
 }
 
@@ -1279,9 +1248,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 }
 
 static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
+free_gen8_temp_bitmaps(unsigned long *new_pds)
 {
-	kfree(new_pts);
 	kfree(new_pds);
 }
 
@@ -1290,29 +1258,16 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
  */
 static
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 unsigned long **new_pts,
 					 uint32_t pdpes)
 {
 	unsigned long *pds;
-	unsigned long *pts;
 
 	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
 	if (!pds)
 		return -ENOMEM;
 
-	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
-		      GFP_TEMPORARY);
-	if (!pts)
-		goto err_out;
-
 	*new_pds = pds;
-	*new_pts = pts;
-
 	return 0;
-
-err_out:
-	free_gen8_temp_bitmaps(pds, pts);
-	return -ENOMEM;
 }
 
 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
@@ -1321,7 +1276,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	unsigned long *new_page_dirs, *new_page_tables;
+	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -1329,7 +1284,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 	int ret;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
 	if (ret)
 		return ret;
 
@@ -1337,14 +1292,13 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
 						new_page_dirs);
 	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		free_gen8_temp_bitmaps(new_page_dirs);
 		return ret;
 	}
 
 	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
-						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
+		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
 		if (ret)
 			goto err_out;
 	}
@@ -1370,11 +1324,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			WARN_ON(!pd_len);
 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
 
-			/* Set our used ptes within the page table */
-			bitmap_set(pt->used_ptes,
-				   gen8_pte_index(pd_start),
-				   gen8_pte_count(pd_start, pd_len));
-
 			/* Our pde is now pointing to the pagetable, pt */
 			__set_bit(pde, pd->used_pdes);
 
@@ -1383,8 +1332,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 							      I915_CACHE_LLC);
 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
 							gen8_pte_index(start),
-							gen8_pte_count(start, length),
-							GEN8_PTES);
+							gen8_pte_count(start, length));
 
 			/* NB: We haven't yet mapped ptes to pages. At this
 			 * point we're still relying on insert_entries() */
@@ -1395,23 +1343,15 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
 	}
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 err_out:
-	while (pdpe--) {
-		unsigned long temp;
-
-		for_each_set_bit(temp, new_page_tables + pdpe *
-				BITS_TO_LONGS(I915_PDES), I915_PDES)
-			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
-	}
-
 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
 		free_pd(vm, pdp->page_directory[pdpe]);
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return ret;
 }
@@ -1553,14 +1493,14 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 
 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 {
-	unsigned long *new_page_dirs, *new_page_tables;
+	unsigned long *new_page_dirs;
 	uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev));
 	int ret;
 
 	/* We allocate temp bitmap for page tables for no gain
 	 * but as this is for init only, lets keep the things simple
 	 */
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
 	if (ret)
 		return ret;
 
@@ -1573,7 +1513,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 	if (!ret)
 		*ppgtt->pdp.used_pdpes = *new_page_dirs;
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 
 	return ret;
 }
@@ -1703,9 +1643,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static inline void gen6_write_pde(struct i915_hw_ppgtt *ppgtt,
+static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
 				  const unsigned int pde,
-				  struct i915_page_table *pt)
+				  const struct i915_page_table *pt)
 {
 	/* Caller needs to make sure the write completes if necessary */
 	writel(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
@@ -1722,16 +1662,15 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
 
 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
 		gen6_write_pde(ppgtt, pde, pt);
-	wmb();
 
 	mark_tlbs_dirty(ppgtt);
+	wmb();
 }
 
-static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
+static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 {
-	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
-
-	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
+	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
+	return ppgtt->pd.base.ggtt_offset << 10;
 }
 
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
@@ -1863,35 +1802,36 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start,
-				   uint64_t length)
+				   u64 start, u64 length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen6_pte_t *pt_vaddr, scratch_pte;
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned num_entries = length >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / GEN6_PTES;
-	unsigned first_pte = first_entry % GEN6_PTES;
-	unsigned last_pte, i;
-
-	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, 0);
+	unsigned int first_entry = start >> PAGE_SHIFT;
+	unsigned int pde = first_entry / GEN6_PTES;
+	unsigned int pte = first_entry % GEN6_PTES;
+	unsigned int num_entries = length >> PAGE_SHIFT;
+	gen6_pte_t scratch_pte =
+		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
 
 	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		if (last_pte > GEN6_PTES)
-			last_pte = GEN6_PTES;
+		struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
+		unsigned int end = min(pte + num_entries, GEN6_PTES);
+		gen6_pte_t *vaddr;
 
-		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
+		num_entries -= end - pte;
 
-		for (i = first_pte; i < last_pte; i++)
-			pt_vaddr[i] = scratch_pte;
+		/* Note that the hw doesn't support removing PDE on the fly
+		 * (they are cached inside the context with no means to
+		 * invalidate the cache), so we can only reset the PTE
+		 * entries back to scratch.
+		 */
 
-		kunmap_atomic(pt_vaddr);
+		vaddr = kmap_atomic_px(pt);
+		do {
+			vaddr[pte++] = scratch_pte;
+		} while (pte < end);
+		kunmap_atomic(vaddr);
 
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pt++;
+		pte = 0;
 	}
 }
 
@@ -1935,89 +1875,37 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start_in, uint64_t length_in)
+			       u64 start, u64 length)
 {
-	DECLARE_BITMAP(new_page_tables, I915_PDES);
-	struct drm_i915_private *dev_priv = vm->i915;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_table *pt;
-	uint32_t start, length, start_save, length_save;
-	uint32_t pde;
-	int ret;
-
-	start = start_save = start_in;
-	length = length_save = length_in;
-
-	bitmap_zero(new_page_tables, I915_PDES);
+	u64 from = start;
+	unsigned int pde;
+	bool flush = false;
 
-	/* The allocation is done in two stages so that we can bail out with
-	 * minimal amount of pain. The first stage finds new page tables that
-	 * need allocation. The second stage marks use ptes within the page
-	 * tables.
-	 */
 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
-		if (pt != vm->scratch_pt) {
-			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
-			continue;
-		}
-
-		/* We've already allocated a page table */
-		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
+		if (pt == vm->scratch_pt) {
+			pt = alloc_pt(vm);
+			if (IS_ERR(pt))
+				goto unwind_out;
 
-		pt = alloc_pt(vm);
-		if (IS_ERR(pt)) {
-			ret = PTR_ERR(pt);
-			goto unwind_out;
+			gen6_initialize_pt(vm, pt);
+			ppgtt->pd.page_table[pde] = pt;
+			gen6_write_pde(ppgtt, pde, pt);
+			flush = true;
 		}
-
-		gen6_initialize_pt(vm, pt);
-
-		ppgtt->pd.page_table[pde] = pt;
-		__set_bit(pde, new_page_tables);
-		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
 	}
 
-	start = start_save;
-	length = length_save;
-
-	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
-		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
-
-		bitmap_zero(tmp_bitmap, GEN6_PTES);
-		bitmap_set(tmp_bitmap, gen6_pte_index(start),
-			   gen6_pte_count(start, length));
-
-		if (__test_and_clear_bit(pde, new_page_tables))
-			gen6_write_pde(ppgtt, pde, pt);
-
-		trace_i915_page_table_entry_map(vm, pde, pt,
-					 gen6_pte_index(start),
-					 gen6_pte_count(start, length),
-					 GEN6_PTES);
-		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
-				GEN6_PTES);
+	if (flush) {
+		mark_tlbs_dirty(ppgtt);
+		wmb();
 	}
 
-	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
-
-	/* Make sure write is complete before other code can use this page
-	 * table. Also require for WC mapped PTEs */
-	readl(ggtt->gsm);
-
-	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 unwind_out:
-	for_each_set_bit(pde, new_page_tables, I915_PDES) {
-		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
-
-		ppgtt->pd.page_table[pde] = vm->scratch_pt;
-		free_pt(vm, pt);
-	}
-
-	mark_tlbs_dirty(ppgtt);
-	return ret;
+	gen6_ppgtt_clear_range(vm, from, start);
+	return -ENOMEM;
 }
 
 static int gen6_init_scratch(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 6162bedc0811..5ad5b59a01b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -69,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
 #define GEN6_PTE_UNCACHED		(1 << 1)
 #define GEN6_PTE_VALID			(1 << 0)
 
-#define I915_PTES(pte_len)		(PAGE_SIZE / (pte_len))
+#define I915_PTES(pte_len)		((unsigned int)(PAGE_SIZE / (pte_len)))
 #define I915_PTE_MASK(pte_len)		(I915_PTES(pte_len) - 1)
 #define I915_PDES			512
 #define I915_PDE_MASK			(I915_PDES - 1)
@@ -220,8 +220,7 @@ struct i915_page_dma {
 
 struct i915_page_table {
 	struct i915_page_dma base;
-
-	unsigned long *used_ptes;
+	unsigned int used_ptes;
 };
 
 struct i915_page_directory {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 4461df5a94fe..de31c49781d3 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -245,15 +245,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
 
 DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
-	TP_ARGS(vm, pde, pt, first, count, bits),
+		 struct i915_page_table *pt, u32 first, u32 count),
+	TP_ARGS(vm, pde, pt, first, count),
 
 	TP_STRUCT__entry(
 		__field(struct i915_address_space *, vm)
 		__field(u32, pde)
 		__field(u32, first)
 		__field(u32, last)
-		__dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits))
 	),
 
 	TP_fast_assign(
@@ -261,22 +260,16 @@ DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 		__entry->pde = pde;
 		__entry->first = first;
 		__entry->last = first + count - 1;
-		scnprintf(__get_str(cur_ptes),
-			  TRACE_PT_SIZE(bits),
-			  "%*pb",
-			  bits,
-			  pt->used_ptes);
 	),
 
-	TP_printk("vm=%p, pde=%d, updating %u:%u\t%s",
-		  __entry->vm, __entry->pde, __entry->last, __entry->first,
-		  __get_str(cur_ptes))
+	TP_printk("vm=%p, pde=%d, updating %u:%u",
+		  __entry->vm, __entry->pde, __entry->last, __entry->first)
 );
 
 DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
-	TP_ARGS(vm, pde, pt, first, count, bits)
+		 struct i915_page_table *pt, u32 first, u32 count),
+	TP_ARGS(vm, pde, pt, first, count)
 );
 
 TRACE_EVENT(i915_gem_object_change_domain,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 13/22] drm/i915: Remove bitmap tracking for used-pdes
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (10 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 12/22] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 14/22] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
                   ` (9 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We only operate on known extents (for both alloc and clear), so we can
combine our knowledge of the bind/unbind range with our knowledge of the
existing page tables to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 246 ++++++++++++------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   6 +-
 2 files changed, 84 insertions(+), 168 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index afacf722b8b1..725709ff7f8b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -494,39 +494,25 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
-	int ret = -ENOMEM;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
-	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
-				sizeof(*pd->used_pdes), GFP_KERNEL);
-	if (!pd->used_pdes)
-		goto fail_bitmap;
-
-	ret = setup_px(vm, pd);
-	if (ret)
-		goto fail_page_m;
+	if (unlikely(setup_px(vm, pd))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
 
+	pd->used_pdes = 0;
 	return pd;
-
-fail_page_m:
-	kfree(pd->used_pdes);
-fail_bitmap:
-	kfree(pd);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pd(struct i915_address_space *vm,
 		    struct i915_page_directory *pd)
 {
-	if (px_page(pd)) {
-		cleanup_px(vm, pd);
-		kfree(pd->used_pdes);
-		kfree(pd);
-	}
+	cleanup_px(vm, pd);
+	kfree(pd);
 }
 
 static void gen8_initialize_pd(struct i915_address_space *vm,
@@ -540,10 +526,11 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 		pd->page_table[i] = vm->scratch_pt;
 }
 
-static int __pdp_init(struct drm_i915_private *dev_priv,
+static int __pdp_init(struct i915_address_space *vm,
 		      struct i915_page_directory_pointer *pdp)
 {
-	size_t pdpes = I915_PDPES_PER_PDP(dev_priv);
+	size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
+	int i;
 
 	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
 				  sizeof(unsigned long),
@@ -551,8 +538,8 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
 	if (!pdp->used_pdpes)
 		return -ENOMEM;
 
-	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
-				      GFP_KERNEL);
+	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
+					    GFP_KERNEL);
 	if (!pdp->page_directory) {
 		kfree(pdp->used_pdpes);
 		/* the PDP might be the statically allocated top level. Keep it
@@ -561,6 +548,9 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
 		return -ENOMEM;
 	}
 
+	for (i = 0; i < pdpes; i++)
+		pdp->page_directory[i] = vm->scratch_pd;
+
 	return 0;
 }
 
@@ -583,7 +573,7 @@ alloc_pdp(struct i915_address_space *vm)
 	if (!pdp)
 		return ERR_PTR(-ENOMEM);
 
-	ret = __pdp_init(vm->i915, pdp);
+	ret = __pdp_init(vm, pdp);
 	if (ret)
 		goto fail_bitmap;
 
@@ -633,25 +623,9 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 }
 
 static void
-gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
-		struct i915_page_directory_pointer *pdp,
-		struct i915_page_directory *pd,
-		int index)
-{
-	gen8_ppgtt_pdpe_t *page_directorypo;
-
-	if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)))
-		return;
-
-	page_directorypo = kmap_atomic_px(pdp);
-	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
-	kunmap_atomic(page_directorypo);
-}
-
-static void
-gen8_setup_pml4e(struct i915_pml4 *pml4,
-		 struct i915_page_directory_pointer *pdp,
-		 int index)
+gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
+		     struct i915_page_directory_pointer *pdp,
+		     int index)
 {
 	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
 
@@ -759,9 +733,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
 	kunmap_atomic(vaddr);
 }
 
-/* Removes entries from a single page dir, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
 				u64 start, u64 length)
@@ -774,15 +745,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 			continue;
 
 		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
-		__clear_bit(pde, pd->used_pdes);
+		pd->used_pdes--;
 
 		free_pt(vm, pt);
 	}
 
-	if (bitmap_empty(pd->used_pdes, I915_PDES))
-		return true;
+	return !pd->used_pdes;
+}
 
-	return false;
+static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
+				struct i915_page_directory_pointer *pdp,
+				struct i915_page_directory *pd,
+				unsigned int pdpe)
+{
+	gen8_ppgtt_pdpe_t *vaddr;
+
+	pdp->page_directory[pdpe] = pd;
+	if (!USES_FULL_48BIT_PPGTT(vm->i915))
+		return;
+
+	vaddr = kmap_atomic_px(pdp);
+	vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
 }
 
 /* Removes entries from a single page dir pointer, releasing it if it's empty.
@@ -790,25 +774,20 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
  */
 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 				 struct i915_page_directory_pointer *pdp,
-				 uint64_t start,
-				 uint64_t length)
+				 u64 start, u64 length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
-	uint64_t pdpe;
+	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (WARN_ON(!pdp->page_directory[pdpe]))
-			break;
+		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
-			__clear_bit(pdpe, pdp->used_pdpes);
-			gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
-			free_pd(vm, pd);
-		}
-	}
+		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+		__clear_bit(pdpe, pdp->used_pdpes);
 
-	mark_tlbs_dirty(ppgtt);
+		free_pd(vm, pd);
+	}
 
 	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
 		return true;
@@ -816,15 +795,21 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 	return false;
 }
 
+static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
+				  u64 start, u64 length)
+{
+	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
+}
+
 /* Removes entries from a single pml4.
  * This is the top-level structure in 4-level page tables used on gen8+.
  * Empty entries are always scratch pml4e.
  */
-static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
-				  struct i915_pml4 *pml4,
-				  uint64_t start,
-				  uint64_t length)
+static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
+				  u64 start, u64 length)
 {
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_pml4 *pml4 = &ppgtt->pml4;
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 
@@ -836,23 +821,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 
 		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
 			__clear_bit(pml4e, pml4->used_pml4es);
-			gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
+			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
 			free_pdp(vm, pdp);
 		}
 	}
 }
 
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start, uint64_t length)
-{
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (USES_FULL_48BIT_PPGTT(vm->i915))
-		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
-	else
-		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
-}
-
 struct sgt_dma {
 	struct scatterlist *sg;
 	dma_addr_t dma, max;
@@ -950,12 +924,9 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 	if (!px_page(pd))
 		return;
 
-	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
-		if (WARN_ON(!pd->page_table[i]))
-			continue;
-
-		free_pt(vm, pd->page_table[i]);
-		pd->page_table[i] = NULL;
+	for (i = 0; i < I915_PDES; i++) {
+		if (pd->page_table[i] != vm->scratch_pt)
+			free_pt(vm, pd->page_table[i]);
 	}
 }
 
@@ -1050,7 +1021,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 	int i;
 
 	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
-		if (WARN_ON(!pdp->page_directory[i]))
+		if (pdp->page_directory[i] == vm->scratch_pd)
 			continue;
 
 		gen8_free_page_tables(vm, pdp->page_directory[i]);
@@ -1090,44 +1061,28 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	gen8_free_scratch(vm);
 }
 
-/**
- * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
- * @vm:	Master vm structure.
- * @pd:	Page directory for this address range.
- * @start:	Starting virtual address to begin allocations.
- * @length:	Size of the allocations.
- *
- * Allocate the required number of page tables. Extremely similar to
- * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
- * the page directory boundary (instead of the page directory pointer). That
- * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
- * possible, and likely that the caller will need to use multiple calls of this
- * function to achieve the appropriate allocation.
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
-				     struct i915_page_directory *pd,
-				     u64 start, u64 length)
+static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
+			       struct i915_page_directory *pd,
+			       u64 start, u64 length)
 {
 	struct i915_page_table *pt;
 	u64 from = start;
-	uint32_t pde;
+	unsigned int pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		/* Don't reallocate page tables */
-		if (!test_bit(pde, pd->used_pdes)) {
+		if (pt == vm->scratch_pt) {
 			pt = alloc_pt(vm);
 			if (IS_ERR(pt))
 				goto unwind;
 
 			gen8_initialize_pt(vm, pt);
-			pd->page_table[pde] = pt;
+
+			gen8_ppgtt_set_pde(vm, pd, pt, pde);
+			pd->used_pdes++;
 		}
+
 		pt->used_ptes += gen8_pte_count(start, length);
-		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
 	}
-
 	return 0;
 
 unwind:
@@ -1204,7 +1159,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
  *		caller to free on error.
  *
  * Allocate the required number of page directory pointers. Extremely similar to
- * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd().
  * The main difference is here we are limited by the pml4 boundary (instead of
  * the page directory pointer).
  *
@@ -1256,9 +1211,8 @@ free_gen8_temp_bitmaps(unsigned long *new_pds)
 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both
  * of these are based on the number of PDPEs in the system.
  */
-static
-int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 uint32_t pdpes)
+static int __must_check
+alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
 {
 	unsigned long *pds;
 
@@ -1278,8 +1232,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
-	const uint64_t orig_start = start;
-	const uint64_t orig_length = length;
 	uint32_t pdpe;
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 	int ret;
@@ -1298,51 +1250,16 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
+		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
 		if (ret)
 			goto err_out;
-	}
 
-	start = orig_start;
-	length = orig_length;
+		if (test_and_set_bit(pdpe, pdp->used_pdpes))
+			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+	}
 
 	/* Allocations have completed successfully, so set the bitmaps, and do
 	 * the mappings. */
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		gen8_pde_t *const page_directory = kmap_atomic_px(pd);
-		struct i915_page_table *pt;
-		uint64_t pd_len = length;
-		uint64_t pd_start = start;
-		uint32_t pde;
-
-		/* Every pd should be allocated, we just did that above. */
-		WARN_ON(!pd);
-
-		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
-			/* Same reasoning as pd */
-			WARN_ON(!pt);
-			WARN_ON(!pd_len);
-			WARN_ON(!gen8_pte_count(pd_start, pd_len));
-
-			/* Our pde is now pointing to the pagetable, pt */
-			__set_bit(pde, pd->used_pdes);
-
-			/* Map the PDE to the page table */
-			page_directory[pde] = gen8_pde_encode(px_dma(pt),
-							      I915_CACHE_LLC);
-			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
-							gen8_pte_index(start),
-							gen8_pte_count(start, length));
-
-			/* NB: We haven't yet mapped ptes to pages. At this
-			 * point we're still relying on insert_entries() */
-		}
-
-		kunmap_atomic(page_directory);
-		__set_bit(pdpe, pdp->used_pdpes);
-		gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
-	}
-
 	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
@@ -1385,7 +1302,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 		if (ret)
 			goto err_out;
 
-		gen8_setup_pml4e(pml4, pdp, pml4e);
+		gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
 	}
 
 	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1434,7 +1351,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 			uint32_t  pte;
 			gen8_pte_t *pt_vaddr;
 
-			if (!test_bit(pde, pd->used_pdes))
+			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
 				continue;
 
 			pt_vaddr = kmap_atomic_px(pt);
@@ -1537,7 +1454,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->debug_dump = gen8_dump_ppgtt;
@@ -1559,8 +1475,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->switch_mm = gen8_48b_mm_switch;
 
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
+		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
-		ret = __pdp_init(dev_priv, &ppgtt->pdp);
+		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
 		if (ret)
 			goto free_scratch;
 
@@ -1577,6 +1494,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		}
 
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
+		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
 	}
 
 	if (intel_vgpu_active(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5ad5b59a01b1..a62b0ef1f3fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -226,8 +226,8 @@ struct i915_page_table {
 struct i915_page_directory {
 	struct i915_page_dma base;
 
-	unsigned long *used_pdes;
 	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
+	unsigned int used_pdes;
 };
 
 struct i915_page_directory_pointer {
@@ -520,9 +520,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
 {
-	return test_bit(n, ppgtt->pdp.used_pdpes) ?
-		px_dma(ppgtt->pdp.page_directory[n]) :
-		px_dma(ppgtt->base.scratch_pd);
+	return px_dma(ppgtt->pdp.page_directory[n]);
 }
 
 static inline struct i915_ggtt *
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 14/22] drm/i915: Remove bitmap tracking for used-pdpes
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (11 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 13/22] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 15/22] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
                   ` (8 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We only operate on known extents (for both alloc and clear), so we can
combine our knowledge of the bind/unbind range with our knowledge of the
existing page tables to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 279 +++++++++++-------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   3 +-
 2 files changed, 86 insertions(+), 196 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 725709ff7f8b..ca23dbb5a053 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -529,24 +529,13 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 static int __pdp_init(struct i915_address_space *vm,
 		      struct i915_page_directory_pointer *pdp)
 {
-	size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
-	int i;
-
-	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
-				  sizeof(unsigned long),
-				  GFP_KERNEL);
-	if (!pdp->used_pdpes)
-		return -ENOMEM;
+	const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915);
+	unsigned int i;
 
 	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
-					    GFP_KERNEL);
-	if (!pdp->page_directory) {
-		kfree(pdp->used_pdpes);
-		/* the PDP might be the statically allocated top level. Keep it
-		 * as clean as possible */
-		pdp->used_pdpes = NULL;
+					    GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pdp->page_directory))
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < pdpes; i++)
 		pdp->page_directory[i] = vm->scratch_pd;
@@ -556,7 +545,6 @@ static int __pdp_init(struct i915_address_space *vm,
 
 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 {
-	kfree(pdp->used_pdpes);
 	kfree(pdp->page_directory);
 	pdp->page_directory = NULL;
 }
@@ -614,23 +602,12 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
 static void gen8_initialize_pml4(struct i915_address_space *vm,
 				 struct i915_pml4 *pml4)
 {
-	gen8_ppgtt_pml4e_t scratch_pml4e;
-
-	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
-					  I915_CACHE_LLC);
-
-	fill_px(vm, pml4, scratch_pml4e);
-}
-
-static void
-gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
-		     struct i915_page_directory_pointer *pdp,
-		     int index)
-{
-	gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
+	unsigned int i;
 
-	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
-	kunmap_atomic(pagemap);
+	fill_px(vm, pml4,
+		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
+	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
+		pml4->pdps[i] = vm->scratch_pdp;
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -784,15 +761,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 			continue;
 
 		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
-		__clear_bit(pdpe, pdp->used_pdpes);
+		pdp->used_pdpes--;
 
 		free_pd(vm, pd);
 	}
 
-	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
-		return true;
-
-	return false;
+	return !pdp->used_pdpes;
 }
 
 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
@@ -801,6 +775,19 @@ static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
 	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
 }
 
+static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
+				 struct i915_page_directory_pointer *pdp,
+				 unsigned int pml4e)
+{
+	gen8_ppgtt_pml4e_t *vaddr;
+
+	pml4->pdps[pml4e] = pdp;
+
+	vaddr = kmap_atomic_px(pml4);
+	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
+}
+
 /* Removes entries from a single pml4.
  * This is the top-level structure in 4-level page tables used on gen8+.
  * Empty entries are always scratch pml4e.
@@ -811,19 +798,18 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_pml4 *pml4 = &ppgtt->pml4;
 	struct i915_page_directory_pointer *pdp;
-	uint64_t pml4e;
+	unsigned int pml4e;
 
 	GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (WARN_ON(!pml4->pdps[pml4e]))
-			break;
+		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
-			__clear_bit(pml4e, pml4->used_pml4es);
-			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-			free_pdp(vm, pdp);
-		}
+		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+		__clear_bit(pml4e, pml4->used_pml4es);
+
+		free_pdp(vm, pdp);
 	}
 }
 
@@ -1020,7 +1006,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
 {
 	int i;
 
-	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
+	for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) {
 		if (pdp->page_directory[i] == vm->scratch_pd)
 			continue;
 
@@ -1091,65 +1077,6 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 }
 
 /**
- * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
- * @vm:	Master vm structure.
- * @pdp:	Page directory pointer for this address range.
- * @start:	Starting virtual address to begin allocations.
- * @length:	Size of the allocations.
- * @new_pds:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
- *
- * Allocate the required number of page directories starting at the pde index of
- * @start, and ending at the pde index @start + @length. This function will skip
- * over already allocated page directories within the range, and only allocate
- * new ones, setting the appropriate pointer within the pdp as well as the
- * correct position in the bitmap @new_pds.
- *
- * The function will only allocate the pages within the range for a give page
- * directory pointer. In other words, if @start + @length straddles a virtually
- * addressed PDP boundary (512GB for 4k pages), there will be more allocations
- * required by the caller, This is not currently possible, and the BUG in the
- * code will prevent it.
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int
-gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
-				  struct i915_page_directory_pointer *pdp,
-				  uint64_t start,
-				  uint64_t length,
-				  unsigned long *new_pds)
-{
-	struct i915_page_directory *pd;
-	uint32_t pdpe;
-	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
-
-	WARN_ON(!bitmap_empty(new_pds, pdpes));
-
-	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (test_bit(pdpe, pdp->used_pdpes))
-			continue;
-
-		pd = alloc_pd(vm);
-		if (IS_ERR(pd))
-			goto unwind_out;
-
-		gen8_initialize_pd(vm, pd);
-		pdp->page_directory[pdpe] = pd;
-		__set_bit(pdpe, new_pds);
-		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
-	}
-
-	return 0;
-
-unwind_out:
-	for_each_set_bit(pdpe, new_pds, pdpes)
-		free_pd(vm, pdp->page_directory[pdpe]);
-
-	return -ENOMEM;
-}
-
-/**
  * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
  * @vm:	Master vm structure.
  * @pml4:	Page map level 4 for this address range.
@@ -1169,23 +1096,19 @@ static int
 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 				  struct i915_pml4 *pml4,
 				  uint64_t start,
-				  uint64_t length,
-				  unsigned long *new_pdps)
+				  uint64_t length)
 {
 	struct i915_page_directory_pointer *pdp;
 	uint32_t pml4e;
 
-	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
-
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (!test_bit(pml4e, pml4->used_pml4es)) {
 			pdp = alloc_pdp(vm);
 			if (IS_ERR(pdp))
-				goto unwind_out;
+				return PTR_ERR(pdp);
 
 			gen8_initialize_pdp(vm, pdp);
 			pml4->pdps[pml4e] = pdp;
-			__set_bit(pml4e, new_pdps);
 			trace_i915_page_directory_pointer_entry_alloc(vm,
 								      pml4e,
 								      start,
@@ -1194,34 +1117,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 	}
 
 	return 0;
-
-unwind_out:
-	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		free_pdp(vm, pml4->pdps[pml4e]);
-
-	return -ENOMEM;
-}
-
-static void
-free_gen8_temp_bitmaps(unsigned long *new_pds)
-{
-	kfree(new_pds);
-}
-
-/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
- * of these are based on the number of PDPEs in the system.
- */
-static int __must_check
-alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
-{
-	unsigned long *pds;
-
-	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
-	if (!pds)
-		return -ENOMEM;
-
-	*new_pds = pds;
-	return 0;
 }
 
 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
@@ -1230,47 +1125,37 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
-	uint32_t pdpe;
-	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
+	u64 from = start;
+	unsigned int pdpe;
 	int ret;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
-	if (ret)
-		return ret;
-
-	/* Do the allocations first so we can easily bail out */
-	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
-						new_page_dirs);
-	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs);
-		return ret;
-	}
-
-	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
-		if (ret)
-			goto err_out;
+		if (pd == vm->scratch_pd) {
+			pd = alloc_pd(vm);
+			if (IS_ERR(pd))
+				goto unwind;
 
-		if (test_and_set_bit(pdpe, pdp->used_pdpes))
+			gen8_initialize_pd(vm, pd);
 			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+			pdp->used_pdpes++;
+		}
+
+		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
+		if (unlikely(ret)) {
+			gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+			pdp->used_pdpes--;
+			free_pd(vm, pd);
+			goto unwind;
+		}
 	}
 
-	/* Allocations have completed successfully, so set the bitmaps, and do
-	 * the mappings. */
-	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
 
-err_out:
-	for_each_set_bit(pdpe, new_page_dirs, pdpes)
-		free_pd(vm, pdp->page_directory[pdpe]);
-
-	free_gen8_temp_bitmaps(new_page_dirs);
-	mark_tlbs_dirty(ppgtt);
-	return ret;
+unwind:
+	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
+	return -ENOMEM;
 }
 
 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
@@ -1290,8 +1175,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 	/* The pagedirectory and pagetable allocations are done in the shared 3
 	 * and 4 level code. Just allocate the pdps.
 	 */
-	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
-						new_pdps);
+	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
 	if (ret)
 		return ret;
 
@@ -1343,7 +1227,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 		uint64_t pd_start = start;
 		uint32_t pde;
 
-		if (!test_bit(pdpe, pdp->used_pdpes))
+		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
 			continue;
 
 		seq_printf(m, "\tPDPE #%d\n", pdpe);
@@ -1408,31 +1292,36 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	}
 }
 
-static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
+static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
 {
-	unsigned long *new_page_dirs;
-	uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev));
-	int ret;
+	struct i915_address_space *vm = &ppgtt->base;
+	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
+	struct i915_page_directory *pd;
+	u64 start = 0, length = ppgtt->base.total;
+	u64 from = start;
+	unsigned int pdpe;
 
-	/* We allocate temp bitmap for page tables for no gain
-	 * but as this is for init only, lets keep the things simple
-	 */
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
-	if (ret)
-		return ret;
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+		pd = alloc_pd(vm);
+		if (IS_ERR(pd))
+			goto unwind;
 
-	/* Allocate for all pdps regardless of how the ppgtt
-	 * was defined.
-	 */
-	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
-						0, 1ULL << 32,
-						new_page_dirs);
-	if (!ret)
-		*ppgtt->pdp.used_pdpes = *new_page_dirs;
+		gen8_initialize_pd(vm, pd);
+		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+		pdp->used_pdpes++;
+	}
 
-	free_gen8_temp_bitmaps(new_page_dirs);
+	pdp->used_pdpes++; /* never remove */
+	return 0;
 
-	return ret;
+unwind:
+	start -= from;
+	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
+		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+		free_pd(vm, pd);
+	}
+	pdp->used_pdpes = 0;
+	return -ENOMEM;
 }
 
 /*
@@ -1488,9 +1377,11 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 							      GEN8_PML4E_SHIFT);
 
 		if (intel_vgpu_active(dev_priv)) {
-			ret = gen8_preallocate_top_level_pdps(ppgtt);
-			if (ret)
+			ret = gen8_preallocate_top_level_pdp(ppgtt);
+			if (ret) {
+				__pdp_fini(&ppgtt->pdp);
 				goto free_scratch;
+			}
 		}
 
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a62b0ef1f3fc..3628c7c564ae 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -232,9 +232,8 @@ struct i915_page_directory {
 
 struct i915_page_directory_pointer {
 	struct i915_page_dma base;
-
-	unsigned long *used_pdpes;
 	struct i915_page_directory **page_directory;
+	unsigned int used_pdpes;
 };
 
 struct i915_pml4 {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 15/22] drm/i915: Remove bitmap tracking for used-pml4
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (12 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 14/22] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 16/22] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
                   ` (7 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We only operate on known extents (for both alloc and clear), so we can
combine knowledge of the bind/unbind range with knowledge of the
existing pagetable to avoid having to allocate temporary and auxiliary
bitmaps.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 132 +++++++++++-------------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   2 -
 2 files changed, 38 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ca23dbb5a053..372d9f56c91d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -807,7 +807,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 			continue;
 
 		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-		__clear_bit(pml4e, pml4->used_pml4es);
 
 		free_pdp(vm, pdp);
 	}
@@ -1021,8 +1020,8 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
 {
 	int i;
 
-	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
-		if (WARN_ON(!ppgtt->pml4.pdps[i]))
+	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
+		if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
 			continue;
 
 		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
@@ -1076,53 +1075,9 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 	return -ENOMEM;
 }
 
-/**
- * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
- * @vm:	Master vm structure.
- * @pml4:	Page map level 4 for this address range.
- * @start:	Starting virtual address to begin allocations.
- * @length:	Size of the allocations.
- * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
- *
- * Allocate the required number of page directory pointers. Extremely similar to
- * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd().
- * The main difference is here we are limited by the pml4 boundary (instead of
- * the page directory pointer).
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int
-gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
-				  struct i915_pml4 *pml4,
-				  uint64_t start,
-				  uint64_t length)
-{
-	struct i915_page_directory_pointer *pdp;
-	uint32_t pml4e;
-
-	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (!test_bit(pml4e, pml4->used_pml4es)) {
-			pdp = alloc_pdp(vm);
-			if (IS_ERR(pdp))
-				return PTR_ERR(pdp);
-
-			gen8_initialize_pdp(vm, pdp);
-			pml4->pdps[pml4e] = pdp;
-			trace_i915_page_directory_pointer_entry_alloc(vm,
-								      pml4e,
-								      start,
-								      GEN8_PML4E_SHIFT);
-		}
-	}
-
-	return 0;
-}
-
-static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
-				    struct i915_page_directory_pointer *pdp,
-				    uint64_t start,
-				    uint64_t length)
+static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
+				struct i915_page_directory_pointer *pdp,
+				u64 start, u64 length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
@@ -1158,58 +1113,46 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	return -ENOMEM;
 }
 
-static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
-				    struct i915_pml4 *pml4,
-				    uint64_t start,
-				    uint64_t length)
+static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
+				 u64 start, u64 length)
 {
-	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
-	struct i915_page_directory_pointer *pdp;
-	uint64_t pml4e;
-	int ret = 0;
-
-	/* Do the pml4 allocations first, so we don't need to track the newly
-	 * allocated tables below the pdp */
-	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+	return gen8_ppgtt_alloc_pdp(vm,
+				    &i915_vm_to_ppgtt(vm)->pdp, start, length);
+}
 
-	/* The pagedirectory and pagetable allocations are done in the shared 3
-	 * and 4 level code. Just allocate the pdps.
-	 */
-	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
-	if (ret)
-		return ret;
+static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
+				 u64 start, u64 length)
+{
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_pml4 *pml4 = &ppgtt->pml4;
+	struct i915_page_directory_pointer *pdp;
+	u64 from = start;
+	u32 pml4e;
+	int ret;
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		WARN_ON(!pdp);
+		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
+			pdp = alloc_pdp(vm);
+			if (IS_ERR(pdp))
+				goto unwind;
 
-		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
-		if (ret)
-			goto err_out;
+			gen8_initialize_pdp(vm, pdp);
+			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+		}
 
-		gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
+		if (unlikely(ret)) {
+			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+			free_pdp(vm, pdp);
+			goto unwind;
+		}
 	}
 
-	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
-		  GEN8_PML4ES_PER_PML4);
-
 	return 0;
 
-err_out:
-	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
-		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
-
-	return ret;
-}
-
-static int gen8_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start, uint64_t length)
-{
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
-	if (USES_FULL_48BIT_PPGTT(vm->i915))
-		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
-	else
-		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+unwind:
+	gen8_ppgtt_clear_4lvl(vm, from, start - from);
+	return -ENOMEM;
 }
 
 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
@@ -1283,7 +1226,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 		struct i915_page_directory_pointer *pdp;
 
 		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-			if (!test_bit(pml4e, pml4->used_pml4es))
+			if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
 				continue;
 
 			seq_printf(m, "    PML4E #%llu\n", pml4e);
@@ -1342,7 +1285,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
-	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->debug_dump = gen8_dump_ppgtt;
@@ -1363,6 +1305,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->base.total = 1ULL << 48;
 		ppgtt->switch_mm = gen8_48b_mm_switch;
 
+		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
 		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
@@ -1384,6 +1327,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			}
 		}
 
+		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
 		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 3628c7c564ae..933888725368 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -238,8 +238,6 @@ struct i915_page_directory_pointer {
 
 struct i915_pml4 {
 	struct i915_page_dma base;
-
-	DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
 	struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 16/22] drm/i915: Remove superfluous posting reads after clear GGTT
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (13 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 15/22] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 17/22] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
                   ` (6 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

The barrier here is not required - we apply the barrier before the range
is ever reused by the GPU instead.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 372d9f56c91d..83c367814496 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2178,7 +2178,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
-	readl(gtt_base);
 }
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
@@ -2203,7 +2202,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
-	readl(gtt_base);
 }
 
 static void i915_ggtt_insert_page(struct i915_address_space *vm,
@@ -2227,7 +2225,6 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
 	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
-
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 17/22] drm/i915: Always mark the PDP as dirty when altered
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (14 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 16/22] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 18/22] drm/i915: Remove defunct GTT tracepoints Chris Wilson
                   ` (5 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We want to reload the PDP (and flush the TLB) when the addresses are
changed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 83c367814496..92fd85a9d06d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1079,7 +1079,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 				struct i915_page_directory_pointer *pdp,
 				u64 start, u64 length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd;
 	u64 from = start;
 	unsigned int pdpe;
@@ -1094,6 +1093,8 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 			gen8_initialize_pd(vm, pd);
 			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
 			pdp->used_pdpes++;
+
+			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
 		}
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
@@ -1105,7 +1106,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
 		}
 	}
 
-	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 unwind:
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 18/22] drm/i915: Remove defunct GTT tracepoints
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (15 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 17/22] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 19/22] drm/i915: Remove unused ppgtt->enable() Chris Wilson
                   ` (4 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

The GTT tracepoints are now entirely synonymous with binding and unbinding
the VMA, and so merely duplicate the tracepoints already emitted there.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  4 --
 drivers/gpu/drm/i915/i915_trace.h   | 97 -------------------------------------
 2 files changed, 101 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 92fd85a9d06d..e39b6ace0b40 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -193,7 +193,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int ret;
 
-	trace_i915_va_alloc(vma);
 	ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
 	if (ret)
 		return ret;
@@ -1315,9 +1314,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 		ppgtt->base.total = 1ULL << 32;
 		ppgtt->switch_mm = gen8_legacy_mm_switch;
-		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
-							      0, 0,
-							      GEN8_PML4E_SHIFT);
 
 		if (intel_vgpu_active(dev_priv)) {
 			ret = gen8_preallocate_top_level_pdp(ppgtt);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index de31c49781d3..7a547cdfc381 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -175,103 +175,6 @@ TRACE_EVENT(i915_vma_unbind,
 		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
-TRACE_EVENT(i915_va_alloc,
-	TP_PROTO(struct i915_vma *vma),
-	TP_ARGS(vma),
-
-	TP_STRUCT__entry(
-		__field(struct i915_address_space *, vm)
-		__field(u64, start)
-		__field(u64, end)
-	),
-
-	TP_fast_assign(
-		__entry->vm = vma->vm;
-		__entry->start = vma->node.start;
-		__entry->end = vma->node.start + vma->node.size - 1;
-	),
-
-	TP_printk("vm=%p (%c), 0x%llx-0x%llx",
-		  __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P',  __entry->start, __entry->end)
-);
-
-DECLARE_EVENT_CLASS(i915_px_entry,
-	TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift),
-	TP_ARGS(vm, px, start, px_shift),
-
-	TP_STRUCT__entry(
-		__field(struct i915_address_space *, vm)
-		__field(u32, px)
-		__field(u64, start)
-		__field(u64, end)
-	),
-
-	TP_fast_assign(
-		__entry->vm = vm;
-		__entry->px = px;
-		__entry->start = start;
-		__entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1;
-	),
-
-	TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)",
-		  __entry->vm, __entry->px, __entry->start, __entry->end)
-);
-
-DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc,
-	     TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift),
-	     TP_ARGS(vm, pde, start, pde_shift)
-);
-
-DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc,
-		   TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift),
-		   TP_ARGS(vm, pdpe, start, pdpe_shift),
-
-		   TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)",
-			     __entry->vm, __entry->px, __entry->start, __entry->end)
-);
-
-DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
-		   TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift),
-		   TP_ARGS(vm, pml4e, start, pml4e_shift),
-
-		   TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)",
-			     __entry->vm, __entry->px, __entry->start, __entry->end)
-);
-
-/* Avoid extra math because we only support two sizes. The format is defined by
- * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
-#define TRACE_PT_SIZE(bits) \
-	((((bits) == 1024) ? 288 : 144) + 1)
-
-DECLARE_EVENT_CLASS(i915_page_table_entry_update,
-	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count),
-	TP_ARGS(vm, pde, pt, first, count),
-
-	TP_STRUCT__entry(
-		__field(struct i915_address_space *, vm)
-		__field(u32, pde)
-		__field(u32, first)
-		__field(u32, last)
-	),
-
-	TP_fast_assign(
-		__entry->vm = vm;
-		__entry->pde = pde;
-		__entry->first = first;
-		__entry->last = first + count - 1;
-	),
-
-	TP_printk("vm=%p, pde=%d, updating %u:%u",
-		  __entry->vm, __entry->pde, __entry->last, __entry->first)
-);
-
-DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
-	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count),
-	TP_ARGS(vm, pde, pt, first, count)
-);
-
 TRACE_EVENT(i915_gem_object_change_domain,
 	    TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write),
 	    TP_ARGS(obj, old_read, old_write),
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 19/22] drm/i915: Remove unused ppgtt->enable()
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (16 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 18/22] drm/i915: Remove defunct GTT tracepoints Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-10 19:38 ` [PATCH v2 20/22] drm/i915: Remove i915_address_space.start Chris Wilson
                   ` (3 subsequent siblings)
  21 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

We never assign or use the ppgtt->enable() callback, so remove it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 933888725368..6210550e15cf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -385,7 +385,6 @@ struct i915_hw_ppgtt {
 
 	gen6_pte_t __iomem *pd_addr;
 
-	int (*enable)(struct i915_hw_ppgtt *ppgtt);
 	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
 			 struct drm_i915_gem_request *req);
 	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 20/22] drm/i915: Remove i915_address_space.start
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (17 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 19/22] drm/i915: Remove unused ppgtt->enable() Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-13 14:47   ` Matthew Auld
  2017-02-10 19:38 ` [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT Chris Wilson
                   ` (2 subsequent siblings)
  21 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Once upon a time, back in the UMS days, we supported userspace
initialising the GTT and sharing portions of the GTT with other users.
Now, we own the GTT (both global and per-process) and the tables always
start at 0 - so we can remove i915_address_space.start and forget about
this old complication.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c   | 13 +++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  1 -
 drivers/gpu/drm/i915/i915_vgpu.c      |  9 ++++-----
 5 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 6e8a288f47b2..cda957c674ee 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -476,7 +476,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		   dpy_count, dpy_size);
 
 	seq_printf(m, "%llu [%llu] gtt total\n",
-		   ggtt->base.total, ggtt->mappable_end - ggtt->base.start);
+		   ggtt->base.total, ggtt->mappable_end);
 
 	seq_putc(m, '\n');
 	print_batch_pool_stats(m, dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index ca25b1f7f6d1..a0de5734f7d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -274,7 +274,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 	check_color = vm->mm.color_adjust;
 	if (check_color) {
 		/* Expand search to cover neighbouring guard pages (or lack!) */
-		if (start > vm->start)
+		if (start)
 			start -= I915_GTT_PAGE_SIZE;
 
 		/* Always look at the page afterwards to avoid the end-of-GTT */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e39b6ace0b40..d71aaccb35ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1212,8 +1212,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 {
 	struct i915_address_space *vm = &ppgtt->base;
-	uint64_t start = ppgtt->base.start;
-	uint64_t length = ppgtt->base.total;
+	u64 start = 0, length = ppgtt->base.total;
 	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
 						 I915_CACHE_LLC);
 
@@ -1282,7 +1281,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	if (ret)
 		return ret;
 
-	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
@@ -1345,7 +1343,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	gen6_pte_t scratch_pte;
 	uint32_t pd_entry;
 	uint32_t  pte, pde;
-	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
+	uint32_t start = 0, length = ppgtt->base.total;
 
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
 				     I915_CACHE_LLC, 0);
@@ -1779,7 +1777,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
-	ppgtt->base.start = 0;
 	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
@@ -1820,7 +1817,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
 {
 	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
 
-	drm_mm_init(&vm->mm, vm->start, vm->total);
+	drm_mm_init(&vm->mm, 0, vm->total);
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
 	INIT_LIST_HEAD(&vm->active_list);
@@ -2006,7 +2003,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
 
 	i915_check_and_clear_faults(dev_priv);
 
-	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
+	ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
 
 	i915_ggtt_invalidate(dev_priv);
 }
@@ -2974,7 +2971,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	i915_check_and_clear_faults(dev_priv);
 
 	/* First fill our portion of the GTT with scratch pages */
-	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
+	ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
 
 	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 6210550e15cf..e85ff6c97208 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -256,7 +256,6 @@ struct i915_address_space {
 	 */
 	struct drm_i915_file_private *file;
 	struct list_head global_link;
-	u64 start;		/* Start offset always 0 for dri2 */
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 
 	bool closed;
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index d0abfd08a01c..14014068dfcf 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -179,7 +179,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt,
 int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	unsigned long ggtt_end = ggtt->base.start + ggtt->base.total;
+	unsigned long ggtt_end = ggtt->base.total;
 
 	unsigned long mappable_base, mappable_size, mappable_end;
 	unsigned long unmappable_base, unmappable_size, unmappable_end;
@@ -202,8 +202,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 	DRM_INFO("Unmappable graphic memory: base 0x%lx size %ldKiB\n",
 		 unmappable_base, unmappable_size / 1024);
 
-	if (mappable_base < ggtt->base.start ||
-	    mappable_end > ggtt->mappable_end ||
+	if (mappable_end > ggtt->mappable_end ||
 	    unmappable_base < ggtt->mappable_end ||
 	    unmappable_end > ggtt_end) {
 		DRM_ERROR("Invalid ballooning configuration!\n");
@@ -231,9 +230,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv)
 	}
 
 	/* Mappable graphic memory ballooning */
-	if (mappable_base > ggtt->base.start) {
+	if (mappable_base) {
 		ret = vgt_balloon_space(ggtt, &bl_info.space[0],
-					ggtt->base.start, mappable_base);
+					0, mappable_base);
 
 		if (ret)
 			goto err;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (18 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 20/22] drm/i915: Remove i915_address_space.start Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-14 15:56   ` Matthew Auld
  2017-02-10 19:38 ` [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Chris Wilson
  2017-02-14 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() (rev3) Patchwork
  21 siblings, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

As the aliasing GTT is only accessed via the global GTT, we will never
use more of it than we expose via the global GTT and so we only need to
preallocate sufficient space within the ppgtt for the full GTT. Equally,
if the aliasing GTT is smaller than the global GTT, we have a serious
issue and must bail.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d71aaccb35ac..ae34a692bf87 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2379,9 +2379,14 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
+	if (WARN_ON(ppgtt->base.total < ggtt->base.total)) {
+		err = -ENODEV;
+		goto err_ppgtt;
+	}
+
 	if (ppgtt->base.allocate_va_range) {
 		err = ppgtt->base.allocate_va_range(&ppgtt->base,
-						    0, ppgtt->base.total);
+						    0, ggtt->base.total);
 		if (err)
 			goto err_ppgtt;
 	}
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (19 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT Chris Wilson
@ 2017-02-10 19:38 ` Chris Wilson
  2017-02-13 15:07   ` Matthew Auld
  2017-02-14  9:47   ` [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c Chris Wilson
  2017-02-14 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() (rev3) Patchwork
  21 siblings, 2 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-10 19:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Use an invalid filp so that the aliasing_ppgtt can be clearly
identified.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ae34a692bf87..75712cc47941 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2375,7 +2375,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 	struct i915_hw_ppgtt *ppgtt;
 	int err;
 
-	ppgtt = i915_ppgtt_create(i915, NULL, "[alias]");
+	ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]");
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 20/22] drm/i915: Remove i915_address_space.start
  2017-02-10 19:38 ` [PATCH v2 20/22] drm/i915: Remove i915_address_space.start Chris Wilson
@ 2017-02-13 14:47   ` Matthew Auld
  0 siblings, 0 replies; 31+ messages in thread
From: Matthew Auld @ 2017-02-13 14:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld, Mika Kuoppala

On 10 February 2017 at 19:38, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Once upon a time, back in the UMS days, we supported userspace
> initialising the GTT and sharing portions of the GTT with other users.
> Now, we own the GTT (both global and per-process) and the tables always
> start at 0 - so we can remove i915_address_space.start and forget about
> this old complication.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-10 19:38 ` [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
@ 2017-02-13 14:58   ` Mika Kuoppala
  2017-02-13 15:12     ` Chris Wilson
  2017-02-13 15:44   ` [PATCH v3] " Chris Wilson
  1 sibling, 1 reply; 31+ messages in thread
From: Mika Kuoppala @ 2017-02-13 14:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: matthew.auld

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Improve the sg iteration and in the process eliminate a bug in
> miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
> full length of the sg table.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 157 +++++++++++++++++++-----------------
>  1 file changed, 82 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index ca1f5fa6984f..fcb8d635aec0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -751,9 +751,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  	unsigned int num_entries = gen8_pte_count(start, length);
>  	unsigned int pte = gen8_pte_index(start);
>  	unsigned int pte_end = pte + num_entries;
> -	gen8_pte_t *pt_vaddr;
> -	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -						 I915_CACHE_LLC);
> +	gen8_pte_t scratch_pte =
> +		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);

Please const the scratch_pte and pte_end while you are here.

> +	gen8_pte_t *vaddr;
>  
>  	if (WARN_ON(!px_page(pt)))
>  		return false;
> @@ -766,12 +766,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  			return true;
>  	}
>  
> -	pt_vaddr = kmap_px(pt);
> -
> +	vaddr = kmap_px(pt);
>  	while (pte < pte_end)
> -		pt_vaddr[pte++] = scratch_pte;
> -
> -	kunmap_px(ppgtt, pt_vaddr);
> +		vaddr[pte++] = scratch_pte;
> +	kunmap_px(ppgtt, vaddr);
>  
>  	return false;
>  }
> @@ -879,71 +877,93 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
>  }
>  
> -static void
> -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
> +struct sgt_dma {
> +	struct scatterlist *sg;
> +	dma_addr_t dma, max;
> +};
> +
> +static __always_inline bool
> +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
>  			      struct i915_page_directory_pointer *pdp,
> -			      struct sg_page_iter *sg_iter,
> -			      uint64_t start,
> +			      struct sgt_dma *iter,
> +			      u64 start,
>  			      enum i915_cache_level cache_level)
>  {
> -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	gen8_pte_t *pt_vaddr;
> -	unsigned pdpe = gen8_pdpe_index(start);
> -	unsigned pde = gen8_pde_index(start);
> -	unsigned pte = gen8_pte_index(start);
> +	unsigned int pdpe = gen8_pdpe_index(start);
> +	unsigned int pde = gen8_pde_index(start);
> +	unsigned int pte = gen8_pte_index(start);
> +	struct i915_page_directory *pd;
> +	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
> +	gen8_pte_t *vaddr;
> +	bool ret = true;
>  
> -	pt_vaddr = NULL;
> +	pd = pdp->page_directory[pdpe];
> +	vaddr = kmap_px(pd->page_table[pde]);
> +	do {
> +		vaddr[pte] = pte_encode | iter->dma;
> +		iter->dma += PAGE_SIZE;
> +		if (iter->dma >= iter->max) {
> +			iter->sg = __sg_next(iter->sg);
> +			if (!iter->sg) {
> +				ret = false;
> +				break;

We never exit with ret = true in this loop.

> +			}
>  
> -	while (__sg_page_iter_next(sg_iter)) {
> -		if (pt_vaddr == NULL) {
> -			struct i915_page_directory *pd = pdp->page_directory[pdpe];
> -			struct i915_page_table *pt = pd->page_table[pde];
> -			pt_vaddr = kmap_px(pt);
> +			iter->dma = sg_dma_address(iter->sg);
> +			iter->max = iter->dma + iter->sg->length;
>  		}
>  
> -		pt_vaddr[pte] =
> -			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
> -					cache_level);
>  		if (++pte == GEN8_PTES) {
> -			kunmap_px(ppgtt, pt_vaddr);
> -			pt_vaddr = NULL;
>  			if (++pde == I915_PDES) {
> -				if (++pdpe == I915_PDPES_PER_PDP(vm->i915))
> -					break;
> +				pd = pdp->page_directory[++pdpe];
>  				pde = 0;
>  			}
> +
> +			kunmap_px(ppgtt, vaddr);
> +			vaddr = kmap_px(pd->page_table[pde]);
>  			pte = 0;
>  		}
> -	}
> +	} while (1);
> +	kunmap_px(ppgtt, vaddr);
>  
> -	if (pt_vaddr)
> -		kunmap_px(ppgtt, pt_vaddr);
> +	return ret;
>  }
>  
> -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
> -				      struct sg_table *pages,
> -				      uint64_t start,
> -				      enum i915_cache_level cache_level,
> -				      u32 unused)
> +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
> +				   struct sg_table *pages,
> +				   uint64_t start,
> +				   enum i915_cache_level cache_level,
> +				   u32 unused)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	struct sg_page_iter sg_iter;
> +	struct sgt_dma iter = {
> +		.sg = pages->sgl,
> +		.dma = sg_dma_address(iter.sg),
> +		.max = iter.dma + iter.sg->length,
> +	};
>  
> -	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
> +	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter,
> +				      start, cache_level);
> +}
>  
> -	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
> -		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
> -					      cache_level);
> -	} else {
> -		struct i915_page_directory_pointer *pdp;
> -		uint64_t pml4e;
> -		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
> +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
> +				   struct sg_table *pages,
> +				   uint64_t start,
> +				   enum i915_cache_level cache_level,
> +				   u32 unused)
> +{
> +	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +	struct sgt_dma iter = {
> +		.sg = pages->sgl,
> +		.dma = sg_dma_address(iter.sg),
> +		.max = iter.dma + iter.sg->length,
> +	};
> +	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
> +	unsigned int pml4e = gen8_pml4e_index(start);
>  
> -		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
> -			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
> -						      start, cache_level);
> -		}
> -	}
> +	while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter,
> +					     start, cache_level))
> +		;
>  }
>  
>  static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
> @@ -1591,7 +1611,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.start = 0;
>  	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
>  	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
> -	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
>  	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
>  	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
>  	ppgtt->base.bind_vma = ppgtt_bind_vma;
> @@ -1606,6 +1625,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  
>  		ppgtt->base.total = 1ULL << 48;
>  		ppgtt->switch_mm = gen8_48b_mm_switch;
> +
> +		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
>  	} else {
>  		ret = __pdp_init(dev_priv, &ppgtt->pdp);
>  		if (ret)
> @@ -1622,6 +1643,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  			if (ret)
>  				goto free_scratch;
>  		}
> +
> +		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
>  	}
>  
>  	if (intel_vgpu_active(dev_priv))
> @@ -1888,11 +1911,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	}
>  }
>  
> -struct sgt_dma {
> -	struct scatterlist *sg;
> -	dma_addr_t dma, max;
> -};
> -
>  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  				      struct sg_table *pages,
>  				      uint64_t start,
> @@ -2434,26 +2452,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>  	struct sgt_iter sgt_iter;
>  	gen8_pte_t __iomem *gtt_entries;
> -	gen8_pte_t gtt_entry;
> +	gen8_pte_t pte_encode = gen8_pte_encode(0, level);

Const would do here also for consistency.

-Mika

>  	dma_addr_t addr;
> -	int i = 0;
> -
> -	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
>  
> -	for_each_sgt_dma(addr, sgt_iter, st) {
> -		gtt_entry = gen8_pte_encode(addr, level);
> -		gen8_set_pte(&gtt_entries[i++], gtt_entry);
> -	}
> +	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
> +	gtt_entries += start >> PAGE_SHIFT;
> +	for_each_sgt_dma(addr, sgt_iter, st)
> +		gen8_set_pte(gtt_entries++, pte_encode | addr);
>  
> -	/*
> -	 * XXX: This serves as a posting read to make sure that the PTE has
> -	 * actually been updated. There is some concern that even though
> -	 * registers and PTEs are within the same BAR that they are potentially
> -	 * of NUMA access patterns. Therefore, even with the way we assume
> -	 * hardware should work, we must keep this posting read for paranoia.
> -	 */
> -	if (i != 0)
> -		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
> +	wmb();
>  
>  	/* This next bit makes the above posting read even more important. We
>  	 * want to flush the TLBs only after we're certain all the PTE updates
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp
  2017-02-10 19:38 ` [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Chris Wilson
@ 2017-02-13 15:07   ` Matthew Auld
  2017-02-14  9:47   ` [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c Chris Wilson
  1 sibling, 0 replies; 31+ messages in thread
From: Matthew Auld @ 2017-02-13 15:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld, Mika Kuoppala

On 10 February 2017 at 19:38, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Use an invalid filp so that the aliasing_ppgtt can be clearly
> identified.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-13 14:58   ` Mika Kuoppala
@ 2017-02-13 15:12     ` Chris Wilson
  0 siblings, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-13 15:12 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx, matthew.auld

On Mon, Feb 13, 2017 at 04:58:06PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > +static __always_inline bool
> > +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
> >  			      struct i915_page_directory_pointer *pdp,
> > -			      struct sg_page_iter *sg_iter,
> > -			      uint64_t start,
> > +			      struct sgt_dma *iter,
> > +			      u64 start,
> >  			      enum i915_cache_level cache_level)
> >  {
> > -	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> > -	gen8_pte_t *pt_vaddr;
> > -	unsigned pdpe = gen8_pdpe_index(start);
> > -	unsigned pde = gen8_pde_index(start);
> > -	unsigned pte = gen8_pte_index(start);
> > +	unsigned int pdpe = gen8_pdpe_index(start);
> > +	unsigned int pde = gen8_pde_index(start);
> > +	unsigned int pte = gen8_pte_index(start);
> > +	struct i915_page_directory *pd;
> > +	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
> > +	gen8_pte_t *vaddr;
> > +	bool ret = true;
> >  
> > -	pt_vaddr = NULL;
> > +	pd = pdp->page_directory[pdpe];
> > +	vaddr = kmap_px(pd->page_table[pde]);
> > +	do {
> > +		vaddr[pte] = pte_encode | iter->dma;
> > +		iter->dma += PAGE_SIZE;
> > +		if (iter->dma >= iter->max) {
> > +			iter->sg = __sg_next(iter->sg);
> > +			if (!iter->sg) {
> > +				ret = false;
> > +				break;
> 
> We never exit with ret = true in this loop.

Indeed, and I have a test now to prove it.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3] drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
  2017-02-10 19:38 ` [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
  2017-02-13 14:58   ` Mika Kuoppala
@ 2017-02-13 15:44   ` Chris Wilson
  1 sibling, 0 replies; 31+ messages in thread
From: Chris Wilson @ 2017-02-13 15:44 UTC (permalink / raw)
  To: intel-gfx; +Cc: matthew.auld, mika.kuoppala

Improve the sg iteration and in the process eliminate a bug in
miscomputing the pml4 length as orig_nents<<PAGE_SHIFT is no longer the
full length of the sg table.

v2: Check for the end of the fourth level page table (the final pdpe)
and move onto the next.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 170 +++++++++++++++++++-----------------
 1 file changed, 91 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ca1f5fa6984f..ddfb5963f521 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -751,9 +751,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
 	unsigned int pte_end = pte + num_entries;
-	gen8_pte_t *pt_vaddr;
-	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC);
+	const gen8_pte_t scratch_pte =
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+	gen8_pte_t *vaddr;
 
 	if (WARN_ON(!px_page(pt)))
 		return false;
@@ -766,12 +766,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 			return true;
 	}
 
-	pt_vaddr = kmap_px(pt);
-
+	vaddr = kmap_px(pt);
 	while (pte < pte_end)
-		pt_vaddr[pte++] = scratch_pte;
-
-	kunmap_px(ppgtt, pt_vaddr);
+		vaddr[pte++] = scratch_pte;
+	kunmap_px(ppgtt, vaddr);
 
 	return false;
 }
@@ -879,71 +877,98 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
 }
 
-static void
-gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+};
+
+static __always_inline bool
+gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 			      struct i915_page_directory_pointer *pdp,
-			      struct sg_page_iter *sg_iter,
-			      uint64_t start,
+			      struct sgt_dma *iter,
+			      u64 start,
 			      enum i915_cache_level cache_level)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = gen8_pdpe_index(start);
-	unsigned pde = gen8_pde_index(start);
-	unsigned pte = gen8_pte_index(start);
+	unsigned int pdpe = gen8_pdpe_index(start);
+	unsigned int pde = gen8_pde_index(start);
+	unsigned int pte = gen8_pte_index(start);
+	struct i915_page_directory *pd;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+	gen8_pte_t *vaddr;
+	bool ret;
 
-	pt_vaddr = NULL;
+	pd = pdp->page_directory[pdpe];
+	vaddr = kmap_px(pd->page_table[pde]);
+	do {
+		vaddr[pte] = pte_encode | iter->dma;
+		iter->dma += PAGE_SIZE;
+		if (iter->dma >= iter->max) {
+			iter->sg = __sg_next(iter->sg);
+			if (!iter->sg) {
+				ret = false;
+				break;
+			}
 
-	while (__sg_page_iter_next(sg_iter)) {
-		if (pt_vaddr == NULL) {
-			struct i915_page_directory *pd = pdp->page_directory[pdpe];
-			struct i915_page_table *pt = pd->page_table[pde];
-			pt_vaddr = kmap_px(pt);
+			iter->dma = sg_dma_address(iter->sg);
+			iter->max = iter->dma + iter->sg->length;
 		}
 
-		pt_vaddr[pte] =
-			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level);
 		if (++pte == GEN8_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
 			if (++pde == I915_PDES) {
-				if (++pdpe == I915_PDPES_PER_PDP(vm->i915))
+				if (++pdpe == GEN8_PML4ES_PER_PML4) {
+					ret = true;
 					break;
+				}
+
+				pd = pdp->page_directory[pdpe];
 				pde = 0;
 			}
+
+			kunmap_px(ppgtt, vaddr);
+			vaddr = kmap_px(pd->page_table[pde]);
 			pte = 0;
 		}
-	}
+	} while (1);
+	kunmap_px(ppgtt, vaddr);
 
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	return ret;
 }
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct sg_table *pages,
-				      uint64_t start,
-				      enum i915_cache_level cache_level,
-				      u32 unused)
+static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
+				   struct sg_table *pages,
+				   u64 start,
+				   enum i915_cache_level cache_level,
+				   u32 unused)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct sg_page_iter sg_iter;
+	struct sgt_dma iter = {
+		.sg = pages->sgl,
+		.dma = sg_dma_address(iter.sg),
+		.max = iter.dma + iter.sg->length,
+	};
 
-	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter,
+				      start, cache_level);
+}
 
-	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
-		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
-					      cache_level);
-	} else {
-		struct i915_page_directory_pointer *pdp;
-		uint64_t pml4e;
-		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
+				   struct sg_table *pages,
+				   uint64_t start,
+				   enum i915_cache_level cache_level,
+				   u32 unused)
+{
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct sgt_dma iter = {
+		.sg = pages->sgl,
+		.dma = sg_dma_address(iter.sg),
+		.max = iter.dma + iter.sg->length,
+	};
+	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
+	unsigned int pml4e = gen8_pml4e_index(start);
 
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
-			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
-						      start, cache_level);
-		}
-	}
+	while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter,
+					     start, cache_level))
+		;
 }
 
 static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
@@ -1525,8 +1550,8 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	struct i915_address_space *vm = &ppgtt->base;
 	uint64_t start = ppgtt->base.start;
 	uint64_t length = ppgtt->base.total;
-	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC);
+	const gen8_pte_t scratch_pte =
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
 		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
@@ -1591,7 +1616,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.start = 0;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
@@ -1606,6 +1630,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 		ppgtt->base.total = 1ULL << 48;
 		ppgtt->switch_mm = gen8_48b_mm_switch;
+
+		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
 	} else {
 		ret = __pdp_init(dev_priv, &ppgtt->pdp);
 		if (ret)
@@ -1622,6 +1648,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			if (ret)
 				goto free_scratch;
 		}
+
+		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
 	}
 
 	if (intel_vgpu_active(dev_priv))
@@ -1888,11 +1916,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
-struct sgt_dma {
-	struct scatterlist *sg;
-	dma_addr_t dma, max;
-};
-
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
@@ -2434,26 +2457,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
-	gen8_pte_t gtt_entry;
+	const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
 	dma_addr_t addr;
-	int i = 0;
 
-	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+	gtt_entries += start >> PAGE_SHIFT;
+	for_each_sgt_dma(addr, sgt_iter, st)
+		gen8_set_pte(gtt_entries++, pte_encode | addr);
 
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = gen8_pte_encode(addr, level);
-		gen8_set_pte(&gtt_entries[i++], gtt_entry);
-	}
-
-	/*
-	 * XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -2541,7 +2553,9 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
-	gen8_pte_t scratch_pte, __iomem *gtt_base =
+	const gen8_pte_t scratch_pte =
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+	gen8_pte_t __iomem *gtt_base =
 		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
 	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
 	int i;
@@ -2551,8 +2565,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 	readl(gtt_base);
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c
  2017-02-10 19:38 ` [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Chris Wilson
  2017-02-13 15:07   ` Matthew Auld
@ 2017-02-14  9:47   ` Chris Wilson
  2017-02-14 12:01     ` Joonas Lahtinen
  1 sibling, 1 reply; 31+ messages in thread
From: Chris Wilson @ 2017-02-14  9:47 UTC (permalink / raw)
  To: intel-gfx

Make checkpatch happy and make the use of u32/u64 consistent throughout
i915_gem_gtt.[ch]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 83 +++++++++++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h | 56 ++++++++++++-------------
 2 files changed, 69 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9dd161a4a8a2..65892d6e3877 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -887,7 +887,7 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 
 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 				   struct sg_table *pages,
-				   uint64_t start,
+				   u64 start,
 				   enum i915_cache_level cache_level,
 				   u32 unused)
 {
@@ -1161,25 +1161,25 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 
 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 			  struct i915_page_directory_pointer *pdp,
-			  uint64_t start, uint64_t length,
+			  u64 start, u64 length,
 			  gen8_pte_t scratch_pte,
 			  struct seq_file *m)
 {
 	struct i915_page_directory *pd;
-	uint32_t pdpe;
+	u32 pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		struct i915_page_table *pt;
-		uint64_t pd_len = length;
-		uint64_t pd_start = start;
-		uint32_t pde;
+		u64 pd_len = length;
+		u64 pd_start = start;
+		u32 pde;
 
 		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
 			continue;
 
 		seq_printf(m, "\tPDPE #%d\n", pdpe);
 		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
-			uint32_t  pte;
+			u32 pte;
 			gen8_pte_t *pt_vaddr;
 
 			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
@@ -1187,10 +1187,9 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
 
 			pt_vaddr = kmap_atomic_px(pt);
 			for (pte = 0; pte < GEN8_PTES; pte += 4) {
-				uint64_t va =
-					(pdpe << GEN8_PDPE_SHIFT) |
-					(pde << GEN8_PDE_SHIFT) |
-					(pte << GEN8_PTE_SHIFT);
+				u64 va = (pdpe << GEN8_PDPE_SHIFT |
+					  pde << GEN8_PDE_SHIFT |
+					  pte << GEN8_PTE_SHIFT);
 				int i;
 				bool found = false;
 
@@ -1224,7 +1223,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
 		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
 	} else {
-		uint64_t pml4e;
+		u64 pml4e;
 		struct i915_pml4 *pml4 = &ppgtt->pml4;
 		struct i915_page_directory_pointer *pdp;
 
@@ -1406,7 +1405,7 @@ static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
 /* Write all the page tables found in the ppgtt structure to incrementing page
  * directories. */
 static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
-				  uint32_t start, uint32_t length)
+				  u32 start, u32 length)
 {
 	struct i915_page_table *pt;
 	unsigned int pde;
@@ -1418,7 +1417,7 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
 	wmb();
 }
 
-static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
+static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 {
 	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
 	return ppgtt->pd.base.ggtt_offset << 10;
@@ -1512,7 +1511,7 @@ static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	uint32_t ecochk, ecobits;
+	u32 ecochk, ecobits;
 	enum intel_engine_id id;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
@@ -1536,7 +1535,7 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
 
 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
 {
-	uint32_t ecochk, gab_ctl, ecobits;
+	u32 ecochk, gab_ctl, ecobits;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
@@ -1588,8 +1587,9 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
-				      uint64_t start,
-				      enum i915_cache_level cache_level, u32 flags)
+				      u64 start,
+				      enum i915_cache_level cache_level,
+				      u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -1689,7 +1689,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory *pd = &ppgtt->pd;
 	struct i915_page_table *pt;
-	uint32_t pde;
+	u32 pde;
 
 	drm_mm_remove_node(&ppgtt->node);
 
@@ -1747,10 +1747,10 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
 }
 
 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
-				  uint64_t start, uint64_t length)
+				  u64 start, u64 length)
 {
 	struct i915_page_table *unused;
-	uint32_t pde;
+	u32 pde;
 
 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
 		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
@@ -2044,7 +2044,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 
 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
-				  uint64_t offset,
+				  u64 offset,
 				  enum i915_cache_level level,
 				  u32 unused)
 {
@@ -2059,8 +2059,9 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
-				     uint64_t start,
-				     enum i915_cache_level level, u32 unused)
+				     u64 start,
+				     enum i915_cache_level level,
+				     u32 unused)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
@@ -2085,7 +2086,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 struct insert_entries {
 	struct i915_address_space *vm;
 	struct sg_table *st;
-	uint64_t start;
+	u64 start;
 	enum i915_cache_level level;
 	u32 flags;
 };
@@ -2100,7 +2101,7 @@ static int gen8_ggtt_insert_entries__cb(void *_arg)
 
 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 					  struct sg_table *st,
-					  uint64_t start,
+					  u64 start,
 					  enum i915_cache_level level,
 					  u32 flags)
 {
@@ -2110,7 +2111,7 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 
 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
-				  uint64_t offset,
+				  u64 offset,
 				  enum i915_cache_level level,
 				  u32 flags)
 {
@@ -2131,8 +2132,9 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
  */
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
-				     uint64_t start,
-				     enum i915_cache_level level, u32 flags)
+				     u64 start,
+				     enum i915_cache_level level,
+				     u32 flags)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
@@ -2151,12 +2153,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 }
 
 static void nop_clear_range(struct i915_address_space *vm,
-			    uint64_t start, uint64_t length)
+			    u64 start, u64 length)
 {
 }
 
 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  uint64_t start, uint64_t length)
+				  u64 start, u64 length)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -2178,8 +2180,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 }
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
-				  uint64_t start,
-				  uint64_t length)
+				  u64 start, u64 length)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -2203,7 +2204,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 
 static void i915_ggtt_insert_page(struct i915_address_space *vm,
 				  dma_addr_t addr,
-				  uint64_t offset,
+				  u64 offset,
 				  enum i915_cache_level cache_level,
 				  u32 unused)
 {
@@ -2215,8 +2216,9 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm,
 
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *pages,
-				     uint64_t start,
-				     enum i915_cache_level cache_level, u32 unused)
+				     u64 start,
+				     enum i915_cache_level cache_level,
+				     u32 unused)
 {
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
@@ -2225,8 +2227,7 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 }
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
-				  uint64_t start,
-				  uint64_t length)
+				  u64 start, u64 length)
 {
 	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
 }
@@ -2635,7 +2636,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
  * writing this data shouldn't be harmful even in those cases. */
 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
 {
-	uint64_t pat;
+	u64 pat;
 
 	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
 	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
@@ -2670,7 +2671,7 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
 
 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
 {
-	uint64_t pat;
+	u64 pat;
 
 	/*
 	 * Map WB on BDW to snooped on CHV.
@@ -3067,7 +3068,7 @@ static noinline struct sg_table *
 intel_rotate_pages(struct intel_rotation_info *rot_info,
 		   struct drm_i915_gem_object *obj)
 {
-	const size_t n_pages = obj->base.size / PAGE_SIZE;
+	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size = intel_rotation_info_size(rot_info);
 	struct sgt_iter sgt_iter;
 	dma_addr_t dma_addr;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e85ff6c97208..f7d4e194a227 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -53,11 +53,11 @@
 struct drm_i915_file_private;
 struct drm_i915_fence_reg;
 
-typedef uint32_t gen6_pte_t;
-typedef uint64_t gen8_pte_t;
-typedef uint64_t gen8_pde_t;
-typedef uint64_t gen8_ppgtt_pdpe_t;
-typedef uint64_t gen8_ppgtt_pml4e_t;
+typedef u32 gen6_pte_t;
+typedef u64 gen8_pte_t;
+typedef u64 gen8_pde_t;
+typedef u64 gen8_ppgtt_pdpe_t;
+typedef u64 gen8_ppgtt_pml4e_t;
 
 #define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT)
 
@@ -143,7 +143,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
 #define GEN8_PPAT_WC			(1<<0)
 #define GEN8_PPAT_UC			(0<<0)
 #define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
-#define GEN8_PPAT(i, x)			((uint64_t) (x) << ((i) * 8))
+#define GEN8_PPAT(i, x)			((u64)(x) << ((i) * 8))
 
 struct sg_table;
 
@@ -210,7 +210,7 @@ struct i915_page_dma {
 		/* For gen6/gen7 only. This is the offset in the GGTT
 		 * where the page directory entries for PPGTT begin
 		 */
-		uint32_t ggtt_offset;
+		u32 ggtt_offset;
 	};
 };
 
@@ -305,20 +305,19 @@ struct i915_address_space {
 	/* flags for pte_encode */
 #define PTE_READ_ONLY	(1<<0)
 	int (*allocate_va_range)(struct i915_address_space *vm,
-				 uint64_t start,
-				 uint64_t length);
+				 u64 start, u64 length);
 	void (*clear_range)(struct i915_address_space *vm,
-			    uint64_t start,
-			    uint64_t length);
+			    u64 start, u64 length);
 	void (*insert_page)(struct i915_address_space *vm,
 			    dma_addr_t addr,
-			    uint64_t offset,
+			    u64 offset,
 			    enum i915_cache_level cache_level,
 			    u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
-			       uint64_t start,
-			       enum i915_cache_level cache_level, u32 flags);
+			       u64 start,
+			       enum i915_cache_level cache_level,
+			       u32 flags);
 	void (*cleanup)(struct i915_address_space *vm);
 	/** Unmap an object from an address space. This usually consists of
 	 * setting the valid PTE entries to a reserved scratch page. */
@@ -411,9 +410,9 @@ struct i915_hw_ppgtt {
 		(pt = (pd)->page_table[iter], true);			\
 	     ++iter)
 
-static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
+static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
 {
-	const uint32_t mask = NUM_PTE(pde_shift) - 1;
+	const u32 mask = NUM_PTE(pde_shift) - 1;
 
 	return (address >> PAGE_SHIFT) & mask;
 }
@@ -422,11 +421,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
  * does not cross a page table boundary, so the max value would be
  * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
 */
-static inline uint32_t i915_pte_count(uint64_t addr, size_t length,
-				      uint32_t pde_shift)
+static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
 {
-	const uint64_t mask = ~((1ULL << pde_shift) - 1);
-	uint64_t end;
+	const u64 mask = ~((1ULL << pde_shift) - 1);
+	u64 end;
 
 	WARN_ON(length == 0);
 	WARN_ON(offset_in_page(addr|length));
@@ -439,22 +437,22 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length,
 	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
 }
 
-static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift)
+static inline u32 i915_pde_index(u64 addr, u32 shift)
 {
 	return (addr >> shift) & I915_PDE_MASK;
 }
 
-static inline uint32_t gen6_pte_index(uint32_t addr)
+static inline u32 gen6_pte_index(u32 addr)
 {
 	return i915_pte_index(addr, GEN6_PDE_SHIFT);
 }
 
-static inline size_t gen6_pte_count(uint32_t addr, uint32_t length)
+static inline u32 gen6_pte_count(u32 addr, u32 length)
 {
 	return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
 }
 
-static inline uint32_t gen6_pde_index(uint32_t addr)
+static inline u32 gen6_pde_index(u32 addr)
 {
 	return i915_pde_index(addr, GEN6_PDE_SHIFT);
 }
@@ -487,27 +485,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
 		    temp = min(temp - start, length);			\
 		    start += temp, length -= temp; }), ++iter)
 
-static inline uint32_t gen8_pte_index(uint64_t address)
+static inline u32 gen8_pte_index(u64 address)
 {
 	return i915_pte_index(address, GEN8_PDE_SHIFT);
 }
 
-static inline uint32_t gen8_pde_index(uint64_t address)
+static inline u32 gen8_pde_index(u64 address)
 {
 	return i915_pde_index(address, GEN8_PDE_SHIFT);
 }
 
-static inline uint32_t gen8_pdpe_index(uint64_t address)
+static inline u32 gen8_pdpe_index(u64 address)
 {
 	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
 }
 
-static inline uint32_t gen8_pml4e_index(uint64_t address)
+static inline u32 gen8_pml4e_index(u64 address)
 {
 	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
 }
 
-static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
+static inline u64 gen8_pte_count(u64 address, u64 length)
 {
 	return i915_pte_count(address, length, GEN8_PDE_SHIFT);
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v2,01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() (rev3)
  2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
                   ` (20 preceding siblings ...)
  2017-02-10 19:38 ` [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Chris Wilson
@ 2017-02-14 11:22 ` Patchwork
  21 siblings, 0 replies; 31+ messages in thread
From: Patchwork @ 2017-02-14 11:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v2,01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() (rev3)
URL   : https://patchwork.freedesktop.org/series/19475/
State : success

== Summary ==

Series 19475v3 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/19475/revisions/3/mbox/

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                pass       -> INCOMPLETE (fi-ilk-650) FDO#99732

FDO#99732 https://bugs.freedesktop.org/show_bug.cgi?id=99732

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:209  pass:165  dwarn:0   dfail:0   fail:0   skip:43 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

37f3b76e282e6d7fdd7d6c85d782298af1a0793b drm-tip: 2017y-02m-14d-09h-39m-22s UTC integration manifest
80d4fa2 drm/i915: Split ggtt/alasing_gtt unbind_vma
b3d3277 drm/i915: Don't special case teardown of aliasing_ppgtt
99edb46 drm/i915: Micro-optimise gen8_ppgtt_insert_entries()
d44e410 drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
cf1ece2 drm/i915: Micro-optimise i915_get_ggtt_vma_pages()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3797/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c
  2017-02-14  9:47   ` [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c Chris Wilson
@ 2017-02-14 12:01     ` Joonas Lahtinen
  0 siblings, 0 replies; 31+ messages in thread
From: Joonas Lahtinen @ 2017-02-14 12:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ti, 2017-02-14 at 09:47 +0000, Chris Wilson wrote:
> Make checkpatch happy and make the use of u32/u64 consistent throughout
> i915_gem_gtt.[ch]
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Yes please.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT
  2017-02-10 19:38 ` [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT Chris Wilson
@ 2017-02-14 15:56   ` Matthew Auld
  0 siblings, 0 replies; 31+ messages in thread
From: Matthew Auld @ 2017-02-14 15:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Intel Graphics Development, Matthew Auld, Mika Kuoppala

On 10 February 2017 at 19:38, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> As the aliasing GTT is only accessed via the global GTT, we will never
> use more of it than we expose via the Global GTT and so we only need to
> preallocate sufficient space within the ppgtt for the full GTT. Equally,
> if the aliasing GTT is smaller than the global GTT, we have a serious
> issue and must bail.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2017-02-14 15:56 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-10 19:38 [PATCH v2 01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 02/22] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 03/22] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
2017-02-13 14:58   ` Mika Kuoppala
2017-02-13 15:12     ` Chris Wilson
2017-02-13 15:44   ` [PATCH v3] " Chris Wilson
2017-02-10 19:38 ` [PATCH v2 04/22] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
2017-02-10 19:38 ` [PATCH v2 05/22] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
2017-02-10 19:38 ` [PATCH v2 06/22] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
2017-02-10 19:38 ` [PATCH v2 07/22] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
2017-02-10 19:38 ` [PATCH v2 08/22] drm/i915: Move allocate_va_range to GTT Chris Wilson
2017-02-10 19:38 ` [PATCH v2 09/22] drm/i915: Always preallocate gen6/7 ppgtt Chris Wilson
2017-02-10 19:38 ` [PATCH v2 10/22] drm/i915: Remove redundant clear of appgtt Chris Wilson
2017-02-10 19:38 ` [PATCH v2 11/22] drm/i915: Tidy gen6_write_pde() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 12/22] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
2017-02-10 19:38 ` [PATCH v2 13/22] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
2017-02-10 19:38 ` [PATCH v2 14/22] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
2017-02-10 19:38 ` [PATCH v2 15/22] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
2017-02-10 19:38 ` [PATCH v2 16/22] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
2017-02-10 19:38 ` [PATCH v2 17/22] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
2017-02-10 19:38 ` [PATCH v2 18/22] drm/i915: Remove defunct GTT tracepoints Chris Wilson
2017-02-10 19:38 ` [PATCH v2 19/22] drm/i915: Remove unused ppgtt->enable() Chris Wilson
2017-02-10 19:38 ` [PATCH v2 20/22] drm/i915: Remove i915_address_space.start Chris Wilson
2017-02-13 14:47   ` Matthew Auld
2017-02-10 19:38 ` [PATCH v2 21/22] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT Chris Wilson
2017-02-14 15:56   ` Matthew Auld
2017-02-10 19:38 ` [PATCH v2 22/22] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Chris Wilson
2017-02-13 15:07   ` Matthew Auld
2017-02-14  9:47   ` [PATCH] drm/i915: Use preferred kernel types in i915_gem_gtt.c Chris Wilson
2017-02-14 12:01     ` Joonas Lahtinen
2017-02-14 11:22 ` ✓ Fi.CI.BAT: success for series starting with [v2,01/22] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() (rev3) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.