All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
@ 2016-10-04 13:54 Michał Winiarski
  2016-10-04 13:54 ` [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Michał Winiarski @ 2016-10-04 13:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Let's use more top-down approach, where each gen8_ppgtt_clear_* function
is responsible for clearing the struct passed as an argument and calling
relevant clear_range functions on lower-level tables.
Doing this rather than operating on PTE ranges makes the implementation
of shrinking page tables quite simple.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++---------------
 1 file changed, 66 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0bb4232..6086122 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -706,59 +706,84 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 }
 
-static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
-				       struct i915_page_directory_pointer *pdp,
-				       uint64_t start,
-				       uint64_t length,
-				       gen8_pte_t scratch_pte)
+static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+				struct i915_page_table *pt,
+				uint64_t start,
+				uint64_t length,
+				bool use_scratch)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+	unsigned int pte_start = gen8_pte_index(start);
+	unsigned int num_entries = min(gen8_pte_count(start, length),
+				       GEN8_PTES);
+	uint64_t pte;
+
 	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = gen8_pdpe_index(start);
-	unsigned pde = gen8_pde_index(start);
-	unsigned pte = gen8_pte_index(start);
-	unsigned num_entries = length >> PAGE_SHIFT;
-	unsigned last_pte, i;
+	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
+						 I915_CACHE_LLC, use_scratch);
 
-	if (WARN_ON(!pdp))
+	if (WARN_ON(!px_page(pt)))
 		return;
 
-	while (num_entries) {
-		struct i915_page_directory *pd;
-		struct i915_page_table *pt;
+	bitmap_clear(pt->used_ptes, pte_start, num_entries);
 
-		if (WARN_ON(!pdp->page_directory[pdpe]))
-			break;
+	pt_vaddr = kmap_px(pt);
 
-		pd = pdp->page_directory[pdpe];
+	for (pte = pte_start; pte < num_entries; pte++)
+		pt_vaddr[pte] = scratch_pte;
 
+	kunmap_px(ppgtt, pt_vaddr);
+}
+
+static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+				struct i915_page_directory *pd,
+				uint64_t start,
+				uint64_t length,
+				bool use_scratch)
+{
+	struct i915_page_table *pt;
+	uint64_t pde;
+
+	gen8_for_each_pde(pt, pd, start, length, pde) {
 		if (WARN_ON(!pd->page_table[pde]))
 			break;
 
-		pt = pd->page_table[pde];
+		gen8_ppgtt_clear_pt(vm, pt, start, length, use_scratch);
+	}
+}
 
-		if (WARN_ON(!px_page(pt)))
-			break;
+static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+				 struct i915_page_directory_pointer *pdp,
+				 uint64_t start,
+				 uint64_t length,
+				 bool use_scratch)
+{
+	struct i915_page_directory *pd;
+	uint64_t pdpe;
 
-		last_pte = pte + num_entries;
-		if (last_pte > GEN8_PTES)
-			last_pte = GEN8_PTES;
+	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+		if (WARN_ON(!pdp->page_directory[pdpe]))
+			break;
 
-		pt_vaddr = kmap_px(pt);
+		gen8_ppgtt_clear_pd(vm, pd, start, length, use_scratch);
+	}
+}
 
-		for (i = pte; i < last_pte; i++) {
-			pt_vaddr[i] = scratch_pte;
-			num_entries--;
-		}
+static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
+				  struct i915_pml4 *pml4,
+				  uint64_t start,
+				  uint64_t length,
+				  bool use_scratch)
+{
+	struct i915_page_directory_pointer *pdp;
+	uint64_t pml4e;
 
-		kunmap_px(ppgtt, pt_vaddr);
+	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+		if (WARN_ON(!pml4->pdps[pml4e]))
+			break;
 
-		pte = 0;
-		if (++pde == I915_PDES) {
-			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
-				break;
-			pde = 0;
-		}
+		gen8_ppgtt_clear_pdp(vm, pdp, start, length, use_scratch);
 	}
 }
 
@@ -768,21 +793,13 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 				   bool use_scratch)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC, use_scratch);
 
-	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
-		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
-					   scratch_pte);
-	} else {
-		uint64_t pml4e;
-		struct i915_page_directory_pointer *pdp;
-
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
-			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
-						   scratch_pte);
-		}
-	}
+	if (!USES_FULL_48BIT_PPGTT(vm->dev))
+		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length,
+				     use_scratch);
+	else
+		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length,
+				      use_scratch);
 }
 
 static void
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables
  2016-10-04 13:54 [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
@ 2016-10-04 13:54 ` Michał Winiarski
  2016-10-05  6:40   ` Joonas Lahtinen
  2016-10-04 13:54 ` [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Michał Winiarski @ 2016-10-04 13:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Since "Dynamic page table allocations" were introduced, our page tables
can grow (being dynamically allocated) with address space range usage.
Unfortunately, their lifetime is bound to vm. This is not a huge problem
when we're not using softpin - drm_mm is creating an upper bound on used
range by causing addresses for our VMAs to eventually be reused.

With softpin, long lived contexts can drain the system out of memory
even with a single "small" object. For example:

bo = bo_alloc(size);
while(true)
    offset += size;
    exec(bo, offset);

Will cause us to create new allocations until all memory in the system
is used for tracking GPU pages (even though almost all PTEs in this vm
are pointing to scratch).

Let's free unused page tables in clear_range to prevent this - if no
entries in a table are used, we can safely free that table and report it
to the caller (so that the higher-level entry can be pointed at scratch).

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 103 ++++++++++++++++++++++++++++++------
 1 file changed, 88 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6086122..281e349 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -218,9 +218,10 @@ static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
 }
 
 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
-				  const enum i915_cache_level level)
+				  const enum i915_cache_level level,
+				  bool valid)
 {
-	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
+	gen8_pde_t pde = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
 	pde |= addr;
 	if (level != I915_CACHE_NONE)
 		pde |= PPAT_CACHED_PDE_INDEX;
@@ -532,7 +533,8 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 {
 	gen8_pde_t scratch_pde;
 
-	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
+	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC,
+				      true);
 
 	fill_px(vm->dev, pd, scratch_pde);
 }
@@ -613,7 +615,8 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
 {
 	gen8_ppgtt_pdpe_t scratch_pdpe;
 
-	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
+	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC,
+					true);
 
 	fill_px(vm->dev, pdp, scratch_pdpe);
 }
@@ -624,7 +627,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 	gen8_ppgtt_pml4e_t scratch_pml4e;
 
 	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
-					  I915_CACHE_LLC);
+					  I915_CACHE_LLC, true);
 
 	fill_px(vm->dev, pml4, scratch_pml4e);
 }
@@ -641,7 +644,8 @@ gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
 		return;
 
 	page_directorypo = kmap_px(pdp);
-	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC,
+						   true);
 	kunmap_px(ppgtt, page_directorypo);
 }
 
@@ -654,7 +658,7 @@ gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
 	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
 
 	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
-	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC, true);
 	kunmap_px(ppgtt, pagemap);
 }
 
@@ -706,7 +710,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 }
 
-static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
+static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				struct i915_page_table *pt,
 				uint64_t start,
 				uint64_t length,
@@ -724,50 +728,102 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 						 I915_CACHE_LLC, use_scratch);
 
 	if (WARN_ON(!px_page(pt)))
-		return;
+		return false;
 
 	bitmap_clear(pt->used_ptes, pte_start, num_entries);
 
+	if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
+		free_pt(vm->dev, pt);
+		return true;
+	}
+
 	pt_vaddr = kmap_px(pt);
 
 	for (pte = pte_start; pte < num_entries; pte++)
 		pt_vaddr[pte] = scratch_pte;
 
 	kunmap_px(ppgtt, pt_vaddr);
+
+	return false;
 }
 
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
+static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
 				uint64_t start,
 				uint64_t length,
 				bool use_scratch)
 {
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
 	struct i915_page_table *pt;
 	uint64_t pde;
 
+	gen8_pde_t *pde_vaddr;
+	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
+						 I915_CACHE_LLC, use_scratch);
+	bool reduce;
+
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		if (WARN_ON(!pd->page_table[pde]))
 			break;
 
-		gen8_ppgtt_clear_pt(vm, pt, start, length, use_scratch);
+		reduce = gen8_ppgtt_clear_pt(vm, pt, start, length,
+					     use_scratch);
+
+		if (reduce) {
+			__clear_bit(pde, pd->used_pdes);
+			pde_vaddr = kmap_px(pd);
+			pde_vaddr[pde] = scratch_pde;
+			kunmap_px(ppgtt, pde_vaddr);
+		}
+	}
+
+	if (bitmap_empty(pd->used_pdes, I915_PDES)) {
+		free_pd(vm->dev, pd);
+		return true;
 	}
+
+	return false;
 }
 
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
+static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 				 struct i915_page_directory_pointer *pdp,
 				 uint64_t start,
 				 uint64_t length,
 				 bool use_scratch)
 {
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
 	struct i915_page_directory *pd;
 	uint64_t pdpe;
 
+	gen8_ppgtt_pdpe_t *pdpe_vaddr;
+	gen8_ppgtt_pdpe_t scratch_pdpe =
+		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC,
+				 use_scratch);
+	bool reduce;
+
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		if (WARN_ON(!pdp->page_directory[pdpe]))
 			break;
 
-		gen8_ppgtt_clear_pd(vm, pd, start, length, use_scratch);
+		reduce = gen8_ppgtt_clear_pd(vm, pd, start, length,
+					     use_scratch);
+
+		if (reduce) {
+			__clear_bit(pdpe, pdp->used_pdpes);
+			pdpe_vaddr = kmap_px(pdp);
+			pdpe_vaddr[pdpe] = scratch_pdpe;
+			kunmap_px(ppgtt, pdpe_vaddr);
+		}
+	}
+
+	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) {
+		free_pdp(vm->dev, pdp);
+		return true;
 	}
+
+	return false;
 }
 
 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
@@ -776,14 +832,30 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 				  uint64_t length,
 				  bool use_scratch)
 {
+	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
 	struct i915_page_directory_pointer *pdp;
 	uint64_t pml4e;
 
+	gen8_ppgtt_pml4e_t *pml4e_vaddr;
+	gen8_ppgtt_pml4e_t scratch_pml4e =
+		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC,
+				  use_scratch);
+	bool reduce;
+
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (WARN_ON(!pml4->pdps[pml4e]))
 			break;
 
-		gen8_ppgtt_clear_pdp(vm, pdp, start, length, use_scratch);
+		reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length,
+					      use_scratch);
+
+		if (reduce) {
+			__clear_bit(pml4e, pml4->used_pml4es);
+			pml4e_vaddr = kmap_px(pml4);
+			pml4e_vaddr[pml4e] = scratch_pml4e;
+			kunmap_px(ppgtt, pml4e_vaddr);
+		}
 	}
 }
 
@@ -1310,7 +1382,8 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 			/* Map the PDE to the page table */
 			page_directory[pde] = gen8_pde_encode(px_dma(pt),
-							      I915_CACHE_LLC);
+							      I915_CACHE_LLC,
+							      true);
 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
 							gen8_pte_index(start),
 							gen8_pte_count(start, length),
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode
  2016-10-04 13:54 [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
  2016-10-04 13:54 ` [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
@ 2016-10-04 13:54 ` Michał Winiarski
  2016-10-05  5:48   ` Joonas Lahtinen
  2016-10-05  8:49   ` Mika Kuoppala
  2016-10-04 14:26 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Patchwork
  2016-10-05  5:44 ` [PATCH 1/3] " Joonas Lahtinen
  3 siblings, 2 replies; 9+ messages in thread
From: Michał Winiarski @ 2016-10-04 13:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

We're no longer using any invalid PTEs - everything that's not used
should be pointing to scratch.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c            |   6 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 154 +++++++++++------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   5 +-
 4 files changed, 65 insertions(+), 103 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1418c1c..b344340 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -919,8 +919,7 @@ out_unpin:
 	if (node.allocated) {
 		wmb();
 		ggtt->base.clear_range(&ggtt->base,
-				       node.start, node.size,
-				       true);
+				       node.start, node.size);
 		i915_gem_object_unpin_pages(obj);
 		remove_mappable_node(&node);
 	} else {
@@ -1228,8 +1227,7 @@ out_unpin:
 	if (node.allocated) {
 		wmb();
 		ggtt->base.clear_range(&ggtt->base,
-				       node.start, node.size,
-				       true);
+				       node.start, node.size);
 		i915_gem_object_unpin_pages(obj);
 		remove_mappable_node(&node);
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 33c8522..54b091b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -370,8 +370,7 @@ static void reloc_cache_fini(struct reloc_cache *cache)
 
 			ggtt->base.clear_range(&ggtt->base,
 					       cache->node.start,
-					       cache->node.size,
-					       true);
+					       cache->node.size);
 			drm_mm_remove_node(&cache->node);
 		} else {
 			i915_vma_unpin((struct i915_vma *)cache->node.mm);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 281e349..fa78bb1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -191,15 +191,13 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
 	vma->vm->clear_range(vma->vm,
 			     vma->node.start,
-			     vma->size,
-			     true);
+			     vma->size);
 }
 
 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
-				  enum i915_cache_level level,
-				  bool valid)
+				  enum i915_cache_level level)
 {
-	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
+	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
 	pte |= addr;
 
 	switch (level) {
@@ -218,10 +216,9 @@ static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
 }
 
 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
-				  const enum i915_cache_level level,
-				  bool valid)
+				  const enum i915_cache_level level)
 {
-	gen8_pde_t pde = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
+	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
 	pde |= addr;
 	if (level != I915_CACHE_NONE)
 		pde |= PPAT_CACHED_PDE_INDEX;
@@ -235,9 +232,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 
 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
-				 bool valid, u32 unused)
+				 u32 unused)
 {
-	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+	gen6_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
 
 	switch (level) {
@@ -257,9 +254,9 @@ static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 
 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
-				 bool valid, u32 unused)
+				 u32 unused)
 {
-	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+	gen6_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
 
 	switch (level) {
@@ -281,9 +278,9 @@ static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
 
 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
-				 bool valid, u32 flags)
+				 u32 flags)
 {
-	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+	gen6_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
 
 	if (!(flags & PTE_READ_ONLY))
@@ -297,9 +294,9 @@ static gen6_pte_t byt_pte_encode(dma_addr_t addr,
 
 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
-				 bool valid, u32 unused)
+				 u32 unused)
 {
-	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+	gen6_pte_t pte = GEN6_PTE_VALID;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
 
 	if (level != I915_CACHE_NONE)
@@ -310,9 +307,9 @@ static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
 
 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
 				  enum i915_cache_level level,
-				  bool valid, u32 unused)
+				  u32 unused)
 {
-	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
+	gen6_pte_t pte = GEN6_PTE_VALID;
 	pte |= HSW_PTE_ADDR_ENCODE(addr);
 
 	switch (level) {
@@ -473,7 +470,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
 	gen8_pte_t scratch_pte;
 
 	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-				      I915_CACHE_LLC, true);
+				      I915_CACHE_LLC);
 
 	fill_px(vm->dev, pt, scratch_pte);
 }
@@ -486,7 +483,7 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
 	WARN_ON(vm->scratch_page.daddr == 0);
 
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, true, 0);
+				     I915_CACHE_LLC, 0);
 
 	fill32_px(vm->dev, pt, scratch_pte);
 }
@@ -533,8 +530,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
 {
 	gen8_pde_t scratch_pde;
 
-	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC,
-				      true);
+	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
 
 	fill_px(vm->dev, pd, scratch_pde);
 }
@@ -615,8 +611,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
 {
 	gen8_ppgtt_pdpe_t scratch_pdpe;
 
-	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC,
-					true);
+	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 
 	fill_px(vm->dev, pdp, scratch_pdpe);
 }
@@ -627,7 +622,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
 	gen8_ppgtt_pml4e_t scratch_pml4e;
 
 	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
-					  I915_CACHE_LLC, true);
+					  I915_CACHE_LLC);
 
 	fill_px(vm->dev, pml4, scratch_pml4e);
 }
@@ -644,8 +639,7 @@ gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
 		return;
 
 	page_directorypo = kmap_px(pdp);
-	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC,
-						   true);
+	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
 	kunmap_px(ppgtt, page_directorypo);
 }
 
@@ -658,7 +652,7 @@ gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
 	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
 
 	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
-	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC, true);
+	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 	kunmap_px(ppgtt, pagemap);
 }
 
@@ -713,8 +707,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				struct i915_page_table *pt,
 				uint64_t start,
-				uint64_t length,
-				bool use_scratch)
+				uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
@@ -725,7 +718,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 
 	gen8_pte_t *pt_vaddr;
 	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC, use_scratch);
+						 I915_CACHE_LLC);
 
 	if (WARN_ON(!px_page(pt)))
 		return false;
@@ -750,8 +743,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
 				uint64_t start,
-				uint64_t length,
-				bool use_scratch)
+				uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
@@ -760,15 +752,14 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 
 	gen8_pde_t *pde_vaddr;
 	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
-						 I915_CACHE_LLC, use_scratch);
+						 I915_CACHE_LLC);
 	bool reduce;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		if (WARN_ON(!pd->page_table[pde]))
 			break;
 
-		reduce = gen8_ppgtt_clear_pt(vm, pt, start, length,
-					     use_scratch);
+		reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
 
 		if (reduce) {
 			__clear_bit(pde, pd->used_pdes);
@@ -789,8 +780,7 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 				 struct i915_page_directory_pointer *pdp,
 				 uint64_t start,
-				 uint64_t length,
-				 bool use_scratch)
+				 uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
@@ -799,16 +789,14 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 
 	gen8_ppgtt_pdpe_t *pdpe_vaddr;
 	gen8_ppgtt_pdpe_t scratch_pdpe =
-		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC,
-				 use_scratch);
+		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
 	bool reduce;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		if (WARN_ON(!pdp->page_directory[pdpe]))
 			break;
 
-		reduce = gen8_ppgtt_clear_pd(vm, pd, start, length,
-					     use_scratch);
+		reduce = gen8_ppgtt_clear_pd(vm, pd, start, length);
 
 		if (reduce) {
 			__clear_bit(pdpe, pdp->used_pdpes);
@@ -829,8 +817,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 				  struct i915_pml4 *pml4,
 				  uint64_t start,
-				  uint64_t length,
-				  bool use_scratch)
+				  uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
@@ -839,16 +826,14 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 
 	gen8_ppgtt_pml4e_t *pml4e_vaddr;
 	gen8_ppgtt_pml4e_t scratch_pml4e =
-		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC,
-				  use_scratch);
+		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC);
 	bool reduce;
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
 		if (WARN_ON(!pml4->pdps[pml4e]))
 			break;
 
-		reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length,
-					      use_scratch);
+		reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length);
 
 		if (reduce) {
 			__clear_bit(pml4e, pml4->used_pml4es);
@@ -860,18 +845,14 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 }
 
 static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start,
-				   uint64_t length,
-				   bool use_scratch)
+				   uint64_t start, uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->dev))
-		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length,
-				     use_scratch);
+		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
 	else
-		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length,
-				      use_scratch);
+		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
 }
 
 static void
@@ -898,7 +879,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 
 		pt_vaddr[pte] =
 			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level, true);
+					cache_level);
 		if (++pte == GEN8_PTES) {
 			kunmap_px(ppgtt, pt_vaddr);
 			pt_vaddr = NULL;
@@ -1382,8 +1363,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 
 			/* Map the PDE to the page table */
 			page_directory[pde] = gen8_pde_encode(px_dma(pt),
-							      I915_CACHE_LLC,
-							      true);
+							      I915_CACHE_LLC);
 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
 							gen8_pte_index(start),
 							gen8_pte_count(start, length),
@@ -1542,7 +1522,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	uint64_t start = ppgtt->base.start;
 	uint64_t length = ppgtt->base.total;
 	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-						 I915_CACHE_LLC, true);
+						 I915_CACHE_LLC);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
@@ -1659,7 +1639,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
 
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, true, 0);
+				     I915_CACHE_LLC, 0);
 
 	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
 		u32 expected;
@@ -1872,8 +1852,7 @@ static void gen6_ppgtt_enable(struct drm_device *dev)
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 				   uint64_t start,
-				   uint64_t length,
-				   bool use_scratch)
+				   uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	gen6_pte_t *pt_vaddr, scratch_pte;
@@ -1884,7 +1863,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	unsigned last_pte, i;
 
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, true, 0);
+				     I915_CACHE_LLC, 0);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -1922,7 +1901,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
 
 		pt_vaddr[act_pte] =
-			vm->pte_encode(addr, cache_level, true, flags);
+			vm->pte_encode(addr, cache_level, flags);
 
 		if (++act_pte == GEN6_PTES) {
 			kunmap_px(ppgtt, pt_vaddr);
@@ -2376,8 +2355,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
 
 	i915_check_and_clear_faults(dev_priv);
 
-	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
-			     true);
+	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
 
 	i915_ggtt_flush(dev_priv);
 }
@@ -2411,7 +2389,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+	gen8_set_pte(pte, gen8_pte_encode(addr, level));
 
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
@@ -2438,7 +2416,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
 	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = gen8_pte_encode(addr, level, true);
+		gtt_entry = gen8_pte_encode(addr, level);
 		gen8_set_pte(&gtt_entries[i++], gtt_entry);
 	}
 
@@ -2502,7 +2480,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+	iowrite32(vm->pte_encode(addr, level, flags), pte);
 
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
@@ -2535,7 +2513,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
 
 	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = vm->pte_encode(addr, level, true, flags);
+		gtt_entry = vm->pte_encode(addr, level, flags);
 		iowrite32(gtt_entry, &gtt_entries[i++]);
 	}
 
@@ -2559,16 +2537,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 }
 
 static void nop_clear_range(struct i915_address_space *vm,
-			    uint64_t start,
-			    uint64_t length,
-			    bool use_scratch)
+			    uint64_t start, uint64_t length)
 {
 }
 
 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
-				  uint64_t start,
-				  uint64_t length,
-				  bool use_scratch)
+				  uint64_t start, uint64_t length)
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@@ -2588,8 +2562,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		num_entries = max_entries;
 
 	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
-				      I915_CACHE_LLC,
-				      use_scratch);
+				      I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 	readl(gtt_base);
@@ -2599,8 +2572,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t start,
-				  uint64_t length,
-				  bool use_scratch)
+				  uint64_t length)
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
@@ -2620,7 +2592,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		num_entries = max_entries;
 
 	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
-				     I915_CACHE_LLC, use_scratch, 0);
+				     I915_CACHE_LLC, 0);
 
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
@@ -2667,8 +2639,7 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 
 static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  uint64_t start,
-				  uint64_t length,
-				  bool unused)
+				  uint64_t length)
 {
 	struct drm_i915_private *dev_priv = to_i915(vm->dev);
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -2752,13 +2723,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 
 	if (vma->flags & I915_VMA_GLOBAL_BIND)
 		vma->vm->clear_range(vma->vm,
-				     vma->node.start, size,
-				     true);
+				     vma->node.start, size);
 
 	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
 		appgtt->base.clear_range(&appgtt->base,
-					 vma->node.start, size,
-					 true);
+					 vma->node.start, size);
 }
 
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
@@ -2819,13 +2788,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
 		ggtt->base.clear_range(&ggtt->base, hole_start,
-				     hole_end - hole_start, true);
+				       hole_end - hole_start);
 	}
 
 	/* And finally clear the reserved guard page */
 	ggtt->base.clear_range(&ggtt->base,
-			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE,
-			       true);
+			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
 
 	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
 		struct i915_hw_ppgtt *ppgtt;
@@ -2851,8 +2819,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 
 		ppgtt->base.clear_range(&ppgtt->base,
 					ppgtt->base.start,
-					ppgtt->base.total,
-					true);
+					ppgtt->base.total);
 
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
 		WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
@@ -3327,8 +3294,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	i915_check_and_clear_faults(dev_priv);
 
 	/* First fill our portion of the GTT with scratch pages */
-	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
-			       true);
+	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
 
 	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ec78be2..931d712 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -395,7 +395,7 @@ struct i915_address_space {
 	/* FIXME: Need a more generic return type */
 	gen6_pte_t (*pte_encode)(dma_addr_t addr,
 				 enum i915_cache_level level,
-				 bool valid, u32 flags); /* Create a valid PTE */
+				 u32 flags); /* Create a valid PTE */
 	/* flags for pte_encode */
 #define PTE_READ_ONLY	(1<<0)
 	int (*allocate_va_range)(struct i915_address_space *vm,
@@ -403,8 +403,7 @@ struct i915_address_space {
 				 uint64_t length);
 	void (*clear_range)(struct i915_address_space *vm,
 			    uint64_t start,
-			    uint64_t length,
-			    bool use_scratch);
+			    uint64_t length);
 	void (*insert_page)(struct i915_address_space *vm,
 			    dma_addr_t addr,
 			    uint64_t offset,
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
  2016-10-04 13:54 [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
  2016-10-04 13:54 ` [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
  2016-10-04 13:54 ` [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
@ 2016-10-04 14:26 ` Patchwork
  2016-10-05  5:44 ` [PATCH 1/3] " Joonas Lahtinen
  3 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2016-10-04 14:26 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
URL   : https://patchwork.freedesktop.org/series/13282/
State : failure

== Summary ==

Series 13282v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/13282/revisions/1/mbox/

Test core_auth:
        Subgroup basic-auth:
                pass       -> INCOMPLETE (fi-bsw-n3050)
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-byt-j1900)

fi-bdw-5557u     total:244  pass:229  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050     total:1    pass:0    dwarn:0   dfail:0   fail:0   skip:0  
fi-bxt-t5700     total:244  pass:214  dwarn:0   dfail:0   fail:0   skip:30 
fi-byt-j1900     total:244  pass:210  dwarn:2   dfail:0   fail:1   skip:31 
fi-byt-n2820     total:244  pass:208  dwarn:0   dfail:0   fail:1   skip:35 
fi-hsw-4770      total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-hsw-4770r     total:244  pass:222  dwarn:0   dfail:0   fail:0   skip:22 
fi-ilk-650       total:244  pass:182  dwarn:0   dfail:0   fail:2   skip:60 
fi-ivb-3520m     total:244  pass:219  dwarn:0   dfail:0   fail:0   skip:25 
fi-ivb-3770      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 
fi-skl-6260u     total:244  pass:230  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq    total:244  pass:221  dwarn:1   dfail:0   fail:0   skip:22 
fi-skl-6700k     total:244  pass:219  dwarn:1   dfail:0   fail:0   skip:24 
fi-skl-6770hq    total:244  pass:228  dwarn:1   dfail:0   fail:1   skip:14 
fi-snb-2520m     total:244  pass:208  dwarn:0   dfail:0   fail:0   skip:36 
fi-snb-2600      total:244  pass:207  dwarn:0   dfail:0   fail:0   skip:37 

Results at /archive/results/CI_IGT_test/Patchwork_2623/

cedbdecff4c878309133e066f696ea63d41cee73 drm-intel-nightly: 2016y-10m-04d-10h-53m-20s UTC integration manifest
02532d3 drm/i915: Remove unused "valid" parameter from pte_encode
945d61e drm/i915/gtt: Free unused lower-level page tables
22b7a6e drm/i915/gtt: Split gen8_ppgtt_clear_pte_range

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range
  2016-10-04 13:54 [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
                   ` (2 preceding siblings ...)
  2016-10-04 14:26 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Patchwork
@ 2016-10-05  5:44 ` Joonas Lahtinen
  3 siblings, 0 replies; 9+ messages in thread
From: Joonas Lahtinen @ 2016-10-05  5:44 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx; +Cc: Mika Kuoppala

I wonder if GCC is doing the right thing, do we have some benchmark for
this, Chris, Mika? The code is much more readable in my eyes after the
changes.

On ti, 2016-10-04 at 15:54 +0200, Michał Winiarski wrote:
> +static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
> +				struct i915_page_table *pt,
> +				uint64_t start,
> +				uint64_t length,
> +				bool use_scratch)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> +
> +	unsigned int pte_start = gen8_pte_index(start);
> +	unsigned int num_entries = min(gen8_pte_count(start, length),
> +				       GEN8_PTES);

i915_pte_count (called by gen8_pte_count) states following;

"the max value would be GEN6_PTES for GEN6, and GEN8_PTES for GEN8."

GEM_BUG_ON(num_entries > GEN8_PTES); would be the standard recipe for
paranoia. But I don't think it's needed here. 

> +	uint64_t pte;
> +

No newlines to the variable block, keep it tight.

> @@ -768,21 +793,13 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  				   bool use_scratch)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
> -	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -						 I915_CACHE_LLC, use_scratch);
>  
> -	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> -		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
> -					   scratch_pte);
> -	} else {
> -		uint64_t pml4e;
> -		struct i915_page_directory_pointer *pdp;
> -
> -		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
> -			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
> -						   scratch_pte);
> -		}
> -	}
> +	if (!USES_FULL_48BIT_PPGTT(vm->dev))

Now that this is so much simplified, the negation could be removed, and
just just write if (USES_FULL_...)

Other than above, looks good to me.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode
  2016-10-04 13:54 ` [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
@ 2016-10-05  5:48   ` Joonas Lahtinen
  2016-10-05  8:49   ` Mika Kuoppala
  1 sibling, 0 replies; 9+ messages in thread
From: Joonas Lahtinen @ 2016-10-05  5:48 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx; +Cc: Mika Kuoppala

On ti, 2016-10-04 at 15:54 +0200, Michał Winiarski wrote:
> We're no longer using any invalid PTEs - everything that's not used
> should be pointing to scratch.
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables
  2016-10-04 13:54 ` [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
@ 2016-10-05  6:40   ` Joonas Lahtinen
  2016-10-05  9:30     ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Joonas Lahtinen @ 2016-10-05  6:40 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx; +Cc: Mika Kuoppala

On ti, 2016-10-04 at 15:54 +0200, Michał Winiarski wrote:
> Since "Dynamic page table allocations" were introduced, our page tables
> can grow (being dynamically allocated) with address space range usage.
> Unfortunately, their lifetime is bound to vm. This is not a huge problem
> when we're not using softpin - drm_mm is creating an upper bound on used
> range by causing addresses for our VMAs to eventually be reused.
> 
> With softpin, long lived contexts can drain the system out of memory
> even with a single "small" object. For example:
> 
> bo = bo_alloc(size);
> while(true)
>     offset += size;
>     exec(bo, offset);
> 
> Will cause us to create new allocations until all memory in the system
> is used for tracking GPU pages (even though almost all PTEs in this vm
> are pointing to scratch).
> 
> Let's free unused page tables in clear_range to prevent this - if no
> entries are used, we can safely free it and return this information to
> the caller (so that higher-level entry is pointing to scratch).
> 

Sounds like this could and should have a I-G-T testcase, right?
 
> @@ -706,7 +710,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
>  }
>  
> -static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,

Add comment for non-obvious bool return value.

> +static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  				struct i915_page_table *pt,
>  				uint64_t start,
>  				uint64_t length,
> @@ -724,50 +728,102 @@ static void gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  						 I915_CACHE_LLC, use_scratch);
>  
>  	if (WARN_ON(!px_page(pt)))
> -		return;
> +		return false;
>  
>  	bitmap_clear(pt->used_ptes, pte_start, num_entries);
>  
> +	if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
> +		free_pt(vm->dev, pt);

Maybe the caller should do the free_pt()? If kept here, should at least
be clearly documented.

Other than those, looks like good improvements to me.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode
  2016-10-04 13:54 ` [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
  2016-10-05  5:48   ` Joonas Lahtinen
@ 2016-10-05  8:49   ` Mika Kuoppala
  1 sibling, 0 replies; 9+ messages in thread
From: Mika Kuoppala @ 2016-10-05  8:49 UTC (permalink / raw)
  To: Michał Winiarski, intel-gfx

Michał Winiarski <michal.winiarski@intel.com> writes:

> We're no longer using any invalid PTEs - everything that's not used
> should be pointing to scratch.
>

I was about to suggest this before patch 2/3 to make it simpler.

For historical context: https://patchwork.kernel.org/patch/9135411/

I would amend the commit message a little. 'We're no longer using'
suggests that we changed recently/by earlier commits how we map
into the invalid ptes.

I think it has always been the case that we map to valid==scratch.

Lets try to distill the reasoning from Chris Wilson's reply
for future reader, something like:

'We never used any invalid ptes, as those were put in place for a
possibility of doing gpu faults. However our batchbuffers are
not restricted in length, so everything needs to be pointing to
something and thus out-of-bounds pointing to scratch. Remove the
valid flag as it is always true.'

That message conveyed with perhaps better wording/English,
and you can add my,
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

-Mika

> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c            |   6 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   3 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c        | 154 +++++++++++------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h        |   5 +-
>  4 files changed, 65 insertions(+), 103 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1418c1c..b344340 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -919,8 +919,7 @@ out_unpin:
>  	if (node.allocated) {
>  		wmb();
>  		ggtt->base.clear_range(&ggtt->base,
> -				       node.start, node.size,
> -				       true);
> +				       node.start, node.size);
>  		i915_gem_object_unpin_pages(obj);
>  		remove_mappable_node(&node);
>  	} else {
> @@ -1228,8 +1227,7 @@ out_unpin:
>  	if (node.allocated) {
>  		wmb();
>  		ggtt->base.clear_range(&ggtt->base,
> -				       node.start, node.size,
> -				       true);
> +				       node.start, node.size);
>  		i915_gem_object_unpin_pages(obj);
>  		remove_mappable_node(&node);
>  	} else {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 33c8522..54b091b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -370,8 +370,7 @@ static void reloc_cache_fini(struct reloc_cache *cache)
>  
>  			ggtt->base.clear_range(&ggtt->base,
>  					       cache->node.start,
> -					       cache->node.size,
> -					       true);
> +					       cache->node.size);
>  			drm_mm_remove_node(&cache->node);
>  		} else {
>  			i915_vma_unpin((struct i915_vma *)cache->node.mm);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 281e349..fa78bb1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -191,15 +191,13 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
>  {
>  	vma->vm->clear_range(vma->vm,
>  			     vma->node.start,
> -			     vma->size,
> -			     true);
> +			     vma->size);
>  }
>  
>  static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
> -				  enum i915_cache_level level,
> -				  bool valid)
> +				  enum i915_cache_level level)
>  {
> -	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
> +	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
>  	pte |= addr;
>  
>  	switch (level) {
> @@ -218,10 +216,9 @@ static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
>  }
>  
>  static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
> -				  const enum i915_cache_level level,
> -				  bool valid)
> +				  const enum i915_cache_level level)
>  {
> -	gen8_pde_t pde = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
> +	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
>  	pde |= addr;
>  	if (level != I915_CACHE_NONE)
>  		pde |= PPAT_CACHED_PDE_INDEX;
> @@ -235,9 +232,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
>  
>  static gen6_pte_t snb_pte_encode(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -				 bool valid, u32 unused)
> +				 u32 unused)
>  {
> -	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
> +	gen6_pte_t pte = GEN6_PTE_VALID;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
>  
>  	switch (level) {
> @@ -257,9 +254,9 @@ static gen6_pte_t snb_pte_encode(dma_addr_t addr,
>  
>  static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -				 bool valid, u32 unused)
> +				 u32 unused)
>  {
> -	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
> +	gen6_pte_t pte = GEN6_PTE_VALID;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
>  
>  	switch (level) {
> @@ -281,9 +278,9 @@ static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
>  
>  static gen6_pte_t byt_pte_encode(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -				 bool valid, u32 flags)
> +				 u32 flags)
>  {
> -	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
> +	gen6_pte_t pte = GEN6_PTE_VALID;
>  	pte |= GEN6_PTE_ADDR_ENCODE(addr);
>  
>  	if (!(flags & PTE_READ_ONLY))
> @@ -297,9 +294,9 @@ static gen6_pte_t byt_pte_encode(dma_addr_t addr,
>  
>  static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -				 bool valid, u32 unused)
> +				 u32 unused)
>  {
> -	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
> +	gen6_pte_t pte = GEN6_PTE_VALID;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
>  
>  	if (level != I915_CACHE_NONE)
> @@ -310,9 +307,9 @@ static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
>  
>  static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>  				  enum i915_cache_level level,
> -				  bool valid, u32 unused)
> +				  u32 unused)
>  {
> -	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
> +	gen6_pte_t pte = GEN6_PTE_VALID;
>  	pte |= HSW_PTE_ADDR_ENCODE(addr);
>  
>  	switch (level) {
> @@ -473,7 +470,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
>  	gen8_pte_t scratch_pte;
>  
>  	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -				      I915_CACHE_LLC, true);
> +				      I915_CACHE_LLC);
>  
>  	fill_px(vm->dev, pt, scratch_pte);
>  }
> @@ -486,7 +483,7 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
>  	WARN_ON(vm->scratch_page.daddr == 0);
>  
>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
> -				     I915_CACHE_LLC, true, 0);
> +				     I915_CACHE_LLC, 0);
>  
>  	fill32_px(vm->dev, pt, scratch_pte);
>  }
> @@ -533,8 +530,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>  {
>  	gen8_pde_t scratch_pde;
>  
> -	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC,
> -				      true);
> +	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
>  
>  	fill_px(vm->dev, pd, scratch_pde);
>  }
> @@ -615,8 +611,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>  {
>  	gen8_ppgtt_pdpe_t scratch_pdpe;
>  
> -	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC,
> -					true);
> +	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>  
>  	fill_px(vm->dev, pdp, scratch_pdpe);
>  }
> @@ -627,7 +622,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
>  	gen8_ppgtt_pml4e_t scratch_pml4e;
>  
>  	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
> -					  I915_CACHE_LLC, true);
> +					  I915_CACHE_LLC);
>  
>  	fill_px(vm->dev, pml4, scratch_pml4e);
>  }
> @@ -644,8 +639,7 @@ gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
>  		return;
>  
>  	page_directorypo = kmap_px(pdp);
> -	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC,
> -						   true);
> +	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
>  	kunmap_px(ppgtt, page_directorypo);
>  }
>  
> @@ -658,7 +652,7 @@ gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
>  	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
>  
>  	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
> -	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC, true);
> +	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
>  	kunmap_px(ppgtt, pagemap);
>  }
>  
> @@ -713,8 +707,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
>  static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  				struct i915_page_table *pt,
>  				uint64_t start,
> -				uint64_t length,
> -				bool use_scratch)
> +				uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  
> @@ -725,7 +718,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  
>  	gen8_pte_t *pt_vaddr;
>  	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -						 I915_CACHE_LLC, use_scratch);
> +						 I915_CACHE_LLC);
>  
>  	if (WARN_ON(!px_page(pt)))
>  		return false;
> @@ -750,8 +743,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
>  static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  				struct i915_page_directory *pd,
>  				uint64_t start,
> -				uint64_t length,
> -				bool use_scratch)
> +				uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  
> @@ -760,15 +752,14 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  
>  	gen8_pde_t *pde_vaddr;
>  	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
> -						 I915_CACHE_LLC, use_scratch);
> +						 I915_CACHE_LLC);
>  	bool reduce;
>  
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		if (WARN_ON(!pd->page_table[pde]))
>  			break;
>  
> -		reduce = gen8_ppgtt_clear_pt(vm, pt, start, length,
> -					     use_scratch);
> +		reduce = gen8_ppgtt_clear_pt(vm, pt, start, length);
>  
>  		if (reduce) {
>  			__clear_bit(pde, pd->used_pdes);
> @@ -789,8 +780,7 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  				 struct i915_page_directory_pointer *pdp,
>  				 uint64_t start,
> -				 uint64_t length,
> -				 bool use_scratch)
> +				 uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  
> @@ -799,16 +789,14 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  
>  	gen8_ppgtt_pdpe_t *pdpe_vaddr;
>  	gen8_ppgtt_pdpe_t scratch_pdpe =
> -		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC,
> -				 use_scratch);
> +		gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>  	bool reduce;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
>  		if (WARN_ON(!pdp->page_directory[pdpe]))
>  			break;
>  
> -		reduce = gen8_ppgtt_clear_pd(vm, pd, start, length,
> -					     use_scratch);
> +		reduce = gen8_ppgtt_clear_pd(vm, pd, start, length);
>  
>  		if (reduce) {
>  			__clear_bit(pdpe, pdp->used_pdpes);
> @@ -829,8 +817,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  				  struct i915_pml4 *pml4,
>  				  uint64_t start,
> -				  uint64_t length,
> -				  bool use_scratch)
> +				  uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  
> @@ -839,16 +826,14 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  
>  	gen8_ppgtt_pml4e_t *pml4e_vaddr;
>  	gen8_ppgtt_pml4e_t scratch_pml4e =
> -		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC,
> -				  use_scratch);
> +		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC);
>  	bool reduce;
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
>  		if (WARN_ON(!pml4->pdps[pml4e]))
>  			break;
>  
> -		reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length,
> -					      use_scratch);
> +		reduce = gen8_ppgtt_clear_pdp(vm, pdp, start, length);
>  
>  		if (reduce) {
>  			__clear_bit(pml4e, pml4->used_pml4es);
> @@ -860,18 +845,14 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
>  }
>  
>  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> -				   uint64_t start,
> -				   uint64_t length,
> -				   bool use_scratch)
> +				   uint64_t start, uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  
>  	if (!USES_FULL_48BIT_PPGTT(vm->dev))
> -		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length,
> -				     use_scratch);
> +		gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
>  	else
> -		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length,
> -				      use_scratch);
> +		gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
>  }
>  
>  static void
> @@ -898,7 +879,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
>  
>  		pt_vaddr[pte] =
>  			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
> -					cache_level, true);
> +					cache_level);
>  		if (++pte == GEN8_PTES) {
>  			kunmap_px(ppgtt, pt_vaddr);
>  			pt_vaddr = NULL;
> @@ -1382,8 +1363,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
>  
>  			/* Map the PDE to the page table */
>  			page_directory[pde] = gen8_pde_encode(px_dma(pt),
> -							      I915_CACHE_LLC,
> -							      true);
> +							      I915_CACHE_LLC);
>  			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
>  							gen8_pte_index(start),
>  							gen8_pte_count(start, length),
> @@ -1542,7 +1522,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  	uint64_t start = ppgtt->base.start;
>  	uint64_t length = ppgtt->base.total;
>  	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -						 I915_CACHE_LLC, true);
> +						 I915_CACHE_LLC);
>  
>  	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
>  		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
> @@ -1659,7 +1639,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
>  
>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
> -				     I915_CACHE_LLC, true, 0);
> +				     I915_CACHE_LLC, 0);
>  
>  	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
>  		u32 expected;
> @@ -1872,8 +1852,7 @@ static void gen6_ppgtt_enable(struct drm_device *dev)
>  /* PPGTT support for Sandybdrige/Gen6 and later */
>  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  				   uint64_t start,
> -				   uint64_t length,
> -				   bool use_scratch)
> +				   uint64_t length)
>  {
>  	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
>  	gen6_pte_t *pt_vaddr, scratch_pte;
> @@ -1884,7 +1863,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	unsigned last_pte, i;
>  
>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
> -				     I915_CACHE_LLC, true, 0);
> +				     I915_CACHE_LLC, 0);
>  
>  	while (num_entries) {
>  		last_pte = first_pte + num_entries;
> @@ -1922,7 +1901,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
>  
>  		pt_vaddr[act_pte] =
> -			vm->pte_encode(addr, cache_level, true, flags);
> +			vm->pte_encode(addr, cache_level, flags);
>  
>  		if (++act_pte == GEN6_PTES) {
>  			kunmap_px(ppgtt, pt_vaddr);
> @@ -2376,8 +2355,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
>  
>  	i915_check_and_clear_faults(dev_priv);
>  
> -	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
> -			     true);
> +	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
>  
>  	i915_ggtt_flush(dev_priv);
>  }
> @@ -2411,7 +2389,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>  
>  	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
>  
> -	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> +	gen8_set_pte(pte, gen8_pte_encode(addr, level));
>  
>  	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
>  	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> @@ -2438,7 +2416,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>  	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
>  
>  	for_each_sgt_dma(addr, sgt_iter, st) {
> -		gtt_entry = gen8_pte_encode(addr, level, true);
> +		gtt_entry = gen8_pte_encode(addr, level);
>  		gen8_set_pte(&gtt_entries[i++], gtt_entry);
>  	}
>  
> @@ -2502,7 +2480,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
>  
>  	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
>  
> -	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
> +	iowrite32(vm->pte_encode(addr, level, flags), pte);
>  
>  	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
>  	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> @@ -2535,7 +2513,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
>  
>  	for_each_sgt_dma(addr, sgt_iter, st) {
> -		gtt_entry = vm->pte_encode(addr, level, true, flags);
> +		gtt_entry = vm->pte_encode(addr, level, flags);
>  		iowrite32(gtt_entry, &gtt_entries[i++]);
>  	}
>  
> @@ -2559,16 +2537,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  }
>  
>  static void nop_clear_range(struct i915_address_space *vm,
> -			    uint64_t start,
> -			    uint64_t length,
> -			    bool use_scratch)
> +			    uint64_t start, uint64_t length)
>  {
>  }
>  
>  static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> -				  uint64_t start,
> -				  uint64_t length,
> -				  bool use_scratch)
> +				  uint64_t start, uint64_t length)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(vm->dev);
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> @@ -2588,8 +2562,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>  		num_entries = max_entries;
>  
>  	scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
> -				      I915_CACHE_LLC,
> -				      use_scratch);
> +				      I915_CACHE_LLC);
>  	for (i = 0; i < num_entries; i++)
>  		gen8_set_pte(&gtt_base[i], scratch_pte);
>  	readl(gtt_base);
> @@ -2599,8 +2572,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>  
>  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  				  uint64_t start,
> -				  uint64_t length,
> -				  bool use_scratch)
> +				  uint64_t length)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(vm->dev);
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> @@ -2620,7 +2592,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  		num_entries = max_entries;
>  
>  	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
> -				     I915_CACHE_LLC, use_scratch, 0);
> +				     I915_CACHE_LLC, 0);
>  
>  	for (i = 0; i < num_entries; i++)
>  		iowrite32(scratch_pte, &gtt_base[i]);
> @@ -2667,8 +2639,7 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm,
>  
>  static void i915_ggtt_clear_range(struct i915_address_space *vm,
>  				  uint64_t start,
> -				  uint64_t length,
> -				  bool unused)
> +				  uint64_t length)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(vm->dev);
>  	unsigned first_entry = start >> PAGE_SHIFT;
> @@ -2752,13 +2723,11 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>  
>  	if (vma->flags & I915_VMA_GLOBAL_BIND)
>  		vma->vm->clear_range(vma->vm,
> -				     vma->node.start, size,
> -				     true);
> +				     vma->node.start, size);
>  
>  	if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
>  		appgtt->base.clear_range(&appgtt->base,
> -					 vma->node.start, size,
> -					 true);
> +					 vma->node.start, size);
>  }
>  
>  void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
> @@ -2819,13 +2788,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
>  			      hole_start, hole_end);
>  		ggtt->base.clear_range(&ggtt->base, hole_start,
> -				     hole_end - hole_start, true);
> +				       hole_end - hole_start);
>  	}
>  
>  	/* And finally clear the reserved guard page */
>  	ggtt->base.clear_range(&ggtt->base,
> -			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE,
> -			       true);
> +			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
>  
>  	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
>  		struct i915_hw_ppgtt *ppgtt;
> @@ -2851,8 +2819,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  
>  		ppgtt->base.clear_range(&ppgtt->base,
>  					ppgtt->base.start,
> -					ppgtt->base.total,
> -					true);
> +					ppgtt->base.total);
>  
>  		dev_priv->mm.aliasing_ppgtt = ppgtt;
>  		WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
> @@ -3327,8 +3294,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  	i915_check_and_clear_faults(dev_priv);
>  
>  	/* First fill our portion of the GTT with scratch pages */
> -	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
> -			       true);
> +	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);
>  
>  	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index ec78be2..931d712 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -395,7 +395,7 @@ struct i915_address_space {
>  	/* FIXME: Need a more generic return type */
>  	gen6_pte_t (*pte_encode)(dma_addr_t addr,
>  				 enum i915_cache_level level,
> -				 bool valid, u32 flags); /* Create a valid PTE */
> +				 u32 flags); /* Create a valid PTE */
>  	/* flags for pte_encode */
>  #define PTE_READ_ONLY	(1<<0)
>  	int (*allocate_va_range)(struct i915_address_space *vm,
> @@ -403,8 +403,7 @@ struct i915_address_space {
>  				 uint64_t length);
>  	void (*clear_range)(struct i915_address_space *vm,
>  			    uint64_t start,
> -			    uint64_t length,
> -			    bool use_scratch);
> +			    uint64_t length);
>  	void (*insert_page)(struct i915_address_space *vm,
>  			    dma_addr_t addr,
>  			    uint64_t offset,
> -- 
> 2.7.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables
  2016-10-05  6:40   ` Joonas Lahtinen
@ 2016-10-05  9:30     ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2016-10-05  9:30 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, Mika Kuoppala

On Wed, Oct 05, 2016 at 09:40:48AM +0300, Joonas Lahtinen wrote:
> On ti, 2016-10-04 at 15:54 +0200, Michał Winiarski wrote:
> > Since "Dynamic page table allocations" were introduced, our page tables
> > can grow (being dynamically allocated) with address space range usage.
> > Unfortunately, their lifetime is bound to vm. This is not a huge problem
> > when we're not using softpin - drm_mm is creating an upper bound on used
> > range by causing addresses for our VMAs to eventually be reused.
> > 
> > With softpin, long lived contexts can drain the system out of memory
> > even with a single "small" object. For example:
> > 
> > bo = bo_alloc(size);
> > while(true)
> >     offset += size;
> >     exec(bo, offset);
> > 
> > Will cause us to create new allocations until all memory in the system
> > is used for tracking GPU pages (even though almost all PTEs in this vm
> > are pointing to scratch).
> > 
> > Let's free unused page tables in clear_range to prevent this - if no
> > entries are used, we can safely free it and return this information to
> > the caller (so that higher-level entry is pointing to scratch).
> > 
> 
> Sounds like this could and should have a I-G-T testcase, right?

The problem is that tables are internal to the driver. The user visible
impact is premature oom due to kernel bloat. We could dump the ppgtt to
userspace and assert that entries we have closed are unused, but that
would be a fragile test for one particular implementation.

gem_exec_alignment will oom quite happily at the moment due to the
bitmap allocation (once we have the u64 alignment fixes reviewed and
upstreamed at least). Simply for that reason I want to completely
eradicate the bitmaps - they are not used for anything. The valid
range intersection we already know, and here the use as to whether a
particular level is empty is a simple counter.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-10-05  9:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-04 13:54 [PATCH 1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Michał Winiarski
2016-10-04 13:54 ` [PATCH 2/3] drm/i915/gtt: Free unused lower-level page tables Michał Winiarski
2016-10-05  6:40   ` Joonas Lahtinen
2016-10-05  9:30     ` Chris Wilson
2016-10-04 13:54 ` [PATCH 3/3] drm/i915: Remove unused "valid" parameter from pte_encode Michał Winiarski
2016-10-05  5:48   ` Joonas Lahtinen
2016-10-05  8:49   ` Mika Kuoppala
2016-10-04 14:26 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/gtt: Split gen8_ppgtt_clear_pte_range Patchwork
2016-10-05  5:44 ` [PATCH 1/3] " Joonas Lahtinen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.