intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: thomas.hellstrom@intel.com,
	Chris Wilson <chris@chris-wilson.co.uk>,
	matthew.auld@intel.com
Subject: [Intel-gfx] [PATCH 3/3] drm/i915/gt: Shrink i915_page_directory's slab bucket
Date: Tue, 28 Jul 2020 16:31:57 +0100	[thread overview]
Message-ID: <20200728153157.27969-3-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20200728153157.27969-1-chris@chris-wilson.co.uk>

kmalloc uses power-of-two slab buckets for small allocations (up to a
few pages). Since i915_page_directory is a page of pointers, plus a
couple more, this is rounded up to 8K, and we waste nearly 50% of that
allocation. Long terms this leads to poor memory utilisation, bloating
the kernel footpoint, but the problem is excerbated by our conservative
preallocation scheme for binding VMA. As we require to allocate all
levels for each vma just in case we need to insert them upon binding,
this leads to a large multiplication factor for single page vma. By
halving the allocation we need for the page directory structure, we
halve the multipliation factor, bringing workloads that once fitted into
memory, hopefully back to fitting into memory.

We maintain the split between i915_page_directory and i915_page_table as
we only need half the allocation for the lowest, most populous, level.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 11 +++++-----
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 26 +++++++++++-----------
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 10 +++++----
 drivers/gpu/drm/i915/gt/intel_ppgtt.c | 31 +++++++++++++++++++++------
 4 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index a823d2e3c39c..8b83693f2b58 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -261,7 +261,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 
 	gen6_for_all_pdes(pt, pd, pde)
 		if (pt)
-			free_px(&ppgtt->base.vm, pt);
+			free_pt(&ppgtt->base.vm, pt);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -275,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 
 	mutex_destroy(&ppgtt->flush);
 	mutex_destroy(&ppgtt->pin_mutex);
-	kfree(ppgtt->base.pd);
+
+	free_pd(&ppgtt->base.vm, ppgtt->base.pd);
 }
 
 static int pd_vma_set_pages(struct i915_vma *vma)
@@ -322,7 +323,7 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
 		if (!pt || atomic_read(&pt->used))
 			continue;
 
-		free_px(&ppgtt->base.vm, pt);
+		free_pt(&ppgtt->base.vm, pt);
 		pd->entry[pde] = NULL;
 	}
 
@@ -447,7 +448,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 	ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
 	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
 
-	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+	ppgtt->base.pd = __alloc_pd(I915_PDES);
 	if (!ppgtt->base.pd) {
 		err = -ENOMEM;
 		goto err_free;
@@ -468,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
 err_scratch:
 	free_scratch(&ppgtt->base.vm);
 err_pd:
-	kfree(ppgtt->base.pd);
+	free_pd(&ppgtt->base.vm, ppgtt->base.pd);
 err_free:
 	mutex_destroy(&ppgtt->pin_mutex);
 	kfree(ppgtt);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index e3afd250cd7f..3423d05068c0 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
 		} while (pde++, --count);
 	}
 
-	free_px(vm, pd);
+	free_px(vm, &pd->pt, lvl);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
 		}
 
 		if (release_pd_entry(pd, idx, pt, scratch))
-			free_px(vm, pt);
+			free_px(vm, pt, lvl);
 	} while (idx++, --len);
 
 	return start;
@@ -628,7 +628,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 		err = pin_pt_dma(vm, pde->pt.base);
 		if (err) {
 			i915_gem_object_put(pde->pt.base);
-			kfree(pde);
+			free_pd(vm, pde);
 			return err;
 		}
 
@@ -648,28 +648,30 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
 	struct i915_page_directory *pd;
 	int err;
 
-	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+	GEM_BUG_ON(count > I915_PDES);
 
-	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+	pd = __alloc_pd(count);
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
 	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
 	if (IS_ERR(pd->pt.base)) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
+		err = PTR_ERR(pd->pt.base);
+		pd->pt.base = NULL;
+		goto err_pd;
 	}
 
 	err = pin_pt_dma(vm, pd->pt.base);
-	if (err) {
-		i915_gem_object_put(pd->pt.base);
-		kfree(pd);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto err_pd;
 
 	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
 	atomic_inc(px_used(pd)); /* mark as pinned */
 	return pd;
+
+err_pd:
+	free_pd(vm, pd);
+	return ERR_PTR(err);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 6abab2d37b6f..c13c650ced22 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -156,7 +156,7 @@ struct i915_page_table {
 struct i915_page_directory {
 	struct i915_page_table pt;
 	spinlock_t lock;
-	void *entry[512];
+	void **entry;
 };
 
 #define __px_choose_expr(x, type, expr, other) \
@@ -519,12 +519,14 @@ void free_scratch(struct i915_address_space *vm);
 struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
 struct i915_page_table *alloc_pt(struct i915_address_space *vm);
 struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
-struct i915_page_directory *__alloc_pd(size_t sz);
+struct i915_page_directory *__alloc_pd(int npde);
 
 int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
 
-void free_pt(struct i915_address_space *vm, struct i915_page_table *pt);
-#define free_px(vm, px) free_pt(vm, px_pt(px))
+void free_px(struct i915_address_space *vm,
+	     struct i915_page_table *pt, int lvl);
+#define free_pt(vm, px) free_px(vm, px, 0)
+#define free_pd(vm, px) free_px(vm, px_pt(px), 1)
 
 void
 __set_pd_entry(struct i915_page_directory * const pd,
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index ede6369a9092..46d9aceda64c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -28,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 	return pt;
 }
 
-struct i915_page_directory *__alloc_pd(size_t sz)
+struct i915_page_directory *__alloc_pd(int count)
 {
 	struct i915_page_directory *pd;
 
-	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
 	if (unlikely(!pd))
 		return NULL;
 
+	pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
+	if (unlikely(!pd->entry)) {
+		kfree(pd);
+		return NULL;
+	}
+
 	spin_lock_init(&pd->lock);
 	return pd;
 }
@@ -44,12 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
 	struct i915_page_directory *pd;
 
-	pd = __alloc_pd(sizeof(*pd));
+	pd = __alloc_pd(I915_PDES);
 	if (unlikely(!pd))
 		return ERR_PTR(-ENOMEM);
 
 	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
 	if (IS_ERR(pd->pt.base)) {
+		kfree(pd->entry);
 		kfree(pd);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -57,9 +64,19 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 	return pd;
 }
 
-void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
+void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
 {
-	i915_gem_object_put(pt->base);
+	BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));
+
+	if (lvl) {
+		struct i915_page_directory *pd =
+			container_of(pt, typeof(*pd), pt);
+		kfree(pd->entry);
+	}
+
+	if (pt->base)
+		i915_gem_object_put(pt->base);
+
 	kfree(pt);
 }
 
@@ -82,7 +99,7 @@ __set_pd_entry(struct i915_page_directory * const pd,
 	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
 {
 	/* Each thread pre-pins the pd, and we may have a thread per pde. */
-	GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+	GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);
 
 	atomic_inc(px_used(pd));
 	pd->entry[idx] = to;
@@ -263,7 +280,7 @@ void i915_vm_free_pt_stash(struct i915_address_space *vm,
 	for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
 		while ((pt = stash->pt[n])) {
 			stash->pt[n] = pt->stash;
-			free_px(vm, pt);
+			free_px(vm, pt, n);
 		}
 	}
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-07-28 15:32 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-28 15:31 [Intel-gfx] [PATCH 1/3] drm/i915: Preallocate stashes for vma page-directories Chris Wilson
2020-07-28 15:31 ` [Intel-gfx] [PATCH 2/3] drm/i915/gt: Switch to object allocations for page directories Chris Wilson
2020-07-28 15:31 ` Chris Wilson [this message]
2020-07-28 15:41   ` [Intel-gfx] [PATCH 3/3] drm/i915/gt: Shrink i915_page_directory's slab bucket Matthew Auld
2020-07-28 20:43 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [1/3] drm/i915: Preallocate stashes for vma page-directories Patchwork
2020-07-28 21:04 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-07-29  5:51 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200728153157.27969-3-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=matthew.auld@intel.com \
    --cc=thomas.hellstrom@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).