[PATCH 16/38] drm/i915: Share per-timeline HWSP using a slab suballocator

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 16/38] drm/i915: Share per-timeline HWSP using a slab suballocator
Date: Fri, 18 Jan 2019 14:00:47 +0000	[thread overview]
Message-ID: <20190118140109.25261-17-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20190118140109.25261-1-chris@chris-wilson.co.uk>

If we restrict ourselves to only using a cacheline for each timeline's
HWSP (we could go smaller, but want to avoid needless polluting
cachelines on different engines between different contexts), then we can
suballocate a single 4k page into 64 different timeline HWSP. By
treating each fresh allocation as a slab of 64 entries, we can keep it
around for the next 64 allocation attempts until we need to refresh the
slab cache.

John Harrison noted the issue of fragmentation leading to the same worst
case performance of one page per timeline as before, which can be
mitigated by adopting a freelist.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h      |  5 ++
 drivers/gpu/drm/i915/i915_timeline.c | 80 ++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3913900600b7..d59228dabb6e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1978,6 +1978,11 @@ struct drm_i915_private {
 		struct i915_gt_timelines {
 			struct mutex mutex; /* protects list, tainted by GPU */
 			struct list_head list;
+
+			/* Pack multiple timelines' seqnos into the same page */
+			spinlock_t hwsp_lock;
+			struct i915_vma *hwsp;
+			u64 bitmap;
 		} timelines;
 
 		struct list_head active_rings;
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index a7d902e9eaf1..1579ace82cfd 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -11,26 +11,73 @@
 
 static int hwsp_alloc(struct i915_timeline *timeline)
 {
+#define NBITS BITS_PER_TYPE(typeof(gt->bitmap))
 	struct drm_i915_private *i915 = timeline->i915;
-	struct drm_i915_gem_object *obj;
+	struct i915_gt_timelines *gt = &i915->gt.timelines;
 	struct i915_vma *vma;
+	int offset;
+
+	spin_lock(&gt->hwsp_lock);
+
+restart:
+	offset = find_first_bit((unsigned long *)&gt->bitmap, NBITS);
+	if (offset == NBITS && gt->hwsp) {
+		i915_vma_put(gt->hwsp);
+		gt->hwsp = NULL;
+	}
+
+	vma = gt->hwsp;
+	if (!vma) {
+		struct drm_i915_gem_object *obj;
+
+		spin_unlock(&gt->hwsp_lock);
 
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+		BUILD_BUG_ON(NBITS * CACHELINE_BYTES > PAGE_SIZE);
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
 
-	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+		i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
-	if (IS_ERR(vma)) {
-		i915_gem_object_put(obj);
-		return PTR_ERR(vma);
+		vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+		if (IS_ERR(vma)) {
+			i915_gem_object_put(obj);
+			return PTR_ERR(vma);
+		}
+
+		spin_lock(&gt->hwsp_lock);
+		if (gt->hwsp) {
+			i915_gem_object_put(obj);
+			goto restart;
+		}
+
+		gt->hwsp = vma;
+		gt->bitmap = ~0ull;
+		offset = 0;
 	}
 
-	timeline->hwsp_ggtt = vma;
-	timeline->hwsp_offset = 0;
+	gt->bitmap &= ~BIT_ULL(offset);
+
+	spin_unlock(&gt->hwsp_lock);
+
+	timeline->hwsp_ggtt = i915_vma_get(vma);
+	timeline->hwsp_offset = offset * CACHELINE_BYTES;
 
 	return 0;
+#undef NBITS
+}
+
+static void hwsp_free(struct i915_timeline *timeline)
+{
+	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
+
+	if (timeline->hwsp_ggtt != gt->hwsp)
+		return;
+
+	spin_lock(&gt->hwsp_lock);
+	if (timeline->hwsp_ggtt == gt->hwsp)
+		gt->bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+	spin_unlock(&gt->hwsp_lock);
 }
 
 int i915_timeline_init(struct drm_i915_private *i915,
@@ -65,6 +112,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	vaddr = i915_gem_object_pin_map(timeline->hwsp_ggtt->obj, I915_MAP_WB);
 	if (IS_ERR(vaddr)) {
+		hwsp_free(timeline);
 		i915_vma_put(timeline->hwsp_ggtt);
 		return PTR_ERR(vaddr);
 	}
@@ -99,6 +147,8 @@ void i915_timelines_init(struct drm_i915_private *i915)
 	mutex_init(&gt->mutex);
 	INIT_LIST_HEAD(&gt->list);
 
+	spin_lock_init(&gt->hwsp_lock);
+
 	/* via i915_gem_wait_for_idle() */
 	i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
 }
@@ -144,6 +194,9 @@ void i915_timeline_fini(struct i915_timeline *timeline)
 	list_del(&timeline->link);
 	mutex_unlock(&gt->mutex);
 
+	i915_syncmap_free(&timeline->sync);
+	hwsp_free(timeline);
+
 	i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
 	i915_vma_put(timeline->hwsp_ggtt);
 }
@@ -211,9 +264,14 @@ void __i915_timeline_free(struct kref *kref)
 void i915_timelines_fini(struct drm_i915_private *i915)
 {
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
+	struct i915_vma *vma;
 
 	GEM_BUG_ON(!list_empty(&gt->list));
 
+	vma = fetch_and_zero(&i915->gt.timelines.hwsp);
+	if (vma)
+		i915_vma_put(vma);
+
 	mutex_destroy(&gt->mutex);
 }
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx