From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 15/41] drm/i915: Use a radixtree for random access to the object's backing storage
Date: Thu, 20 Oct 2016 16:03:57 +0100
Message-ID: <20161020150423.4560-16-chris@chris-wilson.co.uk>
In-Reply-To: <20161020150423.4560-1-chris@chris-wilson.co.uk>

A while ago we switched from a contiguous array of pages to an sglist,
as that was both more convenient for mapping to hardware and avoided
the need for a vmalloc'ed array of pages on every object. However,
certain GEM API calls (such as pwrite and pread, as well as performing
relocations) still require access to individual struct pages. A quick
hack was to introduce a cache of the last access, so that finding the
following page was quick - this works so long as the caller desires
sequential access. Walking backwards, or interleaved access by
multiple callers, still hits a slow linear search for each page. One
solution is to store each successful lookup in a radix tree.

v2: Rewrite building the radixtree for clarity, hopefully.

v3: Rearrange execbuf to avoid calling i915_gem_object_get_sg() from
within an atomic section and so relax the allocation context to a simple
GFP_KERNEL and mutex.
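
For context on the v3 note (an aside, not part of the change itself):
radix_tree_insert() allocates its tree nodes with the gfp mask passed
to INIT_RADIX_TREE(), so populating the cache from an atomic section
would have forced GFP_ATOMIC. Hoisting the lookup out of the atomic
section lets the cache use a sleeping mutex and ordinary allocations.
An illustrative sketch of the two initialisation choices:

  struct radix_tree_root cache;

  /* Process context; inserts may sleep on allocation (as in this patch): */
  INIT_RADIX_TREE(&cache, GFP_KERNEL | __GFP_NOWARN);

  /* Atomic-safe alternative, at the cost of likelier allocation failure: */
  /* INIT_RADIX_TREE(&cache, GFP_ATOMIC); */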

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  69 +++++-------
 drivers/gpu/drm/i915/i915_gem.c         | 185 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
 4 files changed, 199 insertions(+), 63 deletions(-)
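
A note on the exceptional-entry trick in the lookup code below, since
it is easy to misread: a page in the middle of a multi-page sg entry
does not store its own scatterlist pointer in the tree; it stores the
index of the entry's first page, tagged as a non-pointer
("exceptional") value so the lookup can tell the two apart. A minimal
sketch of the encode/decode arithmetic (illustrative only, using the
radix-tree flags of this kernel era):

  #include <linux/radix-tree.h>

  /* Encode the base page index of a multi-page sg entry. */
  static void *encode_base(unsigned long base)
  {
  	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
  			(base << RADIX_TREE_EXCEPTIONAL_SHIFT));
  }

  /* Recover it; radix_tree_exception() identifies such entries. */
  static unsigned long decode_base(void *entry)
  {
  	return (unsigned long)entry >> RADIX_TREE_EXCEPTIONAL_SHIFT;
  }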

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0897f43e7796..e2e48af8d41f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2273,9 +2273,12 @@ struct drm_i915_gem_object {
 
 	struct sg_table *pages;
 	int pages_pin_count;
-	struct get_page {
-		struct scatterlist *sg;
-		int last;
+	struct i915_gem_object_page_iter {
+		struct scatterlist *sg_pos;
+		unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+		struct radix_tree_root radix;
+		struct mutex lock; /* protects this cache */
 	} get_page;
 	void *mapping;
 
@@ -2478,6 +2481,14 @@ static __always_inline struct sgt_iter {
 	return s;
 }
 
+static inline struct scatterlist *____sg_next(struct scatterlist *sg)
+{
+	++sg;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+	return sg;
+}
+
 /**
  * __sg_next - return the next scatterlist entry in a list
  * @sg:		The current sg entry
@@ -2492,9 +2503,7 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
 #ifdef CONFIG_DEBUG_SG
 	BUG_ON(sg->sg_magic != SG_MAGIC);
 #endif
-	return sg_is_last(sg) ? NULL :
-		likely(!sg_is_chain(++sg)) ? sg :
-		sg_chain_ptr(sg);
+	return sg_is_last(sg) ? NULL : ____sg_next(sg);
 }
 
 /**
@@ -3172,45 +3181,21 @@ static inline int __sg_page_count(struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
-
-static inline dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
-{
-	if (n < obj->get_page.last) {
-		obj->get_page.sg = obj->pages->sgl;
-		obj->get_page.last = 0;
-	}
-
-	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-		if (unlikely(sg_is_chain(obj->get_page.sg)))
-			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-	}
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n, unsigned int *offset);
 
-	return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
-}
-
-static inline struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
-{
-	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
-		return NULL;
-
-	if (n < obj->get_page.last) {
-		obj->get_page.sg = obj->pages->sgl;
-		obj->get_page.last = 0;
-	}
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned int n);
 
-	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-		if (unlikely(sg_is_chain(obj->get_page.sg)))
-			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-	}
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n);
 
-	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
-}
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
 
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d596b1f9e969..a9ea20d53e23 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2333,6 +2333,15 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	kfree(obj->pages);
 }
 
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->get_page.radix, iter.index);
+}
+
 int
 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
@@ -2365,6 +2374,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 		obj->mapping = NULL;
 	}
 
+	__i915_gem_object_reset_page_iter(obj);
+
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
@@ -2534,8 +2545,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
-	obj->get_page.sg = obj->pages->sgl;
-	obj->get_page.last = 0;
+	obj->get_page.sg_pos = obj->pages->sgl;
+	obj->get_page.sg_idx = 0;
 
 	return 0;
 }
@@ -4338,6 +4349,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 	obj->madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->get_page.lock);
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
@@ -5031,21 +5044,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
-{
-	struct page *page;
-
-	/* Only default objects have per-page dirty tracking */
-	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-		return NULL;
-
-	page = i915_gem_object_get_page(obj, n);
-	set_page_dirty(page);
-	return page;
-}
-
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_from_data(struct drm_device *dev,
@@ -5086,3 +5084,156 @@ i915_gem_object_create_from_data(struct drm_device *dev,
 	i915_gem_object_put(obj);
 	return ERR_PTR(ret);
 }
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->get_page;
+	struct scatterlist *sg;
+	unsigned int idx, count;
+
+	might_sleep();
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(obj->pages_pin_count == 0);
+
+	/* As we iterate forward through the sg, we record each entry in a
+	 * radixtree for quick repeated (backwards) lookups. If we have seen
+	 * this index previously, we will have an entry for it.
+	 *
+	 * Initial lookup is O(N), but this is amortized to O(1) for
+	 * sequential page access (where each new request is consecutive
+	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+	 * i.e. O(1) with a large constant!
+	 */
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+
+	/* We prefer to reuse the last sg so that repeated lookup of this
+	 * (or the subsequent) sg are fast - comparing against the last
+	 * sg is faster than going through the radixtree.
+	 */
+
+	sg = iter->sg_pos;
+	idx = iter->sg_idx;
+	count = __sg_page_count(sg);
+
+	while (idx + count <= n) {
+		unsigned long exception, i;
+		int ret;
+
+		/* If we cannot allocate and insert this entry, or the
+		 * individual pages from this range, cancel updating the
+		 * sg_idx so that on this lookup we are forced to linearly
+		 * scan onwards, but on future lookups we will try the
+		 * insertion again (in which case we need to be careful of
+		 * the error return reporting that we have already inserted
+		 * this index).
+		 */
+		ret = radix_tree_insert(&iter->radix, idx, sg);
+		if (ret && ret != -EEXIST)
+			goto scan;
+
+		exception =
+			RADIX_TREE_EXCEPTIONAL_ENTRY |
+			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
+		for (i = 1; i < count; i++) {
+			ret = radix_tree_insert(&iter->radix, idx + i,
+						(void *)exception);
+			if (ret && ret != -EEXIST)
+				goto scan;
+		}
+
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+scan:
+	iter->sg_pos = sg;
+	iter->sg_idx = idx;
+
+	mutex_unlock(&iter->lock);
+
+	if (unlikely(n < idx)) /* insertion completed by another thread */
+		goto lookup;
+
+	/* In case we failed to insert the entry into the radixtree, we need
+	 * to look beyond the current sg.
+	 */
+	while (idx + count <= n) {
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+
+lookup:
+	rcu_read_lock();
+
+	sg = radix_tree_lookup(&iter->radix, n);
+	GEM_BUG_ON(!sg);
+
+	/* If this index is in the middle of a multi-page sg entry,
+	 * the radixtree will contain an exceptional entry that points
+	 * to the start of that range. We will return the pointer to
+	 * the base page and the offset of this page within the
+	 * sg entry's range.
+	 */
+	*offset = 0;
+	if (unlikely(radix_tree_exception(sg))) {
+		unsigned long base =
+			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		GEM_BUG_ON(!sg);
+
+		*offset = n - base;
+	}
+
+	rcu_read_unlock();
+
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f4f6d3a48b05..70e61bc35c60 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -595,8 +595,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	if (obj->pages == NULL)
 		goto cleanup;
 
-	obj->get_page.sg = obj->pages->sgl;
-	obj->get_page.last = 0;
+	obj->get_page.sg_pos = obj->pages->sgl;
+	obj->get_page.sg_idx = 0;
 
 	i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 1c891b92ac80..cb95789da76e 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			if (ret == 0) {
 				list_add_tail(&obj->global_list,
 					      &to_i915(dev)->mm.unbound_list);
-				obj->get_page.sg = obj->pages->sgl;
-				obj->get_page.last = 0;
+				obj->get_page.sg_pos = obj->pages->sgl;
+				obj->get_page.sg_idx = 0;
 				pinned = 0;
 			}
 		}
-- 
2.9.3
