Intel-GFX Archive on lore.kernel.org
 help / color / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 02/21] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
Date: Sat, 16 Apr 2011 10:17:26 +0100
Message-ID: <1302945465-32115-3-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1302945465-32115-1-git-send-email-chris@chris-wilson.co.uk>

Replace the three nearly identical copies of the code with a single
function. And take advantage of the opportunity to do some
micro-optimisation: avoid the vmalloc if at all possible and also avoid
dropping the lock unless we are forced to acquire the mm semaphore.

Measuring the impact of the optimisations, it turns out they are not so
micro after all. Running x11perf -aa10text on PineView:

  before 1.28 Mglyph/sec
  after  1.45 Mglyph/sec

(Glyphs, in general and on PineView in particular, are one of the very
few pwrite rate-limited workloads.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Paul Menzel <paulepanter@users.sourceforge.net>
---
 drivers/gpu/drm/i915/i915_gem.c |  149 ++++++++++++++++++++++++---------------
 1 files changed, 92 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 57bc775..140d324 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -257,6 +257,73 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 		obj->tiling_mode != I915_TILING_NONE;
 }
 
+/**
+ * Magically retrieves the pages for the user addr whilst holding the
+ * dev->struct_mutex.
+ *
+ * Since we can not take the mm semaphore whilst holding our dev->struct_mutex,
+ * due to the pre-existing lock dependency established by i915_gem_fault(),
+ * we have to perform some sleight-of-hand.
+ *
+ * First, we try the lockless variant of get_user_pages(),
+ * __get_user_pages_fast(), whilst continuing to hold the mutex. If that
+ * fails to get all the user pages, then we no have choice but to acquire
+ * the mm semaphore, thus dropping the lock on dev->struct_mutex to do so.
+ * The dev->struct_mutex is then re-acquired before we return.
+ *
+ * Returns: an error code *and* the number of user pages acquired. Even
+ * on an error, you must iterate over the returned pages and release them.
+ */
+static int
+i915_gem_get_user_pages(struct drm_device *dev,
+			unsigned long addr,
+			bool write,
+			int *num_pages,
+			struct page ***pages_out)
+{
+	struct page **pages;
+	int pinned, ret;
+	int n = *num_pages;
+
+	pages = kmalloc(n*sizeof(struct page *),
+			GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (pages == NULL) {
+		pages = drm_malloc_ab(n, sizeof(struct page *));
+		if (pages == NULL) {
+			*pages_out = NULL;
+			*num_pages = 0;
+			return -ENOMEM;
+		}
+	}
+
+	pinned = __get_user_pages_fast(addr, n, write, pages);
+	if (pinned < n) {
+		struct mm_struct *mm = current->mm;
+
+		mutex_unlock(&dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm,
+				     addr + (pinned << PAGE_SHIFT),
+				     n - pinned,
+				     write, 0,
+				     pages + pinned,
+				     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+	}
+
+	ret = 0;
+	if (pinned < n)
+		ret = -EFAULT;
+
+	*num_pages = pinned;
+	*pages_out = pages;
+	return ret;
+}
+
+
 static inline void
 slow_shmem_copy(struct page *dst_page,
 		int dst_offset,
@@ -398,11 +465,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 			  struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t offset;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index, data_page_offset;
 	int page_length;
@@ -420,20 +487,10 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 1, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, true,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_cpu_read_domain_range(obj,
 							args->offset,
@@ -494,7 +551,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		SetPageDirty(user_pages[i]);
 		mark_page_accessed(user_pages[i]);
 		page_cache_release(user_pages[i]);
@@ -679,10 +736,9 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t gtt_page_base, offset;
-	loff_t first_data_page, last_data_page, num_pages;
-	loff_t pinned_pages, i;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	struct page **user_pages;
-	struct mm_struct *mm = current->mm;
 	int gtt_page_offset, data_page_offset, data_page_index, page_length;
 	int ret;
 	uint64_t data_ptr = args->data_ptr;
@@ -697,28 +753,18 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out_unpin_pages;
-	}
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
+		goto out;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	ret = i915_gem_object_put_fence(obj);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	offset = obj->gtt_offset + args->offset;
 
@@ -753,8 +799,8 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 		data_ptr += page_length;
 	}
 
-out_unpin_pages:
-	for (i = 0; i < pinned_pages; i++)
+out:
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
@@ -803,11 +849,11 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 
-		vaddr = kmap_atomic(page, KM_USER0);
+		vaddr = kmap_atomic(page);
 		ret = __copy_from_user_inatomic(vaddr + page_offset,
 						user_data,
 						page_length);
-		kunmap_atomic(vaddr, KM_USER0);
+		kunmap_atomic(vaddr);
 
 		set_page_dirty(page);
 		mark_page_accessed(page);
@@ -842,11 +888,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 			   struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t first_data_page, last_data_page, offset;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index,  data_page_offset;
 	int page_length;
@@ -864,20 +909,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 	if (ret)
@@ -940,7 +975,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++)
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
-- 
1.7.4.1

  parent reply index

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-16  9:17 i915 next, post-llc Chris Wilson
2011-04-16  9:17 ` [PATCH 01/21] drm/i915: Cache GT fifo count for SandyBridge Chris Wilson
2011-04-16  9:17 ` Chris Wilson [this message]
2011-04-16  9:17 ` [PATCH 03/21] drm/i915: s/addr & ~PAGE_MASK/offset_in_page(addr)/ Chris Wilson
2011-04-16  9:17 ` [PATCH 04/21] drm/i915: Maintain fenced gpu access until we flush the fence Chris Wilson
2011-04-16  9:17 ` [PATCH 05/21] drm/i915: Invalidate fenced read domains upon flush Chris Wilson
2011-04-16  9:17 ` [PATCH 06/21] drm/i915: Pass the fence register number to be written Chris Wilson
2011-04-16  9:17 ` [PATCH 07/21] drm/i915: Track fence setup separately from fenced object lifetime Chris Wilson
2011-04-16 13:20   ` Daniel Vetter
2011-04-16  9:17 ` [PATCH 08/21] drm/i915: Only print out the actual number of fences for i915_error_state Chris Wilson
2011-04-16  9:17 ` [PATCH 09/21] drm/i915: Implement direct support for 24 bit LVDS pixel format Chris Wilson
2011-04-16  9:17 ` [PATCH 10/21] drm/i915: Implement manual override of LVDS single/dual channel mode Chris Wilson
2011-04-16  9:17 ` [PATCH 11/21] drm/i915/tv: Use a direct pointer for tv_mode Chris Wilson
2011-04-16  9:17 ` [PATCH 12/21] drm/i915: Replace ironlake_compute_wm0 with g4x_compute_wm0 Chris Wilson
2011-04-16  9:17 ` [PATCH 13/21] drm/i915/crt: Explicitly return false if connected to a digital monitor Chris Wilson
2011-04-16  9:17 ` [PATCH 14/21] drm/i915/i2c: Convert from using GMBUS1 + reg_offset idiom to reg + 0 Chris Wilson
2011-04-16  9:17 ` [PATCH 15/21] drm/i915/gmbus: Reset the controller on initialisation Chris Wilson
2011-04-16  9:17 ` [PATCH 16/21] drm/i915: Retire requests before disabling pagefaults Chris Wilson
2011-04-16 13:44   ` Daniel Vetter
2011-04-16  9:17 ` [PATCH 17/21] drm/i915: Repeat retiring of requests until the seqno is stable Chris Wilson
2011-04-16 13:45   ` Daniel Vetter
2011-04-16  9:17 ` [PATCH 18/21] drm/i915: Split out i915_gem_object_move_to_ring() from execbuffer Chris Wilson
2011-04-16 13:54   ` Daniel Vetter
2011-04-16 14:18     ` Chris Wilson
2011-04-16 14:24       ` Daniel Vetter
2011-04-16  9:17 ` [PATCH 19/21] drm/i915: Enable the use of GPU semaphores whilst page-flipping Chris Wilson
2011-04-16 13:58   ` Daniel Vetter
2011-04-16 14:20     ` Chris Wilson
2011-04-16  9:17 ` [PATCH 20/21] drm/i915: Use a slab for object allocation Chris Wilson
2011-04-16 14:07   ` Daniel Vetter
2011-04-16  9:17 ` [PATCH 21/21] drm/i915: Introduce vmap (mapping of user pages into video memory) ioctl Chris Wilson
2011-04-18 14:58   ` Daniel Vetter
2011-04-19  6:20     ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1302945465-32115-3-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Intel-GFX Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/intel-gfx/0 intel-gfx/git/0.git
	git clone --mirror https://lore.kernel.org/intel-gfx/1 intel-gfx/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 intel-gfx intel-gfx/ https://lore.kernel.org/intel-gfx \
		intel-gfx@lists.freedesktop.org
	public-inbox-index intel-gfx

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.freedesktop.lists.intel-gfx


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git