Intel-GFX Archive on lore.kernel.org
 help / color / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages
Date: Tue, 12 Apr 2011 21:31:52 +0100
Message-ID: <1302640318-23165-25-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1302640318-23165-1-git-send-email-chris@chris-wilson.co.uk>

Replace the three nearly identical copies of the code with a single
function. And take advantage of the opportunity to do some
micro-optimisation: avoid the vmalloc if at all possible and also avoid
dropping the lock unless we are forced to acquire the mm semaphore.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |  132 ++++++++++++++++++++++-----------------
 1 files changed, 75 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 33830c9..0028f3b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -257,6 +257,56 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 		obj->tiling_mode != I915_TILING_NONE;
 }
 
+static int
+i915_gem_get_user_pages(struct drm_device *dev,
+			unsigned long addr,
+			bool write,
+			int *num_pages,
+			struct page ***pages_out)
+{
+	struct page **pages;
+	int pinned, ret;
+	int n = *num_pages;
+
+	pages = kmalloc(n*sizeof(struct page *),
+			GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+	if (pages == NULL) {
+		pages = drm_malloc_ab(n, sizeof(struct page *));
+		if (pages == NULL) {
+			*pages_out = NULL;
+			*num_pages = 0;
+			return -ENOMEM;
+		}
+	}
+
+	pinned = __get_user_pages_fast(addr, n, write, pages);
+	if (pinned < n) {
+		struct mm_struct *mm = current->mm;
+
+		mutex_unlock(&dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm,
+				     addr + (pinned << PAGE_SHIFT),
+				     n - pinned,
+				     write, 0,
+				     pages + pinned,
+				     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+	}
+
+	ret = 0;
+	if (pinned < n)
+		ret = -EFAULT;
+
+	*num_pages = pinned;
+	*pages_out = pages;
+	return ret;
+}
+
+
 static inline void
 slow_shmem_copy(struct page *dst_page,
 		int dst_offset,
@@ -398,11 +448,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 			  struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t offset;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index, data_page_offset;
 	int page_length;
@@ -420,20 +470,10 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 1, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, true,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_cpu_read_domain_range(obj,
 							args->offset,
@@ -494,7 +534,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++) {
+	for (i = 0; i < num_pages; i++) {
 		SetPageDirty(user_pages[i]);
 		mark_page_accessed(user_pages[i]);
 		page_cache_release(user_pages[i]);
@@ -679,10 +719,9 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
 	loff_t gtt_page_base, offset;
-	loff_t first_data_page, last_data_page, num_pages;
-	loff_t pinned_pages, i;
+	loff_t first_data_page, last_data_page;
+	int num_pages, i;
 	struct page **user_pages;
-	struct mm_struct *mm = current->mm;
 	int gtt_page_offset, data_page_offset, data_page_index, page_length;
 	int ret;
 	uint64_t data_ptr = args->data_ptr;
@@ -697,28 +736,18 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
-		goto out_unpin_pages;
-	}
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
+		goto out;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	ret = i915_gem_object_put_fence(obj);
 	if (ret)
-		goto out_unpin_pages;
+		goto out;
 
 	offset = obj->gtt_offset + args->offset;
 
@@ -753,8 +782,8 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 		data_ptr += page_length;
 	}
 
-out_unpin_pages:
-	for (i = 0; i < pinned_pages; i++)
+out:
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
@@ -803,11 +832,11 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 
-		vaddr = kmap_atomic(page, KM_USER0);
+		vaddr = kmap_atomic(page);
 		ret = __copy_from_user_inatomic(vaddr + page_offset,
 						user_data,
 						page_length);
-		kunmap_atomic(vaddr, KM_USER0);
+		kunmap_atomic(vaddr);
 
 		set_page_dirty(page);
 		mark_page_accessed(page);
@@ -842,11 +871,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 			   struct drm_file *file)
 {
 	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
-	struct mm_struct *mm = current->mm;
 	struct page **user_pages;
 	ssize_t remain;
-	loff_t offset, pinned_pages, i;
-	loff_t first_data_page, last_data_page, num_pages;
+	loff_t first_data_page, last_data_page, offset;
+	int num_pages, i;
 	int shmem_page_offset;
 	int data_page_index,  data_page_offset;
 	int page_length;
@@ -864,20 +892,10 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 	num_pages = last_data_page - first_data_page + 1;
 
-	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
-	if (user_pages == NULL)
-		return -ENOMEM;
-
-	mutex_unlock(&dev->struct_mutex);
-	down_read(&mm->mmap_sem);
-	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
-				      num_pages, 0, 0, user_pages, NULL);
-	up_read(&mm->mmap_sem);
-	mutex_lock(&dev->struct_mutex);
-	if (pinned_pages < num_pages) {
-		ret = -EFAULT;
+	ret = i915_gem_get_user_pages(dev, data_ptr, false,
+				      &num_pages, &user_pages);
+	if (ret)
 		goto out;
-	}
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 	if (ret)
@@ -940,7 +958,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 	}
 
 out:
-	for (i = 0; i < pinned_pages; i++)
+	for (i = 0; i < num_pages; i++)
 		page_cache_release(user_pages[i]);
 	drm_free_large(user_pages);
 
-- 
1.7.4.1

  parent reply index

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-12 20:31 i915 next Chris Wilson
2011-04-12 20:31 ` [PATCH 01/30] drm/i915: Split the crtc_mode_set function along HAS_PCH_SPLIT() lines Chris Wilson
2011-04-12 20:31 ` [PATCH 02/30] drm/i915: Move the vblank pre/post modeset to the common crtc_mode_set Chris Wilson
2011-04-12 20:31 ` [PATCH 03/30] drm/i915: Remove the PCH paths from the pre-Ironlake crtc_mode_set() Chris Wilson
2011-04-12 20:31 ` [PATCH 04/30] drm/i915: Drop the eDP paths from the pre-Ironlake crtc_mode_set Chris Wilson
2011-04-12 20:31 ` [PATCH 05/30] drm/i915: Drop the remaining bit of Ironlake code from i9xx_crtc_mode_set() Chris Wilson
2011-04-12 20:31 ` [PATCH 06/30] drm/i915: Drop non-HAS_PCH_SPLIT() code from ironlake_crtc_mode_set() Chris Wilson
2011-04-12 20:31 ` [PATCH 07/30] drm/i915: Drop remaining pre-Ironlake " Chris Wilson
2011-04-12 20:31 ` [PATCH 08/30] drm/i915: Clean up leftover DPLL and LVDS register choice from pch split Chris Wilson
2011-04-12 20:31 ` [PATCH 09/30] drm/i915: Fold the DPLL limit defines into the structs that use them Chris Wilson
2011-04-12 20:31 ` [PATCH 10/30] drm/i915: fix ilk rc6 teardown locking Chris Wilson
2011-04-12 20:31 ` [PATCH 11/30] drm/1915: ringbuffer wait for idle function Chris Wilson
2011-04-12 20:31 ` [PATCH 12/30] drm/i915: fix rc6 initialization on Ironlake Chris Wilson
2011-04-12 20:31 ` [PATCH 13/30] drm/i915: re-enable rc6 for ironlake Chris Wilson
2011-04-12 20:31 ` [PATCH 14/30] drm/i915: use i915_enable_rc6 on SNB too Chris Wilson
2011-04-12 20:31 ` [PATCH 15/30] drm/i915: Rename agp_type to cache_level Chris Wilson
2011-04-13 15:57   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 16/30] drm/i915: Mark the cursor and the overlay as being part of the display planes Chris Wilson
2011-04-13 16:00   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 17/30] drm/i915: Do not clflush snooped objects Chris Wilson
2011-04-13 16:04   ` Daniel Vetter
2011-04-13 17:34     ` Chris Wilson
2011-04-13 20:47       ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 18/30] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
2011-04-13 18:59   ` Daniel Vetter
2011-04-13 19:21     ` Chris Wilson
2011-04-13 22:27     ` [PATCH 1/3] drm/i915: Introduce i915_gem_object_finish_gpu() Chris Wilson
2011-04-13 22:27       ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_finish_gtt() Chris Wilson
2011-04-13 22:27       ` [PATCH 3/3] drm/i915: Add an interface to dynamically change the cache level Chris Wilson
2011-04-12 20:31 ` [PATCH 19/30] drm/i915: Use the uncached domain for the display planes v2 Chris Wilson
2011-04-12 20:31 ` [PATCH 20/30] drm/i915: Use the CPU domain for snooped pwrites Chris Wilson
2011-04-12 20:31 ` [PATCH 21/30] drm/i915: Redirect GTT mappings to the CPU page if cache-coherent Chris Wilson
2011-04-13 15:57   ` Eric Anholt
2011-04-13 16:19     ` Chris Wilson
2011-04-13 18:35     ` [PATCH] " Chris Wilson
2011-04-13 19:13       ` Daniel Vetter
2011-04-13 19:47         ` Chris Wilson
2011-04-13 20:26         ` [PATCH] drm/i915: Prevent mmap access through the GTT of snooped pages Chris Wilson
2011-04-13 20:51           ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 22/30] drm/i915: Use the LLC mode on gen6 for everything but display Chris Wilson
2011-04-13 19:15   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 23/30] drm/i915: Cache GT fifo count for SandyBridge Chris Wilson
2011-04-14  2:21   ` Ben Widawsky
2011-04-14  4:48     ` Ben Widawsky
2011-04-12 20:31 ` Chris Wilson [this message]
2011-04-13 15:59   ` [PATCH 24/30] drm/i915: Refactor pwrite/pread to use single copy of get_user_pages Eric Anholt
2011-04-13 17:24     ` Chris Wilson
2011-04-13 19:35       ` Eric Anholt
2011-04-13 19:26   ` Daniel Vetter
2011-04-13 19:56     ` Chris Wilson
2011-04-13 20:56       ` Daniel Vetter
2011-04-14 23:23       ` Ben Widawsky
2011-04-15  9:48         ` Paul Menzel
2011-04-16  8:03           ` Chris Wilson
2011-04-12 20:31 ` [PATCH 25/30] drm/i915: s/addr & ~PAGE_MASK/offset_in_page(addr)/ Chris Wilson
2011-04-12 20:31 ` [PATCH 26/30] drm/i915: Maintain fenced gpu access until we flush the fence Chris Wilson
2011-04-13 19:37   ` Daniel Vetter
2011-04-13 20:15     ` Chris Wilson
2011-04-13 20:58       ` Daniel Vetter
2011-04-13 21:37         ` Chris Wilson
2011-04-12 20:31 ` [PATCH 27/30] drm/i915: Invalidate fenced read domains upon flush Chris Wilson
2011-04-13 19:43   ` Daniel Vetter
2011-04-13 20:38     ` Chris Wilson
2011-04-13 21:02       ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 28/30] drm/i915: Pass the fence register number to be written Chris Wilson
2011-04-13 19:48   ` Daniel Vetter
2011-04-12 20:31 ` [PATCH 29/30] drm/i915: Track fence setup separately from fenced object lifetime Chris Wilson
2011-04-13 20:42   ` Daniel Vetter
2011-04-13 21:56     ` Chris Wilson
2011-04-12 20:31 ` [PATCH 30/30] drm/i915: Only print out the actual number of fences for i915_error_state Chris Wilson
2011-04-13  7:26 ` i915 next Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1302640318-23165-25-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Intel-GFX Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/intel-gfx/0 intel-gfx/git/0.git
	git clone --mirror https://lore.kernel.org/intel-gfx/1 intel-gfx/git/1.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 intel-gfx intel-gfx/ https://lore.kernel.org/intel-gfx \
		intel-gfx@lists.freedesktop.org
	public-inbox-index intel-gfx

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.freedesktop.lists.intel-gfx


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git