From: Daniel Vetter <daniel@ffwll.ch>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 22/29] drm/i915: Handle stolen objects in pwrite
Date: Mon, 20 Aug 2012 21:56:08 +0200
Message-ID: <20120820195608.GH5170@phenom.ffwll.local>
In-Reply-To: <1344696088-24760-23-git-send-email-chris@chris-wilson.co.uk>
On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
What about putting kmap/unmap abstractions into obj->ops (like the
dma_buf interface already has)? Since the pwrite/pread code is already
rather branch heavy, I hope we won't see the overhead of the indirect
call even in microbenchmarks (haven't checked). And this way we would
also neatly wrap up dma_bufs for pwrite (if anyone ever really wants
that ...).
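
Roughly what I have in mind, as a completely untested sketch: extend
the ops vtable from your patch 08/29 with per-page mapping hooks,
modelled on the kmap hooks in struct dma_buf_ops. The new member names
and signatures below are made up.

struct drm_i915_gem_object_ops {
	int (*get_pages)(struct drm_i915_gem_object *);
	void (*put_pages)(struct drm_i915_gem_object *);

	/* hypothetical additions: map/unmap one page of the object's
	 * backing storage, mirroring struct dma_buf_ops */
	void *(*kmap_atomic)(struct drm_i915_gem_object *obj,
			     unsigned long page_num);
	void (*kunmap_atomic)(struct drm_i915_gem_object *obj,
			      unsigned long page_num, void *vaddr);
	void *(*kmap)(struct drm_i915_gem_object *obj,
		      unsigned long page_num);
	void (*kunmap)(struct drm_i915_gem_object *obj,
		       unsigned long page_num, void *vaddr);
};

pread/pwrite would then call through obj->ops instead of hardcoding
kmap_atomic/kmap on shmem pages.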
The kmap(_atomic) for stolen-mem-backed objects would boil down to
simple pointer arithmetic, and kunmap would be a noop.
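
For stolen objects those hooks would be trivial; again just a
hypothetical sketch, reusing the fields from your patch:

static void *i915_gem_stolen_kmap_atomic(struct drm_i915_gem_object *obj,
					 unsigned long page_num)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

	/* stolen memory is permanently mapped, so mapping a page is
	 * just pointer arithmetic into the stolen range */
	return (char *)(dev_priv->mm.stolen_base + obj->stolen->start +
			page_num * PAGE_SIZE);
}

static void i915_gem_stolen_kunmap_atomic(struct drm_i915_gem_object *obj,
					  unsigned long page_num, void *vaddr)
{
	/* permanently mapped, nothing to undo */
}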
Cheers, Daniel
> ---
> drivers/gpu/drm/i915/i915_gem.c | 169 +++++++++++++++++++++++++--------------
> 1 file changed, 111 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 552f95b..a2fb2aa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -664,19 +664,17 @@ out:
> * needs_clflush_before is set and flushes out any written cachelines after
> * writing if needs_clflush is set. */
> static int
> -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
> char __user *user_data,
> bool page_do_bit17_swizzling,
> bool needs_clflush_before,
> bool needs_clflush_after)
> {
> - char *vaddr;
> int ret;
>
> if (unlikely(page_do_bit17_swizzling))
> return -EINVAL;
>
> - vaddr = kmap_atomic(page);
> if (needs_clflush_before)
> drm_clflush_virt_range(vaddr + shmem_page_offset,
> page_length);
> @@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> if (needs_clflush_after)
> drm_clflush_virt_range(vaddr + shmem_page_offset,
> page_length);
> - kunmap_atomic(vaddr);
>
> return ret ? -EFAULT : 0;
> }
> @@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> /* Only difference to the fast-path function is that this can handle bit17
> * and uses non-atomic copy and kmap functions. */
> static int
> -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
> char __user *user_data,
> bool page_do_bit17_swizzling,
> bool needs_clflush_before,
> bool needs_clflush_after)
> {
> - char *vaddr;
> int ret;
>
> - vaddr = kmap(page);
> if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
> page_length,
> @@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
> page_length,
> page_do_bit17_swizzling);
> - kunmap(page);
>
> return ret ? -EFAULT : 0;
> }
> @@ -731,6 +725,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> struct drm_i915_gem_pwrite *args,
> struct drm_file *file)
> {
> + struct drm_i915_private *dev_priv = dev->dev_private;
> ssize_t remain;
> loff_t offset;
> char __user *user_data;
> @@ -770,74 +765,132 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> if (ret)
> return ret;
>
> - i915_gem_object_pin_pages(obj);
> -
> offset = args->offset;
> obj->dirty = 1;
>
> - for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> - struct page *page;
> - int partial_cacheline_write;
> + if (obj->stolen) {
> + while (remain > 0) {
> + char *vaddr;
> + int partial_cacheline_write;
>
> - if (i < offset >> PAGE_SHIFT)
> - continue;
> + /* Operation in this page
> + *
> + * shmem_page_offset = offset within page in shmem file
> + * page_length = bytes to copy for this page
> + */
> + shmem_page_offset = offset_in_page(offset);
>
> - if (remain <= 0)
> - break;
> + page_length = remain;
> + if ((shmem_page_offset + page_length) > PAGE_SIZE)
> + page_length = PAGE_SIZE - shmem_page_offset;
>
> - /* Operation in this page
> - *
> - * shmem_page_offset = offset within page in shmem file
> - * page_length = bytes to copy for this page
> - */
> - shmem_page_offset = offset_in_page(offset);
> + /* If we don't overwrite a cacheline completely we need to be
> + * careful to have up-to-date data by first clflushing. Don't
> + * overcomplicate things and flush the entire patch. */
> + partial_cacheline_write = needs_clflush_before &&
> + ((shmem_page_offset | page_length)
> + & (boot_cpu_data.x86_clflush_size - 1));
>
> - page_length = remain;
> - if ((shmem_page_offset + page_length) > PAGE_SIZE)
> - page_length = PAGE_SIZE - shmem_page_offset;
> + vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
> + page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> + ((uintptr_t)vaddr & (1 << 17)) != 0;
>
> - /* If we don't overwrite a cacheline completely we need to be
> - * careful to have up-to-date data by first clflushing. Don't
> - * overcomplicate things and flush the entire patch. */
> - partial_cacheline_write = needs_clflush_before &&
> - ((shmem_page_offset | page_length)
> - & (boot_cpu_data.x86_clflush_size - 1));
> + ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
>
> - page = sg_page(sg);
> - page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> - (page_to_phys(page) & (1 << 17)) != 0;
> + if (ret == 0)
> + goto next_stolen;
>
> - ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
> - user_data, page_do_bit17_swizzling,
> - partial_cacheline_write,
> - needs_clflush_after);
> - if (ret == 0)
> - goto next_page;
> + hit_slowpath = 1;
> + mutex_unlock(&dev->struct_mutex);
>
> - hit_slowpath = 1;
> - mutex_unlock(&dev->struct_mutex);
> - ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> - user_data, page_do_bit17_swizzling,
> - partial_cacheline_write,
> - needs_clflush_after);
> + ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
>
> - mutex_lock(&dev->struct_mutex);
> + mutex_lock(&dev->struct_mutex);
> + if (ret)
> + goto out;
>
> -next_page:
> - set_page_dirty(page);
> - mark_page_accessed(page);
> +next_stolen:
> + remain -= page_length;
> + user_data += page_length;
> + offset += page_length;
> + }
> + } else {
> + i915_gem_object_pin_pages(obj);
>
> - if (ret)
> - goto out;
> + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> + struct page *page;
> + char *vaddr;
> + int partial_cacheline_write;
>
> - remain -= page_length;
> - user_data += page_length;
> - offset += page_length;
> + if (i < offset >> PAGE_SHIFT)
> + continue;
> +
> + if (remain <= 0)
> + break;
> +
> + /* Operation in this page
> + *
> + * shmem_page_offset = offset within page in shmem file
> + * page_length = bytes to copy for this page
> + */
> + shmem_page_offset = offset_in_page(offset);
> +
> + page_length = remain;
> + if ((shmem_page_offset + page_length) > PAGE_SIZE)
> + page_length = PAGE_SIZE - shmem_page_offset;
> +
> + /* If we don't overwrite a cacheline completely we need to be
> + * careful to have up-to-date data by first clflushing. Don't
> + * overcomplicate things and flush the entire patch. */
> + partial_cacheline_write = needs_clflush_before &&
> + ((shmem_page_offset | page_length)
> + & (boot_cpu_data.x86_clflush_size - 1));
> +
> + page = sg_page(sg);
> + page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> + (page_to_phys(page) & (1 << 17)) != 0;
> +
> + vaddr = kmap_atomic(page);
> + ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
> +
> + kunmap_atomic(vaddr);
> +
> + if (ret == 0)
> + goto next_page;
> +
> + hit_slowpath = 1;
> + mutex_unlock(&dev->struct_mutex);
> +
> + vaddr = kmap(page);
> + ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> + user_data, page_do_bit17_swizzling,
> + partial_cacheline_write,
> + needs_clflush_after);
> + kunmap(page);
> +
> + mutex_lock(&dev->struct_mutex);
> + if (ret)
> + goto out_unpin;
> +
> +next_page:
> + remain -= page_length;
> + user_data += page_length;
> + offset += page_length;
> + }
> +out_unpin:
> + i915_gem_object_unpin_pages(obj);
> }
>
> out:
> - i915_gem_object_unpin_pages(obj);
> -
> if (hit_slowpath) {
> /* Fixup: Kill any reinstated backing storage pages */
> if (obj->madv == __I915_MADV_PURGED)
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48