* [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
@ 2018-07-23  9:02 Chris Wilson
  2018-07-23  9:11 ` Chris Wilson
                   ` (8 more replies)
  0 siblings, 9 replies; 15+ messages in thread
From: Chris Wilson @ 2018-07-23  9:02 UTC (permalink / raw)
  To: intel-gfx

Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
objects was flawed due to the simple fact that we do not always know the
swizzling for a particular page (the swizzling varies with physical
location in certain unbalanced memory configurations). Furthermore, the
pread/pwrite paths are now unbalanced in that we are sometimes required
to use the GTT because we do not have direct CPU access to the backing
physical pages, so some paths try to account for the swizzle while
others neglect it, and chaos ensues.

There are no known users who use pread/pwrite into a tiled object
(you need to manually detile anyway, so why not just use mmap and avoid
the copy?) and no user bug reports to indicate that it is being used in
the wild. As no one is hitting the buggy path, we can just remove the
buggy code.

References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
 1 file changed, 30 insertions(+), 161 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b52cb768a67..73a69352e0d4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 	obj->write_domain = 0;
 }
 
-static inline int
-__copy_to_user_swizzled(char __user *cpu_vaddr,
-			const char *gpu_vaddr, int gpu_offset,
-			int length)
-{
-	int ret, cpu_offset = 0;
-
-	while (length > 0) {
-		int cacheline_end = ALIGN(gpu_offset + 1, 64);
-		int this_length = min(cacheline_end - gpu_offset, length);
-		int swizzled_gpu_offset = gpu_offset ^ 64;
-
-		ret = __copy_to_user(cpu_vaddr + cpu_offset,
-				     gpu_vaddr + swizzled_gpu_offset,
-				     this_length);
-		if (ret)
-			return ret + length;
-
-		cpu_offset += this_length;
-		gpu_offset += this_length;
-		length -= this_length;
-	}
-
-	return 0;
-}
-
-static inline int
-__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
-			  const char __user *cpu_vaddr,
-			  int length)
-{
-	int ret, cpu_offset = 0;
-
-	while (length > 0) {
-		int cacheline_end = ALIGN(gpu_offset + 1, 64);
-		int this_length = min(cacheline_end - gpu_offset, length);
-		int swizzled_gpu_offset = gpu_offset ^ 64;
-
-		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
-				       cpu_vaddr + cpu_offset,
-				       this_length);
-		if (ret)
-			return ret + length;
-
-		cpu_offset += this_length;
-		gpu_offset += this_length;
-		length -= this_length;
-	}
-
-	return 0;
-}
-
 /*
  * Pins the specified object's pages and synchronizes the object with
  * GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
-static void
-shmem_clflush_swizzled_range(char *addr, unsigned long length,
-			     bool swizzled)
-{
-	if (unlikely(swizzled)) {
-		unsigned long start = (unsigned long) addr;
-		unsigned long end = (unsigned long) addr + length;
-
-		/* For swizzling simply ensure that we always flush both
-		 * channels. Lame, but simple and it works. Swizzled
-		 * pwrite/pread is far from a hotpath - current userspace
-		 * doesn't use it at all. */
-		start = round_down(start, 128);
-		end = round_up(end, 128);
-
-		drm_clflush_virt_range((void *)start, end - start);
-	} else {
-		drm_clflush_virt_range(addr, length);
-	}
-
-}
-
-/* Only difference to the fast-path function is that this can handle bit17
- * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int offset, int length,
-		 char __user *user_data,
-		 bool page_do_bit17_swizzling, bool needs_clflush)
+shmem_pread(struct page *page, int offset, int len, char __user *user_data,
+	    bool needs_clflush)
 {
 	char *vaddr;
 	int ret;
 
-	vaddr = kmap(page);
-	if (needs_clflush)
-		shmem_clflush_swizzled_range(vaddr + offset, length,
-					     page_do_bit17_swizzling);
-
-	if (page_do_bit17_swizzling)
-		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
-	else
-		ret = __copy_to_user(user_data, vaddr + offset, length);
-	kunmap(page);
+	vaddr = kmap_atomic(page);
 
-	return ret ? - EFAULT : 0;
-}
+	if (needs_clflush)
+		drm_clflush_virt_range(vaddr + offset, len);
 
-static int
-shmem_pread(struct page *page, int offset, int length, char __user *user_data,
-	    bool page_do_bit17_swizzling, bool needs_clflush)
-{
-	int ret;
+	ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
 
-	ret = -ENODEV;
-	if (!page_do_bit17_swizzling) {
-		char *vaddr = kmap_atomic(page);
+	kunmap_atomic(vaddr);
 
-		if (needs_clflush)
-			drm_clflush_virt_range(vaddr + offset, length);
-		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
-		kunmap_atomic(vaddr);
+	if (ret) {
+		vaddr = kmap(page);
+		ret = __copy_to_user(user_data, vaddr + offset, len);
+		kunmap(page);
 	}
-	if (ret == 0)
-		return 0;
 
-	return shmem_pread_slow(page, offset, length, user_data,
-				page_do_bit17_swizzling, needs_clflush);
+	return ret ? - EFAULT : 0;
 }
 
 static int
@@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 {
 	char __user *user_data;
 	u64 remain;
-	unsigned int obj_do_bit17_swizzling;
 	unsigned int needs_clflush;
 	unsigned int idx, offset;
 	int ret;
 
-	obj_do_bit17_swizzling = 0;
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		obj_do_bit17_swizzling = BIT(17);
-
 	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 			length = PAGE_SIZE - offset;
 
 		ret = shmem_pread(page, offset, length, user_data,
-				  page_to_phys(page) & obj_do_bit17_swizzling,
 				  needs_clflush);
 		if (ret)
 			break;
@@ -1475,33 +1374,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
-static int
-shmem_pwrite_slow(struct page *page, int offset, int length,
-		  char __user *user_data,
-		  bool page_do_bit17_swizzling,
-		  bool needs_clflush_before,
-		  bool needs_clflush_after)
-{
-	char *vaddr;
-	int ret;
-
-	vaddr = kmap(page);
-	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
-		shmem_clflush_swizzled_range(vaddr + offset, length,
-					     page_do_bit17_swizzling);
-	if (page_do_bit17_swizzling)
-		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
-						length);
-	else
-		ret = __copy_from_user(vaddr + offset, user_data, length);
-	if (needs_clflush_after)
-		shmem_clflush_swizzled_range(vaddr + offset, length,
-					     page_do_bit17_swizzling);
-	kunmap(page);
-
-	return ret ? -EFAULT : 0;
-}
-
 /* Per-page copy function for the shmem pwrite fastpath.
  * Flushes invalid cachelines before writing to the target if
  * needs_clflush_before is set and flushes out any written cachelines after
@@ -1509,31 +1381,34 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
  */
 static int
 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
-	     bool page_do_bit17_swizzling,
 	     bool needs_clflush_before,
 	     bool needs_clflush_after)
 {
+	char *vaddr;
 	int ret;
 
-	ret = -ENODEV;
-	if (!page_do_bit17_swizzling) {
-		char *vaddr = kmap_atomic(page);
+	vaddr = kmap_atomic(page);
 
-		if (needs_clflush_before)
-			drm_clflush_virt_range(vaddr + offset, len);
-		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
-		if (needs_clflush_after)
+	if (needs_clflush_before)
+		drm_clflush_virt_range(vaddr + offset, len);
+
+	ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
+	if (!ret && needs_clflush_after)
+		drm_clflush_virt_range(vaddr + offset, len);
+
+	kunmap_atomic(vaddr);
+
+	if (ret) {
+		vaddr = kmap(page);
+
+		ret = __copy_from_user(vaddr + offset, user_data, len);
+		if (!ret && needs_clflush_after)
 			drm_clflush_virt_range(vaddr + offset, len);
 
-		kunmap_atomic(vaddr);
+		kunmap(page);
 	}
-	if (ret == 0)
-		return ret;
 
-	return shmem_pwrite_slow(page, offset, len, user_data,
-				 page_do_bit17_swizzling,
-				 needs_clflush_before,
-				 needs_clflush_after);
+	return ret ? -EFAULT : 0;
 }
 
 static int
@@ -1543,7 +1418,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	void __user *user_data;
 	u64 remain;
-	unsigned int obj_do_bit17_swizzling;
 	unsigned int partial_cacheline_write;
 	unsigned int needs_clflush;
 	unsigned int offset, idx;
@@ -1558,10 +1432,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	obj_do_bit17_swizzling = 0;
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		obj_do_bit17_swizzling = BIT(17);
-
 	/* If we don't overwrite a cacheline completely we need to be
 	 * careful to have up-to-date data by first clflushing. Don't
 	 * overcomplicate things and flush the entire patch.
@@ -1582,7 +1452,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 			length = PAGE_SIZE - offset;
 
 		ret = shmem_pwrite(page, offset, length, user_data,
-				   page_to_phys(page) & obj_do_bit17_swizzling,
 				   (offset | length) & partial_cacheline_write,
 				   needs_clflush & CLFLUSH_AFTER);
 		if (ret)
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
@ 2018-07-23  9:11 ` Chris Wilson
  2018-07-23  9:49 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2018-07-23  9:11 UTC (permalink / raw)
  To: intel-gfx

Quoting Chris Wilson (2018-07-23 10:02:08)
> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> objects was flawed due to the simple fact that we do not always know the
> swizzling for a particular page (due to the swizzling varying based on
> location in certain unbalanced configurations). Furthermore, the
> pread/pwrite paths are now unbalanced in that we are required to use the
> GTT as in some cases we do not have direct CPU access to the backing
> physical pages (thus some paths trying to account for the swizzle, but
> others neglecting, chaos ensues).
> 
> There are no known users who do use pread/pwrite into a tiled object
> (you need to manually detile anyway, so why now just use mmap and avoid
> the copy?) and no user bug reports to indicate that it is being used in
> the wild. As no one is hitting the buggy path, we can just remove the
> buggy code.
> 
> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

gem_tiled_*pread* shows the problem on gdg, where we do have bit17
swizzling but take different paths through pread/pwrite leaving the
result incorrectly swizzled ~50% of the time.

The easiest way I see to force balance universally is to say that if the
user wants to be stupid (mix pwrite/pread with tiling & swizzling), they
must pick up _all_ the pieces (rather than just take responsibility for
half the job). No one uses this interface, so punting the entire task to
userspace breaks nobody -- with the exception of teaching the igt tests
that what they ask for cannot be given if !known_swizzling.
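
For reference, the per-cacheline fixup being dropped (and which userspace
would have to reproduce itself if it insists on pread of a bit17-swizzled
object) is roughly the sketch below, modelled on the removed
__copy_to_user_swizzled(); page_is_bit17_swizzled here is a hypothetical
flag standing in for bit 17 of the page's physical address, i.e. exactly
the information userspace does not have when !known_swizzling:

static int
copy_to_user_bit17(char __user *cpu_vaddr, const char *gpu_vaddr,
		   int gpu_offset, int length, bool page_is_bit17_swizzled)
{
	int cpu_offset = 0;

	while (length > 0) {
		/* Work one 64-byte cacheline at a time... */
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		/* ...flipping bit 6 of the in-page offset whenever bit 17
		 * of the page's physical address is set. */
		int src_offset = page_is_bit17_swizzled ?
				 gpu_offset ^ 64 : gpu_offset;

		if (__copy_to_user(cpu_vaddr + cpu_offset,
				   gpu_vaddr + src_offset, this_length))
			return -EFAULT;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}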
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
  2018-07-23  9:11 ` Chris Wilson
@ 2018-07-23  9:49 ` Patchwork
  2018-07-23  9:50 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-07-23  9:49 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove partial attempt to swizzle on pread/pwrite
URL   : https://patchwork.freedesktop.org/series/47043/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
f91f607dc2fa drm/i915: Remove partial attempt to swizzle on pread/pwrite
-:21: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#21: 
References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")

-:21: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")'
#21: 
References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")

-:169: ERROR:SPACING: space prohibited after that '-' (ctx:WxW)
#169: FILE: drivers/gpu/drm/i915/i915_gem.c:1003:
+	return ret ? - EFAULT : 0;
 	             ^

total: 2 errors, 1 warnings, 0 checks, 270 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* ✗ Fi.CI.SPARSE: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
  2018-07-23  9:11 ` Chris Wilson
  2018-07-23  9:49 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
@ 2018-07-23  9:50 ` Patchwork
  2018-07-23 10:12 ` ✗ Fi.CI.BAT: failure " Patchwork
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-07-23  9:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove partial attempt to swizzle on pread/pwrite
URL   : https://patchwork.freedesktop.org/series/47043/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Commit: drm/i915: Remove partial attempt to swizzle on pread/pwrite
-O:drivers/gpu/drm/i915/i915_gem.c:871:35: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:871:35: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:897:35: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:897:35: warning: expression using sizeof(void)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* ✗ Fi.CI.BAT: failure for drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
                   ` (2 preceding siblings ...)
  2018-07-23  9:50 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-07-23 10:12 ` Patchwork
  2018-10-11 13:38 ` [PATCH] " Tvrtko Ursulin
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-07-23 10:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove partial attempt to swizzle on pread/pwrite
URL   : https://patchwork.freedesktop.org/series/47043/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4527 -> Patchwork_9745 =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_9745 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_9745, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47043/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_9745:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_tiled_fence_blits@basic:
      fi-gdg-551:         PASS -> FAIL

    
== Known issues ==

  Here are the changes found in Patchwork_9745 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
      fi-glk-j4005:       PASS -> FAIL (fdo#106765)

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-b:
      fi-snb-2520m:       PASS -> INCOMPLETE (fdo#103713)

    
    ==== Possible fixes ====

    igt@kms_flip@basic-plain-flip:
      fi-glk-j4005:       DMESG-WARN (fdo#106097) -> PASS

    igt@kms_frontbuffer_tracking@basic:
      fi-hsw-peppy:       DMESG-FAIL (fdo#106103, fdo#102614) -> PASS

    igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
      fi-glk-j4005:       DMESG-WARN (fdo#106000) -> PASS

    
  fdo#102614 https://bugs.freedesktop.org/show_bug.cgi?id=102614
  fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106097 https://bugs.freedesktop.org/show_bug.cgi?id=106097
  fdo#106103 https://bugs.freedesktop.org/show_bug.cgi?id=106103
  fdo#106765 https://bugs.freedesktop.org/show_bug.cgi?id=106765


== Participating hosts (47 -> 42) ==

  Missing    (5): fi-ctg-p8600 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u 


== Build changes ==

    * Linux: CI_DRM_4527 -> Patchwork_9745

  CI_DRM_4527: 4557d4e14760a3dcb43a6be65432ee6bafacdb9e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4571: 65fccc149b85968cdce4737266b056059c1510f3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_9745: f91f607dc2fab443a5188b349730b91a24339139 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

f91f607dc2fa drm/i915: Remove partial attempt to swizzle on pread/pwrite

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_9745/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
                   ` (3 preceding siblings ...)
  2018-07-23 10:12 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2018-10-11 13:38 ` Tvrtko Ursulin
  2018-10-11 14:30   ` Chris Wilson
  2018-10-12 11:56 ` [PATCH v3] " Chris Wilson
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-10-11 13:38 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 23/07/2018 10:02, Chris Wilson wrote:
> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> objects was flawed due to the simple fact that we do not always know the
> swizzling for a particular page (due to the swizzling varying based on
> location in certain unbalanced configurations). Furthermore, the
> pread/pwrite paths are now unbalanced in that we are required to use the
> GTT as in some cases we do not have direct CPU access to the backing
> physical pages (thus some paths trying to account for the swizzle, but
> others neglecting, chaos ensues).
> 
> There are no known users who do use pread/pwrite into a tiled object
> (you need to manually detile anyway, so why now just use mmap and avoid
> the copy?) and no user bug reports to indicate that it is being used in
> the wild. As no one is hitting the buggy path, we can just remove the
> buggy code.
> 
> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
>   1 file changed, 30 insertions(+), 161 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8b52cb768a67..73a69352e0d4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>   	obj->write_domain = 0;
>   }
>   
> -static inline int
> -__copy_to_user_swizzled(char __user *cpu_vaddr,
> -			const char *gpu_vaddr, int gpu_offset,
> -			int length)
> -{
> -	int ret, cpu_offset = 0;
> -
> -	while (length > 0) {
> -		int cacheline_end = ALIGN(gpu_offset + 1, 64);
> -		int this_length = min(cacheline_end - gpu_offset, length);
> -		int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> -		ret = __copy_to_user(cpu_vaddr + cpu_offset,
> -				     gpu_vaddr + swizzled_gpu_offset,
> -				     this_length);
> -		if (ret)
> -			return ret + length;
> -
> -		cpu_offset += this_length;
> -		gpu_offset += this_length;
> -		length -= this_length;
> -	}
> -
> -	return 0;
> -}
> -
> -static inline int
> -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> -			  const char __user *cpu_vaddr,
> -			  int length)
> -{
> -	int ret, cpu_offset = 0;
> -
> -	while (length > 0) {
> -		int cacheline_end = ALIGN(gpu_offset + 1, 64);
> -		int this_length = min(cacheline_end - gpu_offset, length);
> -		int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> -		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> -				       cpu_vaddr + cpu_offset,
> -				       this_length);
> -		if (ret)
> -			return ret + length;
> -
> -		cpu_offset += this_length;
> -		gpu_offset += this_length;
> -		length -= this_length;
> -	}
> -
> -	return 0;
> -}
> -
>   /*
>    * Pins the specified object's pages and synchronizes the object with
>    * GPU accesses. Sets needs_clflush to non-zero if the caller should
> @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>   	return ret;
>   }
>   
> -static void
> -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> -			     bool swizzled)
> -{
> -	if (unlikely(swizzled)) {
> -		unsigned long start = (unsigned long) addr;
> -		unsigned long end = (unsigned long) addr + length;
> -
> -		/* For swizzling simply ensure that we always flush both
> -		 * channels. Lame, but simple and it works. Swizzled
> -		 * pwrite/pread is far from a hotpath - current userspace
> -		 * doesn't use it at all. */
> -		start = round_down(start, 128);
> -		end = round_up(end, 128);
> -
> -		drm_clflush_virt_range((void *)start, end - start);
> -	} else {
> -		drm_clflush_virt_range(addr, length);
> -	}
> -
> -}
> -
> -/* Only difference to the fast-path function is that this can handle bit17
> - * and uses non-atomic copy and kmap functions. */
>   static int
> -shmem_pread_slow(struct page *page, int offset, int length,
> -		 char __user *user_data,
> -		 bool page_do_bit17_swizzling, bool needs_clflush)
> +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> +	    bool needs_clflush)
>   {
>   	char *vaddr;
>   	int ret;
>   
> -	vaddr = kmap(page);
> -	if (needs_clflush)
> -		shmem_clflush_swizzled_range(vaddr + offset, length,
> -					     page_do_bit17_swizzling);
> -
> -	if (page_do_bit17_swizzling)
> -		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> -	else
> -		ret = __copy_to_user(user_data, vaddr + offset, length);
> -	kunmap(page);
> +	vaddr = kmap_atomic(page);
>   
> -	return ret ? - EFAULT : 0;
> -}
> +	if (needs_clflush)
> +		drm_clflush_virt_range(vaddr + offset, len);
>   
> -static int
> -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> -	    bool page_do_bit17_swizzling, bool needs_clflush)
> -{
> -	int ret;
> +	ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);

Kerneldoc says userspace memory must be pinned so potential page faults 
are avoided. I don't see that our code does that.

>   
> -	ret = -ENODEV;
> -	if (!page_do_bit17_swizzling) {
> -		char *vaddr = kmap_atomic(page);
> +	kunmap_atomic(vaddr);
>   
> -		if (needs_clflush)
> -			drm_clflush_virt_range(vaddr + offset, length);
> -		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> -		kunmap_atomic(vaddr);
> +	if (ret) {
> +		vaddr = kmap(page);
> +		ret = __copy_to_user(user_data, vaddr + offset, len);
> +		kunmap(page);
>   	}
> -	if (ret == 0)
> -		return 0;
>   
> -	return shmem_pread_slow(page, offset, length, user_data,
> -				page_do_bit17_swizzling, needs_clflush);
> +	return ret ? - EFAULT : 0;

Why the space between - and EFAULT? Or why not just return ret?

>   }
>   
>   static int
> @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>   {
>   	char __user *user_data;
>   	u64 remain;
> -	unsigned int obj_do_bit17_swizzling;
>   	unsigned int needs_clflush;
>   	unsigned int idx, offset;
>   	int ret;
>   
> -	obj_do_bit17_swizzling = 0;
> -	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		obj_do_bit17_swizzling = BIT(17);
> -
>   	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
>   	if (ret)
>   		return ret;
> @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>   			length = PAGE_SIZE - offset;

It's pre-existing, but there seems to be a potential overflow in
'int length = remain' (remain being u64) a few lines above this.

>   
>   		ret = shmem_pread(page, offset, length, user_data,
> -				  page_to_phys(page) & obj_do_bit17_swizzling,
>   				  needs_clflush);
>   		if (ret)
>   			break;
> @@ -1475,33 +1374,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   	return ret;
>   }
>   
> -static int
> -shmem_pwrite_slow(struct page *page, int offset, int length,
> -		  char __user *user_data,
> -		  bool page_do_bit17_swizzling,
> -		  bool needs_clflush_before,
> -		  bool needs_clflush_after)
> -{
> -	char *vaddr;
> -	int ret;
> -
> -	vaddr = kmap(page);
> -	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> -		shmem_clflush_swizzled_range(vaddr + offset, length,
> -					     page_do_bit17_swizzling);
> -	if (page_do_bit17_swizzling)
> -		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
> -						length);
> -	else
> -		ret = __copy_from_user(vaddr + offset, user_data, length);
> -	if (needs_clflush_after)
> -		shmem_clflush_swizzled_range(vaddr + offset, length,
> -					     page_do_bit17_swizzling);
> -	kunmap(page);
> -
> -	return ret ? -EFAULT : 0;
> -}
> -
>   /* Per-page copy function for the shmem pwrite fastpath.
>    * Flushes invalid cachelines before writing to the target if
>    * needs_clflush_before is set and flushes out any written cachelines after
> @@ -1509,31 +1381,34 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
>    */
>   static int
>   shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> -	     bool page_do_bit17_swizzling,
>   	     bool needs_clflush_before,
>   	     bool needs_clflush_after)
>   {
> +	char *vaddr;
>   	int ret;
>   
> -	ret = -ENODEV;
> -	if (!page_do_bit17_swizzling) {
> -		char *vaddr = kmap_atomic(page);
> +	vaddr = kmap_atomic(page);
>   
> -		if (needs_clflush_before)
> -			drm_clflush_virt_range(vaddr + offset, len);
> -		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> -		if (needs_clflush_after)
> +	if (needs_clflush_before)
> +		drm_clflush_virt_range(vaddr + offset, len);
> +
> +	ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> +	if (!ret && needs_clflush_after)
> +		drm_clflush_virt_range(vaddr + offset, len);
> +
> +	kunmap_atomic(vaddr);
> +
> +	if (ret) {
> +		vaddr = kmap(page);
> +
> +		ret = __copy_from_user(vaddr + offset, user_data, len);
> +		if (!ret && needs_clflush_after)
>   			drm_clflush_virt_range(vaddr + offset, len);
>   
> -		kunmap_atomic(vaddr);
> +		kunmap(page);
>   	}
> -	if (ret == 0)
> -		return ret;
>   
> -	return shmem_pwrite_slow(page, offset, len, user_data,
> -				 page_do_bit17_swizzling,
> -				 needs_clflush_before,
> -				 needs_clflush_after);
> +	return ret ? -EFAULT : 0;
>   }
>   
>   static int
> @@ -1543,7 +1418,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>   	void __user *user_data;
>   	u64 remain;
> -	unsigned int obj_do_bit17_swizzling;
>   	unsigned int partial_cacheline_write;
>   	unsigned int needs_clflush;
>   	unsigned int offset, idx;
> @@ -1558,10 +1432,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	obj_do_bit17_swizzling = 0;
> -	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		obj_do_bit17_swizzling = BIT(17);
> -
>   	/* If we don't overwrite a cacheline completely we need to be
>   	 * careful to have up-to-date data by first clflushing. Don't
>   	 * overcomplicate things and flush the entire patch.
> @@ -1582,7 +1452,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   			length = PAGE_SIZE - offset;
>   
>   		ret = shmem_pwrite(page, offset, length, user_data,
> -				   page_to_phys(page) & obj_do_bit17_swizzling,
>   				   (offset | length) & partial_cacheline_write,
>   				   needs_clflush & CLFLUSH_AFTER);
>   		if (ret)
> 

Same comments basically as with the pread path.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-10-11 13:38 ` [PATCH] " Tvrtko Ursulin
@ 2018-10-11 14:30   ` Chris Wilson
  2018-10-11 14:44     ` Tvrtko Ursulin
  0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2018-10-11 14:30 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx

Quoting Tvrtko Ursulin (2018-10-11 14:38:41)
> 
> On 23/07/2018 10:02, Chris Wilson wrote:
> > Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> > objects was flawed due to the simple fact that we do not always know the
> > swizzling for a particular page (due to the swizzling varying based on
> > location in certain unbalanced configurations). Furthermore, the
> > pread/pwrite paths are now unbalanced in that we are required to use the
> > GTT as in some cases we do not have direct CPU access to the backing
> > physical pages (thus some paths trying to account for the swizzle, but
> > others neglecting, chaos ensues).
> > 
> > There are no known users who do use pread/pwrite into a tiled object
> > (you need to manually detile anyway, so why now just use mmap and avoid
> > the copy?) and no user bug reports to indicate that it is being used in
> > the wild. As no one is hitting the buggy path, we can just remove the
> > buggy code.
> > 
> > References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
> >   1 file changed, 30 insertions(+), 161 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 8b52cb768a67..73a69352e0d4 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> >       obj->write_domain = 0;
> >   }
> >   
> > -static inline int
> > -__copy_to_user_swizzled(char __user *cpu_vaddr,
> > -                     const char *gpu_vaddr, int gpu_offset,
> > -                     int length)
> > -{
> > -     int ret, cpu_offset = 0;
> > -
> > -     while (length > 0) {
> > -             int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > -             int this_length = min(cacheline_end - gpu_offset, length);
> > -             int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > -             ret = __copy_to_user(cpu_vaddr + cpu_offset,
> > -                                  gpu_vaddr + swizzled_gpu_offset,
> > -                                  this_length);
> > -             if (ret)
> > -                     return ret + length;
> > -
> > -             cpu_offset += this_length;
> > -             gpu_offset += this_length;
> > -             length -= this_length;
> > -     }
> > -
> > -     return 0;
> > -}
> > -
> > -static inline int
> > -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> > -                       const char __user *cpu_vaddr,
> > -                       int length)
> > -{
> > -     int ret, cpu_offset = 0;
> > -
> > -     while (length > 0) {
> > -             int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > -             int this_length = min(cacheline_end - gpu_offset, length);
> > -             int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > -             ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> > -                                    cpu_vaddr + cpu_offset,
> > -                                    this_length);
> > -             if (ret)
> > -                     return ret + length;
> > -
> > -             cpu_offset += this_length;
> > -             gpu_offset += this_length;
> > -             length -= this_length;
> > -     }
> > -
> > -     return 0;
> > -}
> > -
> >   /*
> >    * Pins the specified object's pages and synchronizes the object with
> >    * GPU accesses. Sets needs_clflush to non-zero if the caller should
> > @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> >       return ret;
> >   }
> >   
> > -static void
> > -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> > -                          bool swizzled)
> > -{
> > -     if (unlikely(swizzled)) {
> > -             unsigned long start = (unsigned long) addr;
> > -             unsigned long end = (unsigned long) addr + length;
> > -
> > -             /* For swizzling simply ensure that we always flush both
> > -              * channels. Lame, but simple and it works. Swizzled
> > -              * pwrite/pread is far from a hotpath - current userspace
> > -              * doesn't use it at all. */
> > -             start = round_down(start, 128);
> > -             end = round_up(end, 128);
> > -
> > -             drm_clflush_virt_range((void *)start, end - start);
> > -     } else {
> > -             drm_clflush_virt_range(addr, length);
> > -     }
> > -
> > -}
> > -
> > -/* Only difference to the fast-path function is that this can handle bit17
> > - * and uses non-atomic copy and kmap functions. */
> >   static int
> > -shmem_pread_slow(struct page *page, int offset, int length,
> > -              char __user *user_data,
> > -              bool page_do_bit17_swizzling, bool needs_clflush)
> > +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> > +         bool needs_clflush)
> >   {
> >       char *vaddr;
> >       int ret;
> >   
> > -     vaddr = kmap(page);
> > -     if (needs_clflush)
> > -             shmem_clflush_swizzled_range(vaddr + offset, length,
> > -                                          page_do_bit17_swizzling);
> > -
> > -     if (page_do_bit17_swizzling)
> > -             ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> > -     else
> > -             ret = __copy_to_user(user_data, vaddr + offset, length);
> > -     kunmap(page);
> > +     vaddr = kmap_atomic(page);
> >   
> > -     return ret ? - EFAULT : 0;
> > -}
> > +     if (needs_clflush)
> > +             drm_clflush_virt_range(vaddr + offset, len);
> >   
> > -static int
> > -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> > -         bool page_do_bit17_swizzling, bool needs_clflush)
> > -{
> > -     int ret;
> > +     ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
> 
> Kerneldoc says userspace memory must be pinned so potential page faults 
> are avoided. I don't see that our code does that.

kmap_atomic once upon a time gave assurances that _inatomic was safe.
It's not important; kmap() is good enough. Once upon a time there was a
major difference for kmap_atomic, as you had to use preallocated slots --
the good old days ;)
 
> > -     ret = -ENODEV;
> > -     if (!page_do_bit17_swizzling) {
> > -             char *vaddr = kmap_atomic(page);
> > +     kunmap_atomic(vaddr);
> >   
> > -             if (needs_clflush)
> > -                     drm_clflush_virt_range(vaddr + offset, length);
> > -             ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> > -             kunmap_atomic(vaddr);
> > +     if (ret) {
> > +             vaddr = kmap(page);
> > +             ret = __copy_to_user(user_data, vaddr + offset, len);
> > +             kunmap(page);
> >       }
> > -     if (ret == 0)
> > -             return 0;
> >   
> > -     return shmem_pread_slow(page, offset, length, user_data,
> > -                             page_do_bit17_swizzling, needs_clflush);
> > +     return ret ? - EFAULT : 0;
> 
> Why the space between - and EFAULT? Or why not just return ret?

Fat fingers.

> >   }
> >   
> >   static int
> > @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> >   {
> >       char __user *user_data;
> >       u64 remain;
> > -     unsigned int obj_do_bit17_swizzling;
> >       unsigned int needs_clflush;
> >       unsigned int idx, offset;
> >       int ret;
> >   
> > -     obj_do_bit17_swizzling = 0;
> > -     if (i915_gem_object_needs_bit17_swizzle(obj))
> > -             obj_do_bit17_swizzling = BIT(17);
> > -
> >       ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
> >       if (ret)
> >               return ret;
> > @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> >                       length = PAGE_SIZE - offset;
> 
> It's pre-existing, but there seems to be a potential overflow of int 
> length = u64 remain a few lines above this.

Hmm. If so, that's one that escaped my perusal. Time for sanity checks
probably.
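
Something along these lines should do, clamping against the remainder of
the page before narrowing from u64 so that a huge 'remain' can never
truncate into the per-page length (a sketch only, matching the loop in
i915_gem_shmem_pread()):

		unsigned int length;

		if (remain > PAGE_SIZE - offset)
			length = PAGE_SIZE - offset;
		else
			length = remain;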
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* Re: [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-10-11 14:30   ` Chris Wilson
@ 2018-10-11 14:44     ` Tvrtko Ursulin
  0 siblings, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-10-11 14:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 11/10/2018 15:30, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-10-11 14:38:41)
>>
>> On 23/07/2018 10:02, Chris Wilson wrote:
>>> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
>>> objects was flawed due to the simple fact that we do not always know the
>>> swizzling for a particular page (due to the swizzling varying based on
>>> location in certain unbalanced configurations). Furthermore, the
>>> pread/pwrite paths are now unbalanced in that we are required to use the
>>> GTT as in some cases we do not have direct CPU access to the backing
>>> physical pages (thus some paths trying to account for the swizzle, but
>>> others neglecting, chaos ensues).
>>>
>>> There are no known users who do use pread/pwrite into a tiled object
>>> (you need to manually detile anyway, so why now just use mmap and avoid
>>> the copy?) and no user bug reports to indicate that it is being used in
>>> the wild. As no one is hitting the buggy path, we can just remove the
>>> buggy code.
>>>
>>> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_gem.c | 191 +++++---------------------------
>>>    1 file changed, 30 insertions(+), 161 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 8b52cb768a67..73a69352e0d4 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>>>        obj->write_domain = 0;
>>>    }
>>>    
>>> -static inline int
>>> -__copy_to_user_swizzled(char __user *cpu_vaddr,
>>> -                     const char *gpu_vaddr, int gpu_offset,
>>> -                     int length)
>>> -{
>>> -     int ret, cpu_offset = 0;
>>> -
>>> -     while (length > 0) {
>>> -             int cacheline_end = ALIGN(gpu_offset + 1, 64);
>>> -             int this_length = min(cacheline_end - gpu_offset, length);
>>> -             int swizzled_gpu_offset = gpu_offset ^ 64;
>>> -
>>> -             ret = __copy_to_user(cpu_vaddr + cpu_offset,
>>> -                                  gpu_vaddr + swizzled_gpu_offset,
>>> -                                  this_length);
>>> -             if (ret)
>>> -                     return ret + length;
>>> -
>>> -             cpu_offset += this_length;
>>> -             gpu_offset += this_length;
>>> -             length -= this_length;
>>> -     }
>>> -
>>> -     return 0;
>>> -}
>>> -
>>> -static inline int
>>> -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
>>> -                       const char __user *cpu_vaddr,
>>> -                       int length)
>>> -{
>>> -     int ret, cpu_offset = 0;
>>> -
>>> -     while (length > 0) {
>>> -             int cacheline_end = ALIGN(gpu_offset + 1, 64);
>>> -             int this_length = min(cacheline_end - gpu_offset, length);
>>> -             int swizzled_gpu_offset = gpu_offset ^ 64;
>>> -
>>> -             ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
>>> -                                    cpu_vaddr + cpu_offset,
>>> -                                    this_length);
>>> -             if (ret)
>>> -                     return ret + length;
>>> -
>>> -             cpu_offset += this_length;
>>> -             gpu_offset += this_length;
>>> -             length -= this_length;
>>> -     }
>>> -
>>> -     return 0;
>>> -}
>>> -
>>>    /*
>>>     * Pins the specified object's pages and synchronizes the object with
>>>     * GPU accesses. Sets needs_clflush to non-zero if the caller should
>>> @@ -1030,72 +978,29 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>>>        return ret;
>>>    }
>>>    
>>> -static void
>>> -shmem_clflush_swizzled_range(char *addr, unsigned long length,
>>> -                          bool swizzled)
>>> -{
>>> -     if (unlikely(swizzled)) {
>>> -             unsigned long start = (unsigned long) addr;
>>> -             unsigned long end = (unsigned long) addr + length;
>>> -
>>> -             /* For swizzling simply ensure that we always flush both
>>> -              * channels. Lame, but simple and it works. Swizzled
>>> -              * pwrite/pread is far from a hotpath - current userspace
>>> -              * doesn't use it at all. */
>>> -             start = round_down(start, 128);
>>> -             end = round_up(end, 128);
>>> -
>>> -             drm_clflush_virt_range((void *)start, end - start);
>>> -     } else {
>>> -             drm_clflush_virt_range(addr, length);
>>> -     }
>>> -
>>> -}
>>> -
>>> -/* Only difference to the fast-path function is that this can handle bit17
>>> - * and uses non-atomic copy and kmap functions. */
>>>    static int
>>> -shmem_pread_slow(struct page *page, int offset, int length,
>>> -              char __user *user_data,
>>> -              bool page_do_bit17_swizzling, bool needs_clflush)
>>> +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
>>> +         bool needs_clflush)
>>>    {
>>>        char *vaddr;
>>>        int ret;
>>>    
>>> -     vaddr = kmap(page);
>>> -     if (needs_clflush)
>>> -             shmem_clflush_swizzled_range(vaddr + offset, length,
>>> -                                          page_do_bit17_swizzling);
>>> -
>>> -     if (page_do_bit17_swizzling)
>>> -             ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
>>> -     else
>>> -             ret = __copy_to_user(user_data, vaddr + offset, length);
>>> -     kunmap(page);
>>> +     vaddr = kmap_atomic(page);
>>>    
>>> -     return ret ? - EFAULT : 0;
>>> -}
>>> +     if (needs_clflush)
>>> +             drm_clflush_virt_range(vaddr + offset, len);
>>>    
>>> -static int
>>> -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
>>> -         bool page_do_bit17_swizzling, bool needs_clflush)
>>> -{
>>> -     int ret;
>>> +     ret = __copy_to_user_inatomic(user_data, vaddr + offset, len);
>>
>> Kerneldoc says userspace memory must be pinned so potential page faults
>> are avoided. I don't see that our code does that.
> 
> kmap_atomic once upon a time gave assurances that _inatomic was safe.
> It's not important, kmap() is good enough (once upon a time there was a
> major difference for kmap_atomic as you had to use preallocated slots,
> the good old days ;)

kmap_atomic covers only one end of the copy though; user_data is the
other end, which also mustn't fault. But never mind, the kmap-only path
indeed sounds fine.

>   
>>> -     ret = -ENODEV;
>>> -     if (!page_do_bit17_swizzling) {
>>> -             char *vaddr = kmap_atomic(page);
>>> +     kunmap_atomic(vaddr);
>>>    
>>> -             if (needs_clflush)
>>> -                     drm_clflush_virt_range(vaddr + offset, length);
>>> -             ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
>>> -             kunmap_atomic(vaddr);
>>> +     if (ret) {
>>> +             vaddr = kmap(page);
>>> +             ret = __copy_to_user(user_data, vaddr + offset, len);
>>> +             kunmap(page);
>>>        }
>>> -     if (ret == 0)
>>> -             return 0;
>>>    
>>> -     return shmem_pread_slow(page, offset, length, user_data,
>>> -                             page_do_bit17_swizzling, needs_clflush);
>>> +     return ret ? - EFAULT : 0;
>>
>> Why the space between - and EFAULT? Or why not just return ret?
> 
> Fat fingers.
> 
>>>    }
>>>    
>>>    static int
>>> @@ -1104,15 +1009,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>>>    {
>>>        char __user *user_data;
>>>        u64 remain;
>>> -     unsigned int obj_do_bit17_swizzling;
>>>        unsigned int needs_clflush;
>>>        unsigned int idx, offset;
>>>        int ret;
>>>    
>>> -     obj_do_bit17_swizzling = 0;
>>> -     if (i915_gem_object_needs_bit17_swizzle(obj))
>>> -             obj_do_bit17_swizzling = BIT(17);
>>> -
>>>        ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
>>>        if (ret)
>>>                return ret;
>>> @@ -1134,7 +1034,6 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>>>                        length = PAGE_SIZE - offset;
>>
>> It's pre-existing, but there seems to be a potential overflow of int
>> length = u64 remain a few lines above this.
> 
> Hmm. If so, that's one that escaped my perusal. Time for sanity checks
> probably.

Feel free to leave it for a separate patch/series, it was just an 
observation. Or even see if you can delegate it. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH v3] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
                   ` (4 preceding siblings ...)
  2018-10-11 13:38 ` [PATCH] " Tvrtko Ursulin
@ 2018-10-12 11:56 ` Chris Wilson
  2018-10-12 12:22   ` Tvrtko Ursulin
  2018-11-16 15:02   ` Joonas Lahtinen
  2018-10-12 12:21 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2) Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 2 replies; 15+ messages in thread
From: Chris Wilson @ 2018-10-12 11:56 UTC (permalink / raw)
  To: intel-gfx

Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
objects was flawed due to the simple fact that we do not always know the
swizzling for a particular page (the swizzling varies with physical
location in certain unbalanced memory configurations). Furthermore, the
pread/pwrite paths are now unbalanced in that we are sometimes required
to use the GTT because we do not have direct CPU access to the backing
physical pages, so some paths try to account for the swizzle while
others neglect it, and chaos ensues.

There are no known users who use pread/pwrite into a tiled object
(you need to manually detile anyway, so why not just use mmap and avoid
the copy?) and no user bug reports to indicate that it is being used in
the wild. As no one is hitting the buggy path, we can just remove the
buggy code.

v2: Just use the fault allowing kmap() + normal copy_(to|from)_user
v3: Avoid int overflow in computing 'length' from 'remain' (Tvrtko)

References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 195 ++++----------------------------
 1 file changed, 25 insertions(+), 170 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7d45e71100bc..8049458ab6ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 	obj->write_domain = 0;
 }
 
-static inline int
-__copy_to_user_swizzled(char __user *cpu_vaddr,
-			const char *gpu_vaddr, int gpu_offset,
-			int length)
-{
-	int ret, cpu_offset = 0;
-
-	while (length > 0) {
-		int cacheline_end = ALIGN(gpu_offset + 1, 64);
-		int this_length = min(cacheline_end - gpu_offset, length);
-		int swizzled_gpu_offset = gpu_offset ^ 64;
-
-		ret = __copy_to_user(cpu_vaddr + cpu_offset,
-				     gpu_vaddr + swizzled_gpu_offset,
-				     this_length);
-		if (ret)
-			return ret + length;
-
-		cpu_offset += this_length;
-		gpu_offset += this_length;
-		length -= this_length;
-	}
-
-	return 0;
-}
-
-static inline int
-__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
-			  const char __user *cpu_vaddr,
-			  int length)
-{
-	int ret, cpu_offset = 0;
-
-	while (length > 0) {
-		int cacheline_end = ALIGN(gpu_offset + 1, 64);
-		int this_length = min(cacheline_end - gpu_offset, length);
-		int swizzled_gpu_offset = gpu_offset ^ 64;
-
-		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
-				       cpu_vaddr + cpu_offset,
-				       this_length);
-		if (ret)
-			return ret + length;
-
-		cpu_offset += this_length;
-		gpu_offset += this_length;
-		length -= this_length;
-	}
-
-	return 0;
-}
-
 /*
  * Pins the specified object's pages and synchronizes the object with
  * GPU accesses. Sets needs_clflush to non-zero if the caller should
@@ -1030,72 +978,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
-static void
-shmem_clflush_swizzled_range(char *addr, unsigned long length,
-			     bool swizzled)
-{
-	if (unlikely(swizzled)) {
-		unsigned long start = (unsigned long) addr;
-		unsigned long end = (unsigned long) addr + length;
-
-		/* For swizzling simply ensure that we always flush both
-		 * channels. Lame, but simple and it works. Swizzled
-		 * pwrite/pread is far from a hotpath - current userspace
-		 * doesn't use it at all. */
-		start = round_down(start, 128);
-		end = round_up(end, 128);
-
-		drm_clflush_virt_range((void *)start, end - start);
-	} else {
-		drm_clflush_virt_range(addr, length);
-	}
-
-}
-
-/* Only difference to the fast-path function is that this can handle bit17
- * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int offset, int length,
-		 char __user *user_data,
-		 bool page_do_bit17_swizzling, bool needs_clflush)
+shmem_pread(struct page *page, int offset, int len, char __user *user_data,
+	    bool needs_clflush)
 {
 	char *vaddr;
 	int ret;
 
 	vaddr = kmap(page);
-	if (needs_clflush)
-		shmem_clflush_swizzled_range(vaddr + offset, length,
-					     page_do_bit17_swizzling);
 
-	if (page_do_bit17_swizzling)
-		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
-	else
-		ret = __copy_to_user(user_data, vaddr + offset, length);
-	kunmap(page);
-
-	return ret ? - EFAULT : 0;
-}
-
-static int
-shmem_pread(struct page *page, int offset, int length, char __user *user_data,
-	    bool page_do_bit17_swizzling, bool needs_clflush)
-{
-	int ret;
+	if (needs_clflush)
+		drm_clflush_virt_range(vaddr + offset, len);
 
-	ret = -ENODEV;
-	if (!page_do_bit17_swizzling) {
-		char *vaddr = kmap_atomic(page);
+	ret = __copy_to_user(user_data, vaddr + offset, len);
 
-		if (needs_clflush)
-			drm_clflush_virt_range(vaddr + offset, length);
-		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
-		kunmap_atomic(vaddr);
-	}
-	if (ret == 0)
-		return 0;
+	kunmap(page);
 
-	return shmem_pread_slow(page, offset, length, user_data,
-				page_do_bit17_swizzling, needs_clflush);
+	return ret ? -EFAULT : 0;
 }
 
 static int
@@ -1104,15 +1003,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 {
 	char __user *user_data;
 	u64 remain;
-	unsigned int obj_do_bit17_swizzling;
 	unsigned int needs_clflush;
 	unsigned int idx, offset;
 	int ret;
 
-	obj_do_bit17_swizzling = 0;
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		obj_do_bit17_swizzling = BIT(17);
-
 	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -1127,14 +1021,14 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 	offset = offset_in_page(args->offset);
 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
 		struct page *page = i915_gem_object_get_page(obj, idx);
-		int length;
+		unsigned int length;
 
-		length = remain;
-		if (offset + length > PAGE_SIZE)
+		if (remain > PAGE_SIZE - offset)
 			length = PAGE_SIZE - offset;
+		else
+			length = remain;
 
 		ret = shmem_pread(page, offset, length, user_data,
-				  page_to_phys(page) & obj_do_bit17_swizzling,
 				  needs_clflush);
 		if (ret)
 			break;
@@ -1475,33 +1369,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
-static int
-shmem_pwrite_slow(struct page *page, int offset, int length,
-		  char __user *user_data,
-		  bool page_do_bit17_swizzling,
-		  bool needs_clflush_before,
-		  bool needs_clflush_after)
-{
-	char *vaddr;
-	int ret;
-
-	vaddr = kmap(page);
-	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
-		shmem_clflush_swizzled_range(vaddr + offset, length,
-					     page_do_bit17_swizzling);
-	if (page_do_bit17_swizzling)
-		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
-						length);
-	else
-		ret = __copy_from_user(vaddr + offset, user_data, length);
-	if (needs_clflush_after)
-		shmem_clflush_swizzled_range(vaddr + offset, length,
-					     page_do_bit17_swizzling);
-	kunmap(page);
-
-	return ret ? -EFAULT : 0;
-}
-
 /* Per-page copy function for the shmem pwrite fastpath.
  * Flushes invalid cachelines before writing to the target if
  * needs_clflush_before is set and flushes out any written cachelines after
@@ -1509,31 +1376,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
  */
 static int
 shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
-	     bool page_do_bit17_swizzling,
 	     bool needs_clflush_before,
 	     bool needs_clflush_after)
 {
+	char *vaddr;
 	int ret;
 
-	ret = -ENODEV;
-	if (!page_do_bit17_swizzling) {
-		char *vaddr = kmap_atomic(page);
+	vaddr = kmap(page);
 
-		if (needs_clflush_before)
-			drm_clflush_virt_range(vaddr + offset, len);
-		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
-		if (needs_clflush_after)
-			drm_clflush_virt_range(vaddr + offset, len);
+	if (needs_clflush_before)
+		drm_clflush_virt_range(vaddr + offset, len);
 
-		kunmap_atomic(vaddr);
-	}
-	if (ret == 0)
-		return ret;
+	ret = __copy_from_user(vaddr + offset, user_data, len);
+	if (!ret && needs_clflush_after)
+		drm_clflush_virt_range(vaddr + offset, len);
+
+	kunmap(page);
 
-	return shmem_pwrite_slow(page, offset, len, user_data,
-				 page_do_bit17_swizzling,
-				 needs_clflush_before,
-				 needs_clflush_after);
+	return ret ? -EFAULT : 0;
 }
 
 static int
@@ -1543,7 +1403,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	void __user *user_data;
 	u64 remain;
-	unsigned int obj_do_bit17_swizzling;
 	unsigned int partial_cacheline_write;
 	unsigned int needs_clflush;
 	unsigned int offset, idx;
@@ -1558,10 +1417,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	obj_do_bit17_swizzling = 0;
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		obj_do_bit17_swizzling = BIT(17);
-
 	/* If we don't overwrite a cacheline completely we need to be
 	 * careful to have up-to-date data by first clflushing. Don't
 	 * overcomplicate things and flush the entire patch.
@@ -1575,14 +1430,14 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	offset = offset_in_page(args->offset);
 	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
 		struct page *page = i915_gem_object_get_page(obj, idx);
-		int length;
+		unsigned int length;
 
-		length = remain;
-		if (offset + length > PAGE_SIZE)
+		if (remain > PAGE_SIZE - offset)
 			length = PAGE_SIZE - offset;
+		else
+			length = remain;
 
 		ret = shmem_pwrite(page, offset, length, user_data,
-				   page_to_phys(page) & obj_do_bit17_swizzling,
 				   (offset | length) & partial_cacheline_write,
 				   needs_clflush & CLFLUSH_AFTER);
 		if (ret)
-- 
2.19.1
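
The per-page clamp in both copy loops deliberately compares the 64-bit
'remain' against the space left in the page rather than adding it to
'offset', so the remaining byte count is never truncated into an int
before the check. A minimal standalone sketch of the same pattern, with
an assumed PAGE_SIZE value and illustrative names only:

#include <stdint.h>

#define PAGE_SIZE 4096u	/* assumed value, for the sketch only */

/*
 * Clamp a 64-bit remaining byte count to what fits in the current page.
 * Comparing 'remain' against PAGE_SIZE - offset avoids the truncation
 * that 'offset + (int)remain > PAGE_SIZE' would risk for large copies.
 */
static unsigned int page_chunk(uint64_t remain, unsigned int offset)
{
	if (remain > PAGE_SIZE - offset)
		return PAGE_SIZE - offset;
	return (unsigned int)remain;
}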

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2)
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
                   ` (5 preceding siblings ...)
  2018-10-12 11:56 ` [PATCH v3] " Chris Wilson
@ 2018-10-12 12:21 ` Patchwork
  2018-10-12 12:21 ` ✗ Fi.CI.SPARSE: " Patchwork
  2018-10-12 12:46 ` ✗ Fi.CI.BAT: failure " Patchwork
  8 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-10-12 12:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2)
URL   : https://patchwork.freedesktop.org/series/47043/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
1f483603ebf0 drm/i915: Remove partial attempt to swizzle on pread/pwrite
-:24: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#24: 
References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")

-:24: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")'
#24: 
References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")

total: 1 errors, 1 warnings, 0 checks, 279 lines checked


^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2)
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
                   ` (6 preceding siblings ...)
  2018-10-12 12:21 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2) Patchwork
@ 2018-10-12 12:21 ` Patchwork
  2018-10-12 12:46 ` ✗ Fi.CI.BAT: failure " Patchwork
  8 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-10-12 12:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2)
URL   : https://patchwork.freedesktop.org/series/47043/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: Remove partial attempt to swizzle on pread/pwrite
-O:drivers/gpu/drm/i915/i915_gem.c:871:35: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:871:35: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:897:35: warning: expression using sizeof(void)
-O:drivers/gpu/drm/i915/i915_gem.c:897:35: warning: expression using sizeof(void)


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-10-12 11:56 ` [PATCH v3] " Chris Wilson
@ 2018-10-12 12:22   ` Tvrtko Ursulin
  2018-11-16 15:02   ` Joonas Lahtinen
  1 sibling, 0 replies; 15+ messages in thread
From: Tvrtko Ursulin @ 2018-10-12 12:22 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 12/10/2018 12:56, Chris Wilson wrote:
> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> objects was flawed due to the simple fact that we do not always know the
> swizzling for a particular page (due to the swizzling varying based on
> location in certain unbalanced configurations). Furthermore, the
> pread/pwrite paths are now unbalanced in that we are required to use the
> GTT as in some cases we do not have direct CPU access to the backing
> physical pages (thus some paths trying to account for the swizzle, but
> others neglecting, chaos ensues).
> 
> There are no known users who do use pread/pwrite into a tiled object
> (you need to manually detile anyway, so why not just use mmap and avoid
> the copy?) and no user bug reports to indicate that it is being used in
> the wild. As no one is hitting the buggy path, we can just remove the
> buggy code.
> 
> v2: Just use the fault allowing kmap() + normal copy_(to|from)_user
> v3: Avoid int overflow in computing 'length' from 'remain' (Tvrtko)
> 
> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 195 ++++----------------------------
>   1 file changed, 25 insertions(+), 170 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7d45e71100bc..8049458ab6ac 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>   	obj->write_domain = 0;
>   }
>   
> -static inline int
> -__copy_to_user_swizzled(char __user *cpu_vaddr,
> -			const char *gpu_vaddr, int gpu_offset,
> -			int length)
> -{
> -	int ret, cpu_offset = 0;
> -
> -	while (length > 0) {
> -		int cacheline_end = ALIGN(gpu_offset + 1, 64);
> -		int this_length = min(cacheline_end - gpu_offset, length);
> -		int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> -		ret = __copy_to_user(cpu_vaddr + cpu_offset,
> -				     gpu_vaddr + swizzled_gpu_offset,
> -				     this_length);
> -		if (ret)
> -			return ret + length;
> -
> -		cpu_offset += this_length;
> -		gpu_offset += this_length;
> -		length -= this_length;
> -	}
> -
> -	return 0;
> -}
> -
> -static inline int
> -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> -			  const char __user *cpu_vaddr,
> -			  int length)
> -{
> -	int ret, cpu_offset = 0;
> -
> -	while (length > 0) {
> -		int cacheline_end = ALIGN(gpu_offset + 1, 64);
> -		int this_length = min(cacheline_end - gpu_offset, length);
> -		int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> -		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> -				       cpu_vaddr + cpu_offset,
> -				       this_length);
> -		if (ret)
> -			return ret + length;
> -
> -		cpu_offset += this_length;
> -		gpu_offset += this_length;
> -		length -= this_length;
> -	}
> -
> -	return 0;
> -}
> -
>   /*
>    * Pins the specified object's pages and synchronizes the object with
>    * GPU accesses. Sets needs_clflush to non-zero if the caller should
> @@ -1030,72 +978,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>   	return ret;
>   }
>   
> -static void
> -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> -			     bool swizzled)
> -{
> -	if (unlikely(swizzled)) {
> -		unsigned long start = (unsigned long) addr;
> -		unsigned long end = (unsigned long) addr + length;
> -
> -		/* For swizzling simply ensure that we always flush both
> -		 * channels. Lame, but simple and it works. Swizzled
> -		 * pwrite/pread is far from a hotpath - current userspace
> -		 * doesn't use it at all. */
> -		start = round_down(start, 128);
> -		end = round_up(end, 128);
> -
> -		drm_clflush_virt_range((void *)start, end - start);
> -	} else {
> -		drm_clflush_virt_range(addr, length);
> -	}
> -
> -}
> -
> -/* Only difference to the fast-path function is that this can handle bit17
> - * and uses non-atomic copy and kmap functions. */
>   static int
> -shmem_pread_slow(struct page *page, int offset, int length,
> -		 char __user *user_data,
> -		 bool page_do_bit17_swizzling, bool needs_clflush)
> +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> +	    bool needs_clflush)
>   {
>   	char *vaddr;
>   	int ret;
>   
>   	vaddr = kmap(page);
> -	if (needs_clflush)
> -		shmem_clflush_swizzled_range(vaddr + offset, length,
> -					     page_do_bit17_swizzling);
>   
> -	if (page_do_bit17_swizzling)
> -		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> -	else
> -		ret = __copy_to_user(user_data, vaddr + offset, length);
> -	kunmap(page);
> -
> -	return ret ? - EFAULT : 0;
> -}
> -
> -static int
> -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> -	    bool page_do_bit17_swizzling, bool needs_clflush)
> -{
> -	int ret;
> +	if (needs_clflush)
> +		drm_clflush_virt_range(vaddr + offset, len);
>   
> -	ret = -ENODEV;
> -	if (!page_do_bit17_swizzling) {
> -		char *vaddr = kmap_atomic(page);
> +	ret = __copy_to_user(user_data, vaddr + offset, len);
>   
> -		if (needs_clflush)
> -			drm_clflush_virt_range(vaddr + offset, length);
> -		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> -		kunmap_atomic(vaddr);
> -	}
> -	if (ret == 0)
> -		return 0;
> +	kunmap(page);
>   
> -	return shmem_pread_slow(page, offset, length, user_data,
> -				page_do_bit17_swizzling, needs_clflush);
> +	return ret ? -EFAULT : 0;
>   }
>   
>   static int
> @@ -1104,15 +1003,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>   {
>   	char __user *user_data;
>   	u64 remain;
> -	unsigned int obj_do_bit17_swizzling;
>   	unsigned int needs_clflush;
>   	unsigned int idx, offset;
>   	int ret;
>   
> -	obj_do_bit17_swizzling = 0;
> -	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		obj_do_bit17_swizzling = BIT(17);
> -
>   	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
>   	if (ret)
>   		return ret;
> @@ -1127,14 +1021,14 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>   	offset = offset_in_page(args->offset);
>   	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
>   		struct page *page = i915_gem_object_get_page(obj, idx);
> -		int length;
> +		unsigned int length;
>   
> -		length = remain;
> -		if (offset + length > PAGE_SIZE)
> +		if (remain > PAGE_SIZE - offset)
>   			length = PAGE_SIZE - offset;
> +		else
> +			length = remain;
>   
>   		ret = shmem_pread(page, offset, length, user_data,
> -				  page_to_phys(page) & obj_do_bit17_swizzling,
>   				  needs_clflush);
>   		if (ret)
>   			break;
> @@ -1475,33 +1369,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>   	return ret;
>   }
>   
> -static int
> -shmem_pwrite_slow(struct page *page, int offset, int length,
> -		  char __user *user_data,
> -		  bool page_do_bit17_swizzling,
> -		  bool needs_clflush_before,
> -		  bool needs_clflush_after)
> -{
> -	char *vaddr;
> -	int ret;
> -
> -	vaddr = kmap(page);
> -	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> -		shmem_clflush_swizzled_range(vaddr + offset, length,
> -					     page_do_bit17_swizzling);
> -	if (page_do_bit17_swizzling)
> -		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
> -						length);
> -	else
> -		ret = __copy_from_user(vaddr + offset, user_data, length);
> -	if (needs_clflush_after)
> -		shmem_clflush_swizzled_range(vaddr + offset, length,
> -					     page_do_bit17_swizzling);
> -	kunmap(page);
> -
> -	return ret ? -EFAULT : 0;
> -}
> -
>   /* Per-page copy function for the shmem pwrite fastpath.
>    * Flushes invalid cachelines before writing to the target if
>    * needs_clflush_before is set and flushes out any written cachelines after
> @@ -1509,31 +1376,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
>    */
>   static int
>   shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> -	     bool page_do_bit17_swizzling,
>   	     bool needs_clflush_before,
>   	     bool needs_clflush_after)
>   {
> +	char *vaddr;
>   	int ret;
>   
> -	ret = -ENODEV;
> -	if (!page_do_bit17_swizzling) {
> -		char *vaddr = kmap_atomic(page);
> +	vaddr = kmap(page);
>   
> -		if (needs_clflush_before)
> -			drm_clflush_virt_range(vaddr + offset, len);
> -		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> -		if (needs_clflush_after)
> -			drm_clflush_virt_range(vaddr + offset, len);
> +	if (needs_clflush_before)
> +		drm_clflush_virt_range(vaddr + offset, len);
>   
> -		kunmap_atomic(vaddr);
> -	}
> -	if (ret == 0)
> -		return ret;
> +	ret = __copy_from_user(vaddr + offset, user_data, len);
> +	if (!ret && needs_clflush_after)
> +		drm_clflush_virt_range(vaddr + offset, len);
> +
> +	kunmap(page);
>   
> -	return shmem_pwrite_slow(page, offset, len, user_data,
> -				 page_do_bit17_swizzling,
> -				 needs_clflush_before,
> -				 needs_clflush_after);
> +	return ret ? -EFAULT : 0;
>   }
>   
>   static int
> @@ -1543,7 +1403,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>   	void __user *user_data;
>   	u64 remain;
> -	unsigned int obj_do_bit17_swizzling;
>   	unsigned int partial_cacheline_write;
>   	unsigned int needs_clflush;
>   	unsigned int offset, idx;
> @@ -1558,10 +1417,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	obj_do_bit17_swizzling = 0;
> -	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		obj_do_bit17_swizzling = BIT(17);
> -
>   	/* If we don't overwrite a cacheline completely we need to be
>   	 * careful to have up-to-date data by first clflushing. Don't
>   	 * overcomplicate things and flush the entire patch.
> @@ -1575,14 +1430,14 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>   	offset = offset_in_page(args->offset);
>   	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
>   		struct page *page = i915_gem_object_get_page(obj, idx);
> -		int length;
> +		unsigned int length;
>   
> -		length = remain;
> -		if (offset + length > PAGE_SIZE)
> +		if (remain > PAGE_SIZE - offset)
>   			length = PAGE_SIZE - offset;
> +		else
> +			length = remain;
>   
>   		ret = shmem_pwrite(page, offset, length, user_data,
> -				   page_to_phys(page) & obj_do_bit17_swizzling,
>   				   (offset | length) & partial_cacheline_write,
>   				   needs_clflush & CLFLUSH_AFTER);
>   		if (ret)
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

But as mentioned before, please gather some more acks concerning the uAPI
removal; well, not strictly a removal, but a behaviour/functionality change.

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 15+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2)
  2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
                   ` (7 preceding siblings ...)
  2018-10-12 12:21 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2018-10-12 12:46 ` Patchwork
  8 siblings, 0 replies; 15+ messages in thread
From: Patchwork @ 2018-10-12 12:46 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2)
URL   : https://patchwork.freedesktop.org/series/47043/
State : failure

== Summary ==

= CI Bug Log - changes from CI_DRM_4976 -> Patchwork_10436 =

== Summary - FAILURE ==

  Serious unknown changes coming with Patchwork_10436 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_10436, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47043/revisions/2/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in Patchwork_10436:

  === IGT changes ===

    ==== Possible regressions ====

    igt@gem_tiled_fence_blits@basic:
      fi-gdg-551:         PASS -> FAIL

    
== Known issues ==

  Here are the changes found in Patchwork_10436 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_selftest@live_evict:
      fi-bsw-kefka:       PASS -> DMESG-WARN (fdo#107709)

    igt@gem_exec_suspend@basic-s3:
      fi-icl-u:           PASS -> INCOMPLETE (fdo#107713)

    igt@kms_pipe_crc_basic@hang-read-crc-pipe-b:
      fi-skl-6700k2:      PASS -> FAIL (fdo#107362, fdo#103191)

    
    ==== Possible fixes ====

    igt@kms_pipe_crc_basic@read-crc-pipe-c:
      fi-skl-6700k2:      FAIL (fdo#107362, fdo#103191) -> PASS

    igt@pm_rpm@module-reload:
      fi-skl-6600u:       INCOMPLETE (fdo#107807) -> PASS

    
  fdo#103191 https://bugs.freedesktop.org/show_bug.cgi?id=103191
  fdo#107362 https://bugs.freedesktop.org/show_bug.cgi?id=107362
  fdo#107709 https://bugs.freedesktop.org/show_bug.cgi?id=107709
  fdo#107713 https://bugs.freedesktop.org/show_bug.cgi?id=107713
  fdo#107807 https://bugs.freedesktop.org/show_bug.cgi?id=107807


== Participating hosts (45 -> 39) ==

  Missing    (6): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-skl-6260u fi-ctg-p8600 fi-glk-j4005 


== Build changes ==

    * Linux: CI_DRM_4976 -> Patchwork_10436

  CI_DRM_4976: dec9886eff39d38332bb5ea438b9b053d6b2177c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_4674: 93871c6fb3c25e5d350c9faf36ded917174214de @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_10436: 1f483603ebf07e7b1e676a99d7c870cdad8867b7 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

1f483603ebf0 drm/i915: Remove partial attempt to swizzle on pread/pwrite

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_10436/issues.html

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-10-12 11:56 ` [PATCH v3] " Chris Wilson
  2018-10-12 12:22   ` Tvrtko Ursulin
@ 2018-11-16 15:02   ` Joonas Lahtinen
  2018-11-16 15:59     ` Kenneth Graunke
  1 sibling, 1 reply; 15+ messages in thread
From: Joonas Lahtinen @ 2018-11-16 15:02 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Kenneth Graunke, Jason Ekstrand

Quoting Chris Wilson (2018-10-12 14:56:21)
> Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> objects was flawed due to the simple fact that we do not always know the
> swizzling for a particular page (due to the swizzling varying based on
> location in certain unbalanced configurations). Furthermore, the
> pread/pwrite paths are now unbalanced in that we are required to use the
> GTT as in some cases we do not have direct CPU access to the backing
> physical pages (thus some paths trying to account for the swizzle, but
> others neglecting, chaos ensues).
> 
> There are no known users who do use pread/pwrite into a tiled object
> (you need to manually detile anyway, so why not just use mmap and avoid
> the copy?) and no user bug reports to indicate that it is being used in
> the wild. As no one is hitting the buggy path, we can just remove the
> buggy code.

Adding a few Mesa folks for an Ack; they should remember if they've done it
in the past.

If we have good enough confidence that it was unused in the userspace
drivers, it should be OK to drop. It would have been broken anyway,
which pretty much automatically means it didn't see use, but it
doesn't hurt to hear from them.

Regards, Joonas

> 
> v2: Just use the fault allowing kmap() + normal copy_(to|from)_user
> v3: Avoid int overflow in computing 'length' from 'remain' (Tvrtko)
> 
> References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 195 ++++----------------------------
>  1 file changed, 25 insertions(+), 170 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7d45e71100bc..8049458ab6ac 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
>         obj->write_domain = 0;
>  }
>  
> -static inline int
> -__copy_to_user_swizzled(char __user *cpu_vaddr,
> -                       const char *gpu_vaddr, int gpu_offset,
> -                       int length)
> -{
> -       int ret, cpu_offset = 0;
> -
> -       while (length > 0) {
> -               int cacheline_end = ALIGN(gpu_offset + 1, 64);
> -               int this_length = min(cacheline_end - gpu_offset, length);
> -               int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> -               ret = __copy_to_user(cpu_vaddr + cpu_offset,
> -                                    gpu_vaddr + swizzled_gpu_offset,
> -                                    this_length);
> -               if (ret)
> -                       return ret + length;
> -
> -               cpu_offset += this_length;
> -               gpu_offset += this_length;
> -               length -= this_length;
> -       }
> -
> -       return 0;
> -}
> -
> -static inline int
> -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> -                         const char __user *cpu_vaddr,
> -                         int length)
> -{
> -       int ret, cpu_offset = 0;
> -
> -       while (length > 0) {
> -               int cacheline_end = ALIGN(gpu_offset + 1, 64);
> -               int this_length = min(cacheline_end - gpu_offset, length);
> -               int swizzled_gpu_offset = gpu_offset ^ 64;
> -
> -               ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> -                                      cpu_vaddr + cpu_offset,
> -                                      this_length);
> -               if (ret)
> -                       return ret + length;
> -
> -               cpu_offset += this_length;
> -               gpu_offset += this_length;
> -               length -= this_length;
> -       }
> -
> -       return 0;
> -}
> -
>  /*
>   * Pins the specified object's pages and synchronizes the object with
>   * GPU accesses. Sets needs_clflush to non-zero if the caller should
> @@ -1030,72 +978,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>         return ret;
>  }
>  
> -static void
> -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> -                            bool swizzled)
> -{
> -       if (unlikely(swizzled)) {
> -               unsigned long start = (unsigned long) addr;
> -               unsigned long end = (unsigned long) addr + length;
> -
> -               /* For swizzling simply ensure that we always flush both
> -                * channels. Lame, but simple and it works. Swizzled
> -                * pwrite/pread is far from a hotpath - current userspace
> -                * doesn't use it at all. */
> -               start = round_down(start, 128);
> -               end = round_up(end, 128);
> -
> -               drm_clflush_virt_range((void *)start, end - start);
> -       } else {
> -               drm_clflush_virt_range(addr, length);
> -       }
> -
> -}
> -
> -/* Only difference to the fast-path function is that this can handle bit17
> - * and uses non-atomic copy and kmap functions. */
>  static int
> -shmem_pread_slow(struct page *page, int offset, int length,
> -                char __user *user_data,
> -                bool page_do_bit17_swizzling, bool needs_clflush)
> +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> +           bool needs_clflush)
>  {
>         char *vaddr;
>         int ret;
>  
>         vaddr = kmap(page);
> -       if (needs_clflush)
> -               shmem_clflush_swizzled_range(vaddr + offset, length,
> -                                            page_do_bit17_swizzling);
>  
> -       if (page_do_bit17_swizzling)
> -               ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> -       else
> -               ret = __copy_to_user(user_data, vaddr + offset, length);
> -       kunmap(page);
> -
> -       return ret ? - EFAULT : 0;
> -}
> -
> -static int
> -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> -           bool page_do_bit17_swizzling, bool needs_clflush)
> -{
> -       int ret;
> +       if (needs_clflush)
> +               drm_clflush_virt_range(vaddr + offset, len);
>  
> -       ret = -ENODEV;
> -       if (!page_do_bit17_swizzling) {
> -               char *vaddr = kmap_atomic(page);
> +       ret = __copy_to_user(user_data, vaddr + offset, len);
>  
> -               if (needs_clflush)
> -                       drm_clflush_virt_range(vaddr + offset, length);
> -               ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> -               kunmap_atomic(vaddr);
> -       }
> -       if (ret == 0)
> -               return 0;
> +       kunmap(page);
>  
> -       return shmem_pread_slow(page, offset, length, user_data,
> -                               page_do_bit17_swizzling, needs_clflush);
> +       return ret ? -EFAULT : 0;
>  }
>  
>  static int
> @@ -1104,15 +1003,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>  {
>         char __user *user_data;
>         u64 remain;
> -       unsigned int obj_do_bit17_swizzling;
>         unsigned int needs_clflush;
>         unsigned int idx, offset;
>         int ret;
>  
> -       obj_do_bit17_swizzling = 0;
> -       if (i915_gem_object_needs_bit17_swizzle(obj))
> -               obj_do_bit17_swizzling = BIT(17);
> -
>         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
>         if (ret)
>                 return ret;
> @@ -1127,14 +1021,14 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
>         offset = offset_in_page(args->offset);
>         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
>                 struct page *page = i915_gem_object_get_page(obj, idx);
> -               int length;
> +               unsigned int length;
>  
> -               length = remain;
> -               if (offset + length > PAGE_SIZE)
> +               if (remain > PAGE_SIZE - offset)
>                         length = PAGE_SIZE - offset;
> +               else
> +                       length = remain;
>  
>                 ret = shmem_pread(page, offset, length, user_data,
> -                                 page_to_phys(page) & obj_do_bit17_swizzling,
>                                   needs_clflush);
>                 if (ret)
>                         break;
> @@ -1475,33 +1369,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
>         return ret;
>  }
>  
> -static int
> -shmem_pwrite_slow(struct page *page, int offset, int length,
> -                 char __user *user_data,
> -                 bool page_do_bit17_swizzling,
> -                 bool needs_clflush_before,
> -                 bool needs_clflush_after)
> -{
> -       char *vaddr;
> -       int ret;
> -
> -       vaddr = kmap(page);
> -       if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> -               shmem_clflush_swizzled_range(vaddr + offset, length,
> -                                            page_do_bit17_swizzling);
> -       if (page_do_bit17_swizzling)
> -               ret = __copy_from_user_swizzled(vaddr, offset, user_data,
> -                                               length);
> -       else
> -               ret = __copy_from_user(vaddr + offset, user_data, length);
> -       if (needs_clflush_after)
> -               shmem_clflush_swizzled_range(vaddr + offset, length,
> -                                            page_do_bit17_swizzling);
> -       kunmap(page);
> -
> -       return ret ? -EFAULT : 0;
> -}
> -
>  /* Per-page copy function for the shmem pwrite fastpath.
>   * Flushes invalid cachelines before writing to the target if
>   * needs_clflush_before is set and flushes out any written cachelines after
> @@ -1509,31 +1376,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
>   */
>  static int
>  shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> -            bool page_do_bit17_swizzling,
>              bool needs_clflush_before,
>              bool needs_clflush_after)
>  {
> +       char *vaddr;
>         int ret;
>  
> -       ret = -ENODEV;
> -       if (!page_do_bit17_swizzling) {
> -               char *vaddr = kmap_atomic(page);
> +       vaddr = kmap(page);
>  
> -               if (needs_clflush_before)
> -                       drm_clflush_virt_range(vaddr + offset, len);
> -               ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> -               if (needs_clflush_after)
> -                       drm_clflush_virt_range(vaddr + offset, len);
> +       if (needs_clflush_before)
> +               drm_clflush_virt_range(vaddr + offset, len);
>  
> -               kunmap_atomic(vaddr);
> -       }
> -       if (ret == 0)
> -               return ret;
> +       ret = __copy_from_user(vaddr + offset, user_data, len);
> +       if (!ret && needs_clflush_after)
> +               drm_clflush_virt_range(vaddr + offset, len);
> +
> +       kunmap(page);
>  
> -       return shmem_pwrite_slow(page, offset, len, user_data,
> -                                page_do_bit17_swizzling,
> -                                needs_clflush_before,
> -                                needs_clflush_after);
> +       return ret ? -EFAULT : 0;
>  }
>  
>  static int
> @@ -1543,7 +1403,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>         struct drm_i915_private *i915 = to_i915(obj->base.dev);
>         void __user *user_data;
>         u64 remain;
> -       unsigned int obj_do_bit17_swizzling;
>         unsigned int partial_cacheline_write;
>         unsigned int needs_clflush;
>         unsigned int offset, idx;
> @@ -1558,10 +1417,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>         if (ret)
>                 return ret;
>  
> -       obj_do_bit17_swizzling = 0;
> -       if (i915_gem_object_needs_bit17_swizzle(obj))
> -               obj_do_bit17_swizzling = BIT(17);
> -
>         /* If we don't overwrite a cacheline completely we need to be
>          * careful to have up-to-date data by first clflushing. Don't
>          * overcomplicate things and flush the entire patch.
> @@ -1575,14 +1430,14 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
>         offset = offset_in_page(args->offset);
>         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
>                 struct page *page = i915_gem_object_get_page(obj, idx);
> -               int length;
> +               unsigned int length;
>  
> -               length = remain;
> -               if (offset + length > PAGE_SIZE)
> +               if (remain > PAGE_SIZE - offset)
>                         length = PAGE_SIZE - offset;
> +               else
> +                       length = remain;
>  
>                 ret = shmem_pwrite(page, offset, length, user_data,
> -                                  page_to_phys(page) & obj_do_bit17_swizzling,
>                                    (offset | length) & partial_cacheline_write,
>                                    needs_clflush & CLFLUSH_AFTER);
>                 if (ret)
> -- 
> 2.19.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] drm/i915: Remove partial attempt to swizzle on pread/pwrite
  2018-11-16 15:02   ` Joonas Lahtinen
@ 2018-11-16 15:59     ` Kenneth Graunke
  0 siblings, 0 replies; 15+ messages in thread
From: Kenneth Graunke @ 2018-11-16 15:59 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx, Ian Romanick



On Friday, November 16, 2018 7:02:04 AM PST Joonas Lahtinen wrote:
> Quoting Chris Wilson (2018-10-12 14:56:21)
> > Our attempt to account for bit17 swizzling of pread/pwrite onto tiled
> > objects was flawed due to the simple fact that we do not always know the
> > swizzling for a particular page (due to the swizzling varying based on
> > location in certain unbalanced configurations). Furthermore, the
> > pread/pwrite paths are now unbalanced in that we are required to use the
> > GTT as in some cases we do not have direct CPU access to the backing
> > physical pages (thus some paths trying to account for the swizzle, but
> > others neglecting, chaos ensues).
> > 
> > There are no known users who do use pread/pwrite into a tiled object
> > (you need to manually detile anyway, so why not just use mmap and avoid
> > the copy?) and no user bug reports to indicate that it is being used in
> > the wild. As no one is hitting the buggy path, we can just remove the
> > buggy code.
> 
> Adding a few Mesa folks for an Ack, they should remember if they've done it
> in the past.

Eheheh...bit17 and 9xx hardware are before my time. :(  I've copied Ian
in case he remembers anything from that era.

It looks like the only users of pwrite and pread in Mesa's i915 driver
are for linear buffer objects, not miptrees which could be tiled.

So I think this is fine...(famous last words...)
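
As an illustration of the mmap route the commit message suggests, here is
a minimal userspace sketch (not taken from Mesa; the function name, error
handling and header path are assumptions) that reads back a tiled object
through a GTT mapping instead of GEM_PREAD. The GTT view goes through a
fence register, so the CPU already sees a linear, detiled layout and no
bit17 handling is needed:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>	/* path depends on how libdrm/kernel headers are installed */

/* Read 'size' bytes of a GEM object into 'dst' via a GTT mmap. */
static int read_bo_via_gtt(int fd, uint32_t handle, size_t size, void *dst)
{
	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
	void *ptr;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return -1;

	ptr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, arg.offset);
	if (ptr == MAP_FAILED)
		return -1;

	memcpy(dst, ptr, size);		/* WC read; slow but already detiled */
	munmap(ptr, size);
	return 0;
}
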
Acked-by: Kenneth Graunke <kenneth@whitecape.org>

> If we have good enough confidence that it was unused in the userspace
> drivers, should be OK to be dropped. And it would have been broken
> anyway, which pretty much automatically means it didn't see use, but
> doesn't hurt to hear from them.
> 
> Regards, Joonas
> 
> > 
> > v2: Just use the fault allowing kmap() + normal copy_(to|from)_user
> > v3: Avoid int overflow in computing 'length' from 'remain' (Tvrtko)
> > 
> > References: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 195 ++++----------------------------
> >  1 file changed, 25 insertions(+), 170 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 7d45e71100bc..8049458ab6ac 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -859,58 +859,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
> >         obj->write_domain = 0;
> >  }
> >  
> > -static inline int
> > -__copy_to_user_swizzled(char __user *cpu_vaddr,
> > -                       const char *gpu_vaddr, int gpu_offset,
> > -                       int length)
> > -{
> > -       int ret, cpu_offset = 0;
> > -
> > -       while (length > 0) {
> > -               int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > -               int this_length = min(cacheline_end - gpu_offset, length);
> > -               int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > -               ret = __copy_to_user(cpu_vaddr + cpu_offset,
> > -                                    gpu_vaddr + swizzled_gpu_offset,
> > -                                    this_length);
> > -               if (ret)
> > -                       return ret + length;
> > -
> > -               cpu_offset += this_length;
> > -               gpu_offset += this_length;
> > -               length -= this_length;
> > -       }
> > -
> > -       return 0;
> > -}
> > -
> > -static inline int
> > -__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
> > -                         const char __user *cpu_vaddr,
> > -                         int length)
> > -{
> > -       int ret, cpu_offset = 0;
> > -
> > -       while (length > 0) {
> > -               int cacheline_end = ALIGN(gpu_offset + 1, 64);
> > -               int this_length = min(cacheline_end - gpu_offset, length);
> > -               int swizzled_gpu_offset = gpu_offset ^ 64;
> > -
> > -               ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
> > -                                      cpu_vaddr + cpu_offset,
> > -                                      this_length);
> > -               if (ret)
> > -                       return ret + length;
> > -
> > -               cpu_offset += this_length;
> > -               gpu_offset += this_length;
> > -               length -= this_length;
> > -       }
> > -
> > -       return 0;
> > -}
> > -
> >  /*
> >   * Pins the specified object's pages and synchronizes the object with
> >   * GPU accesses. Sets needs_clflush to non-zero if the caller should
> > @@ -1030,72 +978,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> >         return ret;
> >  }
> >  
> > -static void
> > -shmem_clflush_swizzled_range(char *addr, unsigned long length,
> > -                            bool swizzled)
> > -{
> > -       if (unlikely(swizzled)) {
> > -               unsigned long start = (unsigned long) addr;
> > -               unsigned long end = (unsigned long) addr + length;
> > -
> > -               /* For swizzling simply ensure that we always flush both
> > -                * channels. Lame, but simple and it works. Swizzled
> > -                * pwrite/pread is far from a hotpath - current userspace
> > -                * doesn't use it at all. */
> > -               start = round_down(start, 128);
> > -               end = round_up(end, 128);
> > -
> > -               drm_clflush_virt_range((void *)start, end - start);
> > -       } else {
> > -               drm_clflush_virt_range(addr, length);
> > -       }
> > -
> > -}
> > -
> > -/* Only difference to the fast-path function is that this can handle bit17
> > - * and uses non-atomic copy and kmap functions. */
> >  static int
> > -shmem_pread_slow(struct page *page, int offset, int length,
> > -                char __user *user_data,
> > -                bool page_do_bit17_swizzling, bool needs_clflush)
> > +shmem_pread(struct page *page, int offset, int len, char __user *user_data,
> > +           bool needs_clflush)
> >  {
> >         char *vaddr;
> >         int ret;
> >  
> >         vaddr = kmap(page);
> > -       if (needs_clflush)
> > -               shmem_clflush_swizzled_range(vaddr + offset, length,
> > -                                            page_do_bit17_swizzling);
> >  
> > -       if (page_do_bit17_swizzling)
> > -               ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
> > -       else
> > -               ret = __copy_to_user(user_data, vaddr + offset, length);
> > -       kunmap(page);
> > -
> > -       return ret ? - EFAULT : 0;
> > -}
> > -
> > -static int
> > -shmem_pread(struct page *page, int offset, int length, char __user *user_data,
> > -           bool page_do_bit17_swizzling, bool needs_clflush)
> > -{
> > -       int ret;
> > +       if (needs_clflush)
> > +               drm_clflush_virt_range(vaddr + offset, len);
> >  
> > -       ret = -ENODEV;
> > -       if (!page_do_bit17_swizzling) {
> > -               char *vaddr = kmap_atomic(page);
> > +       ret = __copy_to_user(user_data, vaddr + offset, len);
> >  
> > -               if (needs_clflush)
> > -                       drm_clflush_virt_range(vaddr + offset, length);
> > -               ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
> > -               kunmap_atomic(vaddr);
> > -       }
> > -       if (ret == 0)
> > -               return 0;
> > +       kunmap(page);
> >  
> > -       return shmem_pread_slow(page, offset, length, user_data,
> > -                               page_do_bit17_swizzling, needs_clflush);
> > +       return ret ? -EFAULT : 0;
> >  }
> >  
> >  static int
> > @@ -1104,15 +1003,10 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> >  {
> >         char __user *user_data;
> >         u64 remain;
> > -       unsigned int obj_do_bit17_swizzling;
> >         unsigned int needs_clflush;
> >         unsigned int idx, offset;
> >         int ret;
> >  
> > -       obj_do_bit17_swizzling = 0;
> > -       if (i915_gem_object_needs_bit17_swizzle(obj))
> > -               obj_do_bit17_swizzling = BIT(17);
> > -
> >         ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
> >         if (ret)
> >                 return ret;
> > @@ -1127,14 +1021,14 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> >         offset = offset_in_page(args->offset);
> >         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
> >                 struct page *page = i915_gem_object_get_page(obj, idx);
> > -               int length;
> > +               unsigned int length;
> >  
> > -               length = remain;
> > -               if (offset + length > PAGE_SIZE)
> > +               if (remain > PAGE_SIZE - offset)
> >                         length = PAGE_SIZE - offset;
> > +               else
> > +                       length = remain;
> >  
> >                 ret = shmem_pread(page, offset, length, user_data,
> > -                                 page_to_phys(page) & obj_do_bit17_swizzling,
> >                                   needs_clflush);
> >                 if (ret)
> >                         break;
> > @@ -1475,33 +1369,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
> >         return ret;
> >  }
> >  
> > -static int
> > -shmem_pwrite_slow(struct page *page, int offset, int length,
> > -                 char __user *user_data,
> > -                 bool page_do_bit17_swizzling,
> > -                 bool needs_clflush_before,
> > -                 bool needs_clflush_after)
> > -{
> > -       char *vaddr;
> > -       int ret;
> > -
> > -       vaddr = kmap(page);
> > -       if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
> > -               shmem_clflush_swizzled_range(vaddr + offset, length,
> > -                                            page_do_bit17_swizzling);
> > -       if (page_do_bit17_swizzling)
> > -               ret = __copy_from_user_swizzled(vaddr, offset, user_data,
> > -                                               length);
> > -       else
> > -               ret = __copy_from_user(vaddr + offset, user_data, length);
> > -       if (needs_clflush_after)
> > -               shmem_clflush_swizzled_range(vaddr + offset, length,
> > -                                            page_do_bit17_swizzling);
> > -       kunmap(page);
> > -
> > -       return ret ? -EFAULT : 0;
> > -}
> > -
> >  /* Per-page copy function for the shmem pwrite fastpath.
> >   * Flushes invalid cachelines before writing to the target if
> >   * needs_clflush_before is set and flushes out any written cachelines after
> > @@ -1509,31 +1376,24 @@ shmem_pwrite_slow(struct page *page, int offset, int length,
> >   */
> >  static int
> >  shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> > -            bool page_do_bit17_swizzling,
> >              bool needs_clflush_before,
> >              bool needs_clflush_after)
> >  {
> > +       char *vaddr;
> >         int ret;
> >  
> > -       ret = -ENODEV;
> > -       if (!page_do_bit17_swizzling) {
> > -               char *vaddr = kmap_atomic(page);
> > +       vaddr = kmap(page);
> >  
> > -               if (needs_clflush_before)
> > -                       drm_clflush_virt_range(vaddr + offset, len);
> > -               ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
> > -               if (needs_clflush_after)
> > -                       drm_clflush_virt_range(vaddr + offset, len);
> > +       if (needs_clflush_before)
> > +               drm_clflush_virt_range(vaddr + offset, len);
> >  
> > -               kunmap_atomic(vaddr);
> > -       }
> > -       if (ret == 0)
> > -               return ret;
> > +       ret = __copy_from_user(vaddr + offset, user_data, len);
> > +       if (!ret && needs_clflush_after)
> > +               drm_clflush_virt_range(vaddr + offset, len);
> > +
> > +       kunmap(page);
> >  
> > -       return shmem_pwrite_slow(page, offset, len, user_data,
> > -                                page_do_bit17_swizzling,
> > -                                needs_clflush_before,
> > -                                needs_clflush_after);
> > +       return ret ? -EFAULT : 0;
> >  }
> >  
> >  static int
> > @@ -1543,7 +1403,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> >         struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >         void __user *user_data;
> >         u64 remain;
> > -       unsigned int obj_do_bit17_swizzling;
> >         unsigned int partial_cacheline_write;
> >         unsigned int needs_clflush;
> >         unsigned int offset, idx;
> > @@ -1558,10 +1417,6 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> >         if (ret)
> >                 return ret;
> >  
> > -       obj_do_bit17_swizzling = 0;
> > -       if (i915_gem_object_needs_bit17_swizzle(obj))
> > -               obj_do_bit17_swizzling = BIT(17);
> > -
> >         /* If we don't overwrite a cacheline completely we need to be
> >          * careful to have up-to-date data by first clflushing. Don't
> >          * overcomplicate things and flush the entire patch.
> > @@ -1575,14 +1430,14 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> >         offset = offset_in_page(args->offset);
> >         for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
> >                 struct page *page = i915_gem_object_get_page(obj, idx);
> > -               int length;
> > +               unsigned int length;
> >  
> > -               length = remain;
> > -               if (offset + length > PAGE_SIZE)
> > +               if (remain > PAGE_SIZE - offset)
> >                         length = PAGE_SIZE - offset;
> > +               else
> > +                       length = remain;
> >  
> >                 ret = shmem_pwrite(page, offset, length, user_data,
> > -                                  page_to_phys(page) & obj_do_bit17_swizzling,
> >                                    (offset | length) & partial_cacheline_write,
> >                                    needs_clflush & CLFLUSH_AFTER);
> >                 if (ret)
> 



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2018-11-16 16:06 UTC | newest]

Thread overview: 15+ messages
2018-07-23  9:02 [PATCH] drm/i915: Remove partial attempt to swizzle on pread/pwrite Chris Wilson
2018-07-23  9:11 ` Chris Wilson
2018-07-23  9:49 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2018-07-23  9:50 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-07-23 10:12 ` ✗ Fi.CI.BAT: failure " Patchwork
2018-10-11 13:38 ` [PATCH] " Tvrtko Ursulin
2018-10-11 14:30   ` Chris Wilson
2018-10-11 14:44     ` Tvrtko Ursulin
2018-10-12 11:56 ` [PATCH v3] " Chris Wilson
2018-10-12 12:22   ` Tvrtko Ursulin
2018-11-16 15:02   ` Joonas Lahtinen
2018-11-16 15:59     ` Kenneth Graunke
2018-10-12 12:21 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Remove partial attempt to swizzle on pread/pwrite (rev2) Patchwork
2018-10-12 12:21 ` ✗ Fi.CI.SPARSE: " Patchwork
2018-10-12 12:46 ` ✗ Fi.CI.BAT: failure " Patchwork
