* [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
@ 2020-01-15 20:52 ` Chris Wilson
  0 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2020-01-15 20:52 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx, David Laight

Since we may try to flush the cachelines associated with large buffers
(an 8K framebuffer is about 128MiB, even before we try HDR), this leads
to unacceptably long latencies (when using a voluntary CONFIG_PREEMPT).
If we call cond_resched() between each sg chunk, that is about every 128
pages, we have a natural break point in which to check if the process
needs to be rescheduled. Naturally, this means that drm_clflush_sg() can
only be called from process context -- which is true at the moment. The
other clflush routines remain usable from atomic context.
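
For scale (assuming a 32bpp 7680x4320 framebuffer, which is where the
~128MiB figure comes from): 7680 * 4320 * 4 bytes = 132,710,400 bytes,
i.e. some 32,400 4KiB pages to walk and over 2 million 64-byte
cachelines to clflush.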

Even though flushing all the cachelines of a large object takes a
demonstrable amount of time, clflush is still preferred over a
system-wide wbinvd, as the latter has unpredictable latencies affecting
the whole system, not just the local task.

Reported-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: David Laight <David.Laight@ACULAB.COM>
---
 drivers/gpu/drm/drm_cache.c | 49 ++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 03e01b000f7a..fbd2bb644544 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -112,23 +112,64 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
 }
 EXPORT_SYMBOL(drm_clflush_pages);
 
+static __always_inline struct sgt_iter {
+	struct scatterlist *sgp;
+	unsigned long pfn;
+	unsigned int curr;
+	unsigned int max;
+} __sgt_iter(struct scatterlist *sgl) {
+	struct sgt_iter s = { .sgp = sgl };
+
+	if (s.sgp) {
+		s.max = s.curr = s.sgp->offset;
+		s.max += s.sgp->length;
+		s.pfn = page_to_pfn(sg_page(s.sgp));
+	}
+
+	return s;
+}
+
+static inline struct scatterlist *__sg_next_resched(struct scatterlist *sg)
+{
+	if (sg_is_last(sg))
+		return NULL;
+
+	++sg;
+	if (unlikely(sg_is_chain(sg))) {
+		sg = sg_chain_ptr(sg);
+		cond_resched();
+	}
+	return sg;
+}
+
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl);			\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next_resched((__iter).sgp)), 0 : 0)
+
 /**
  * drm_clflush_sg - Flush dcache lines pointing to a scatter-gather.
  * @st: struct sg_table.
  *
  * Flush every data cache line entry that points to an address in the
- * sg.
+ * sg. This may schedule between scatterlist chunks, in order to keep
+ * the system preemption-latency down for large buffers.
  */
 void
 drm_clflush_sg(struct sg_table *st)
 {
+	might_sleep();
+
 #if defined(CONFIG_X86)
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		struct sg_page_iter sg_iter;
+		struct sgt_iter sg_iter;
+		struct page *page;
 
 		mb(); /*CLFLUSH is ordered only by using memory barriers*/
-		for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
-			drm_clflush_page(sg_page_iter_page(&sg_iter));
+		for_each_sgt_page(page, sg_iter, st)
+			drm_clflush_page(page);
 		mb(); /*Make sure that all cache line entry is flushed*/
 
 		return;
-- 
2.25.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
@ 2020-01-15 21:28 ` Patchwork
  -1 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2020-01-15 21:28 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm: Inject a cond_resched() into long drm_clflush_sg()
URL   : https://patchwork.freedesktop.org/series/72085/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
f1c4f0dc10f6 drm: Inject a cond_resched() into long drm_clflush_sg()
-:41: CHECK:MULTIPLE_ASSIGNMENTS: multiple assignments should be avoided
#41: FILE: drivers/gpu/drm/drm_cache.c:124:
+		s.max = s.curr = s.sgp->offset;

-:62: CHECK:MACRO_ARG_REUSE: Macro argument reuse '__iter' - possible side-effects?
#62: FILE: drivers/gpu/drm/drm_cache.c:145:
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl);			\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next_resched((__iter).sgp)), 0 : 0)

total: 0 errors, 0 warnings, 2 checks, 68 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
@ 2020-01-15 21:57 ` Patchwork
  -1 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2020-01-15 21:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm: Inject a cond_resched() into long drm_clflush_sg()
URL   : https://patchwork.freedesktop.org/series/72085/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7751 -> Patchwork_16118
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/index.html

Known issues
------------

  Here are the changes found in Patchwork_16118 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_parallel@basic:
    - fi-tgl-y:           [PASS][1] -> [DMESG-WARN][2] ([i915#402]) +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-tgl-y/igt@gem_exec_parallel@basic.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-tgl-y/igt@gem_exec_parallel@basic.html

  * igt@gem_exec_suspend@basic-s3:
    - fi-cml-s:           [PASS][3] -> [DMESG-WARN][4] ([fdo#111764])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-cml-s/igt@gem_exec_suspend@basic-s3.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-cml-s/igt@gem_exec_suspend@basic-s3.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-skl-6700k2:      [PASS][5] -> [INCOMPLETE][6] ([i915#69])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-skl-6700k2/igt@kms_chamelium@common-hpd-after-suspend.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-skl-6700k2/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-icl-u2:          [PASS][7] -> [FAIL][8] ([i915#217])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html

  
#### Possible fixes ####

  * igt@gem_close_race@basic-threads:
    - fi-byt-j1900:       [TIMEOUT][9] ([fdo#112271] / [i915#816]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-byt-j1900/igt@gem_close_race@basic-threads.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-byt-j1900/igt@gem_close_race@basic-threads.html

  * igt@gem_exec_gttfill@basic:
    - {fi-ehl-1}:         [INCOMPLETE][11] ([i915#937]) -> [PASS][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-ehl-1/igt@gem_exec_gttfill@basic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-ehl-1/igt@gem_exec_gttfill@basic.html

  * igt@i915_getparams_basic@basic-subslice-total:
    - fi-tgl-y:           [DMESG-WARN][13] ([i915#402]) -> [PASS][14] +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-tgl-y/igt@i915_getparams_basic@basic-subslice-total.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-tgl-y/igt@i915_getparams_basic@basic-subslice-total.html

  * igt@i915_module_load@reload-with-fault-injection:
    - fi-cfl-8700k:       [DMESG-WARN][15] ([i915#889]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-cfl-8700k/igt@i915_module_load@reload-with-fault-injection.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-cfl-8700k/igt@i915_module_load@reload-with-fault-injection.html
    - fi-skl-6770hq:      [DMESG-WARN][17] ([i915#889]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-skl-6770hq/igt@i915_module_load@reload-with-fault-injection.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-skl-6770hq/igt@i915_module_load@reload-with-fault-injection.html
    - fi-skl-6600u:       [DMESG-WARN][19] ([i915#889]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-skl-6600u/igt@i915_module_load@reload-with-fault-injection.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-skl-6600u/igt@i915_module_load@reload-with-fault-injection.html

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [INCOMPLETE][21] ([i915#151]) -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@i915_selftest@live_execlists:
    - fi-kbl-soraka:      [DMESG-FAIL][23] ([i915#656]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-kbl-soraka/igt@i915_selftest@live_execlists.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-kbl-soraka/igt@i915_selftest@live_execlists.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][25] ([fdo#111096] / [i915#323]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111764]: https://bugs.freedesktop.org/show_bug.cgi?id=111764
  [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
  [i915#151]: https://gitlab.freedesktop.org/drm/intel/issues/151
  [i915#217]: https://gitlab.freedesktop.org/drm/intel/issues/217
  [i915#323]: https://gitlab.freedesktop.org/drm/intel/issues/323
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#656]: https://gitlab.freedesktop.org/drm/intel/issues/656
  [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
  [i915#816]: https://gitlab.freedesktop.org/drm/intel/issues/816
  [i915#889]: https://gitlab.freedesktop.org/drm/intel/issues/889
  [i915#937]: https://gitlab.freedesktop.org/drm/intel/issues/937


Participating hosts (46 -> 45)
------------------------------

  Additional (4): fi-hsw-4770r fi-blb-e6850 fi-byt-n2820 fi-snb-2520m 
  Missing    (5): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-bsw-kefka fi-byt-clapper 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7751 -> Patchwork_16118

  CI-20190529: 20190529
  CI_DRM_7751: bffb5bf41a2e3d84ee5043dcccad49578656a012 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5367: 94af6de4f07487b93c4f5008f3ed04b5fc045200 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16118: f1c4f0dc10f6c419538bac61eeae8002e3491cf4 @ git://anongit.freedesktop.org/gfx-ci/linux


== Kernel 32bit build ==

Warning: Kernel 32bit buildtest failed:
https://intel-gfx-ci.01.org/Patchwork_16118/build_32bit.log

  CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 122 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:93: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1282: recipe for target 'modules' failed
make: *** [modules] Error 2


== Linux commits ==

f1c4f0dc10f6 drm: Inject a cond_resched() into long drm_clflush_sg()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* [Intel-gfx] ✗ Fi.CI.BUILD: warning for drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
@ 2020-01-15 21:57 ` Patchwork
  -1 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2020-01-15 21:57 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm: Inject a cond_resched() into long drm_clflush_sg()
URL   : https://patchwork.freedesktop.org/series/72085/
State : warning

== Summary ==

CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  CHK     include/generated/compile.h
Kernel: arch/x86/boot/bzImage is ready  (#1)
  Building modules, stage 2.
  MODPOST 122 modules
ERROR: "__udivdi3" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
scripts/Makefile.modpost:93: recipe for target '__modpost' failed
make[1]: *** [__modpost] Error 1
Makefile:1282: recipe for target 'modules' failed
make: *** [modules] Error 2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/build_32bit.log
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

* Re: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
@ 2020-01-16  6:52   ` Daniel Vetter
  -1 siblings, 0 replies; 24+ messages in thread
From: Daniel Vetter @ 2020-01-16  6:52 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, David Laight, dri-devel

On Wed, Jan 15, 2020 at 08:52:45PM +0000, Chris Wilson wrote:
> Since we may try to flush the cachelines associated with large buffers
> (an 8K framebuffer is about 128MiB, even before we try HDR), this leads
> to unacceptably long latencies (when using a voluntary CONFIG_PREEMPT).
> If we call cond_resched() between each sg chunk, that is about every 128
> pages, we have a natural break point in which to check if the process
> needs to be rescheduled. Naturally, this means that drm_clflush_sg() can
> only be called from process context -- which is true at the moment. The
> other clflush routines remain usable from atomic context.
> 
> Even though flushing all the cachelines of a large object takes a
> demonstrable amount of time, clflush is still preferred over a
> system-wide wbinvd, as the latter has unpredictable latencies affecting
> the whole system, not just the local task.
> 
> Reported-by: David Laight <David.Laight@ACULAB.COM>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: David Laight <David.Laight@ACULAB.COM>

The original bug report is complaining about latencies for SCHED_RT
threads, on a system that doesn't even use CONFIG_PREEMPT. I'm not sure
it's terribly valid to cater to that use-case - all the desktop distros
seem a lot more reasonable. So firmly *shrug* from my side ...

Patch itself looks correct, just not seeing the point.
-Daniel


> ---
>  drivers/gpu/drm/drm_cache.c | 49 ++++++++++++++++++++++++++++++++++---
>  1 file changed, 45 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index 03e01b000f7a..fbd2bb644544 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -112,23 +112,64 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
>  }
>  EXPORT_SYMBOL(drm_clflush_pages);
>  
> +static __always_inline struct sgt_iter {
> +	struct scatterlist *sgp;
> +	unsigned long pfn;
> +	unsigned int curr;
> +	unsigned int max;
> +} __sgt_iter(struct scatterlist *sgl) {
> +	struct sgt_iter s = { .sgp = sgl };
> +
> +	if (s.sgp) {
> +		s.max = s.curr = s.sgp->offset;
> +		s.max += s.sgp->length;
> +		s.pfn = page_to_pfn(sg_page(s.sgp));
> +	}
> +
> +	return s;
> +}
> +
> +static inline struct scatterlist *__sg_next_resched(struct scatterlist *sg)
> +{
> +	if (sg_is_last(sg))
> +		return NULL;
> +
> +	++sg;
> +	if (unlikely(sg_is_chain(sg))) {
> +		sg = sg_chain_ptr(sg);
> +		cond_resched();
> +	}
> +	return sg;
> +}
> +
> +#define for_each_sgt_page(__pp, __iter, __sgt)				\
> +	for ((__iter) = __sgt_iter((__sgt)->sgl);			\
> +	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
> +	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
> +	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
> +	     (__iter) = __sgt_iter(__sg_next_resched((__iter).sgp)), 0 : 0)
> +
>  /**
>   * drm_clflush_sg - Flush dcache lines pointing to a scatter-gather.
>   * @st: struct sg_table.
>   *
>   * Flush every data cache line entry that points to an address in the
> - * sg.
> + * sg. This may schedule between scatterlist chunks, in order to keep
> + * the system preemption-latency down for large buffers.
>   */
>  void
>  drm_clflush_sg(struct sg_table *st)
>  {
> +	might_sleep();
> +
>  #if defined(CONFIG_X86)
>  	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
> -		struct sg_page_iter sg_iter;
> +		struct sgt_iter sg_iter;
> +		struct page *page;
>  
>  		mb(); /*CLFLUSH is ordered only by using memory barriers*/
> -		for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
> -			drm_clflush_page(sg_page_iter_page(&sg_iter));
> +		for_each_sgt_page(page, sg_iter, st)
> +			drm_clflush_page(page);
>  		mb(); /*Make sure that all cache line entry is flushed*/
>  
>  		return;
> -- 
> 2.25.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* Re: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16  6:52   ` [Intel-gfx] " Daniel Vetter
@ 2020-01-16  7:40     ` Chris Wilson
  -1 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2020-01-16  7:40 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, David Laight, dri-devel

Quoting Daniel Vetter (2020-01-16 06:52:42)
> On Wed, Jan 15, 2020 at 08:52:45PM +0000, Chris Wilson wrote:
> > Since we may try to flush the cachelines associated with large buffers
> > (an 8K framebuffer is about 128MiB, even before we try HDR), this leads
> > to unacceptably long latencies (when using a voluntary CONFIG_PREEMPT).
> > If we call cond_resched() between each sg chunk, that is about every 128
> > pages, we have a natural break point in which to check if the process
> > needs to be rescheduled. Naturally, this means that drm_clflush_sg() can
> > only be called from process context -- which is true at the moment. The
> > other clflush routines remain usable from atomic context.
> > 
> > Even though flushing all the cachelines of a large object takes a
> > demonstrable amount of time, clflush is still preferred over a
> > system-wide wbinvd, as the latter has unpredictable latencies affecting
> > the whole system, not just the local task.
> > 
> > Reported-by: David Laight <David.Laight@ACULAB.COM>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: David Laight <David.Laight@ACULAB.COM>
> 
> The original bug report is complaining about latencies for SCHED_RT
> threads, on a system that doesn't even use CONFIG_PREEMPT. I'm not sure
> it's terribly valid to cater to that use-case - all the desktop distros
> seem a lot more reasonable. So firmly *shrug* from my side ...

Yeah, I had the same immediate response to the complaint, but otoh we've
inserted cond_resched() before when it looks like we may consume entire
jiffies inside a loop. At the very minimum, we should have a
might_sleep() here and a reminder that this can be very slow (remember
byt?).
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16  7:40     ` [Intel-gfx] " Chris Wilson
@ 2020-01-16 12:26       ` David Laight
  -1 siblings, 0 replies; 24+ messages in thread
From: David Laight @ 2020-01-16 12:26 UTC (permalink / raw)
  To: 'Chris Wilson', Daniel Vetter; +Cc: intel-gfx, dri-devel

From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: 16 January 2020 07:40
> Quoting Daniel Vetter (2020-01-16 06:52:42)
> > On Wed, Jan 15, 2020 at 08:52:45PM +0000, Chris Wilson wrote:
> > > Since we may try to flush the cachelines associated with large buffers
> > > (an 8K framebuffer is about 128MiB, even before we try HDR), this leads
> > > to unacceptably long latencies (when using a voluntary CONFIG_PREEMPT).
> > > If we call cond_resched() between each sg chunk, that is about every 128
> > > pages, we have a natural break point in which to check if the process
> > > needs to be rescheduled. Naturally, this means that drm_clflush_sg() can
> > > only be called from process context -- which is true at the moment. The
> > > other clflush routines remain usable from atomic context.
> > >
> > > Even though flushing all the cachelines of a large object takes a
> > > demonstrable amount of time, clflush is still preferred over a
> > > system-wide wbinvd, as the latter has unpredictable latencies affecting
> > > the whole system, not just the local task.
> > >
> > > Reported-by: David Laight <David.Laight@ACULAB.COM>
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: David Laight <David.Laight@ACULAB.COM>
> >
> > The original bug report is complaining about latencies for SCHED_RT
> > threads, on a system that doesn't even use CONFIG_PREEMPT. I'm not sure
> > it's terribly valid to cater to that use-case - all the desktop distros
> > seem a lot more reasonable. So firmly *shrug* from my side ...
> 
> Yeah, I had the same immediate response to the complaint, but otoh we've
> inserted cond_resched() before when it looks like we may consume entire
> jiffies inside a loop. At the very minimum, we should have a
> might_sleep() here and a reminder that this can be very slow (remember
> byt?).

I'm using RT to get more deterministic scheduling to look for long
scheduling delays, rather than because we need very tight scheduling.
Delays of several 100us aren't a real problem.

The problem with CONFIG_PREEMPT is that the distros don't
enable it and it isn't a command line option.
So it is really useless unless you are able/willing to build your
own kernel.

I could run the code under the normal scheduler with 'nice -19'.
I still wouldn't expect to have all but one cpu idle when I've just
done a cv_broadcast() to wake up a lot of threads.

I've added 'if (!(++i & 31)) cond_resched();' after the drm_clflush_page()
call in drm_clflush_sg().
In my case that is 3600/32 reschedules in 3.3ms - plenty.
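
Roughly, against the existing x86 loop (a sketch of my local hack, not
the chunked-iterator patch above):

	struct sg_page_iter sg_iter;
	unsigned int i = 0;

	mb(); /*CLFLUSH is ordered only by using memory barriers*/
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		drm_clflush_page(sg_page_iter_page(&sg_iter));
		if (!(++i & 31)) /* check for reschedule every 32 pages */
			cond_resched();
	}
	mb(); /*Make sure that all cache line entry is flushed*/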

However there is a call from __i915_gem_object_set_pages() that
is preceded by a lockdep_assert_held() check - so mustn't sleep.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16 12:26       ` [Intel-gfx] " David Laight
@ 2020-01-16 12:28         ` Chris Wilson
  -1 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2020-01-16 12:28 UTC (permalink / raw)
  To: Daniel Vetter, David Laight; +Cc: intel-gfx, dri-devel

Quoting David Laight (2020-01-16 12:26:45)
> However there is a call from __i915_gem_object_set_pages() that
> is preceded by a lockdep_assert_held() check - so mustn't sleep.

That is a mutex; it's allowed to sleep. The might_sleep() here should
help assuage your fears.
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16 12:28         ` [Intel-gfx] " Chris Wilson
@ 2020-01-16 13:58           ` David Laight
  -1 siblings, 0 replies; 24+ messages in thread
From: David Laight @ 2020-01-16 13:58 UTC (permalink / raw)
  To: 'Chris Wilson', Daniel Vetter; +Cc: intel-gfx, dri-devel

From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: 16 January 2020 12:29
> 
> Quoting David Laight (2020-01-16 12:26:45)
> > However there is a call from __i915_gem_object_set_pages() that
> > is preceded by a lockdep_assert_held() check - so mustn't sleep.
> 
> That is a mutex; it's allowed to sleep. The might_sleep() here should
> help assuage your fears.

Gah. Having a mutex called mm.lock isn't entirely obvious when quickly
reading code.

From what I was reading earlier it seems that clflush() (and clflushopt)
are fast (well not stupidly slow) for buffers under 4k.
I suspect they can do a 'mark pending', but for larger buffers have to
wait for earlier requests to complete (although the graph carries on
increasing to the 16k RH margin).

It may well be that adding a cond_resched() after every 4k page
will have ~0 performance impact because the first few clflush
will be a bit faster (less slow).

I'll do some measurements later this afternoon.

FWIW does this code need to actually invalidate the cache lines?
Or is it just forcing a writeback?

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16 13:58           ` [Intel-gfx] " David Laight
@ 2020-01-16 14:01             ` Chris Wilson
  -1 siblings, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2020-01-16 14:01 UTC (permalink / raw)
  To: Daniel Vetter, David Laight; +Cc: intel-gfx, dri-devel

Quoting David Laight (2020-01-16 13:58:44)
> From: Chris Wilson <chris@chris-wilson.co.uk>
> > Sent: 16 January 2020 12:29
> > 
> > Quoting David Laight (2020-01-16 12:26:45)
> > > However there is a call from __i915_gem_object_set_pages() that
> > > is preceded by a lockdep_assert_held() check - so mustn't sleep.
> > 
> > That is a mutex; it's allowed to sleep. The might_sleep() here should
> > help assuage your fears.
> 
> Gah. Having a mutex called mm.lock isn't entirely obvious when quickly
> reading code.
> 
> From what I was reading earlier it seems that clflush() (and clflushopt)
> are fast (well not stupidly slow) for buffers under 4k.
> I suspect they can do a 'mark pending', but for larger buffers have to
> wait for earlier requests to complete (although the graph carries on
> increasing to the 16k RH margin).
> 
> It may well be that adding a cond_resched() after every 4k page
> will have ~0 performance impact because the first few clflush
> will be a bit faster (less slow).
> 
> I'll do some measurements later this afternoon.
> 
> FWIW does this code need to actually invalidate the cache lines?
> Or is it just forcing a writeback?

It is used for both.
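
(For reference: clflush writes the cacheline back and invalidates it; a
writeback that leaves the line valid would be clwb, which drm_cache.c
does not use.)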
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16 13:58           ` [Intel-gfx] " David Laight
@ 2020-01-16 14:40             ` David Laight
  -1 siblings, 0 replies; 24+ messages in thread
From: David Laight @ 2020-01-16 14:40 UTC (permalink / raw)
  To: 'Chris Wilson', 'Daniel Vetter'
  Cc: 'intel-gfx@lists.freedesktop.org',
	'dri-devel@lists.freedesktop.org'

> I'll do some measurements later this afternoon.

This is an Ivy bridge cpu, so clflush (not clflushopt).
With a cond_resched for every page I get:
(Note these calls are every 10 seconds....)

# tracer: function_graph
#
# CPU  DURATION                  FUNCTION CALLS
# |     |   |                     |   |   |   |
 0) # 3155.126 us |  drm_clflush_sg [drm]();
 1) # 3067.382 us |  drm_clflush_sg [drm]();
 2) # 3063.766 us |  drm_clflush_sg [drm]();
 3) # 3092.302 us |  drm_clflush_sg [drm]();
 0) # 3209.486 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 0)   kworker-7    =>  kworker-319
 ------------------------------------------

 0) # 3633.803 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 1)   kworker-7    =>  kworker-319
 ------------------------------------------

 1) # 3090.278 us |  drm_clflush_sg [drm]();
 2) # 3828.108 us |  drm_clflush_sg [drm]();
 3) # 3049.836 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 3)  kworker-319   =>   kworker-7
 ------------------------------------------

 3) # 3295.017 us |  drm_clflush_sg [drm]();
 3) # 3064.077 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 0)  kworker-319   =>   kworker-7
 ------------------------------------------

 0) # 3182.034 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 3)   kworker-7    =>  kworker-319
 ------------------------------------------

 3) # 3065.754 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 2)  kworker-319   =>   kworker-7
 ------------------------------------------

 2) # 3562.513 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 3)  kworker-319   =>   kworker-7
 ------------------------------------------

 3) # 3048.914 us |  drm_clflush_sg [drm]();
 3) # 3062.469 us |  drm_clflush_sg [drm]();
 3) # 3055.727 us |  drm_clflush_sg [drm]();
 ------------------------------------------
 0)   kworker-7    =>  kworker-319
 ------------------------------------------

Without the cond_resched() I suspect more of them are 3.0ms.
Not really a significant difference.
I guess the longer times are the scheduler looking for work?
I don't understand the extra traces - I'm guessing they are process switch related.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-16 14:40             ` [Intel-gfx] " David Laight
@ 2020-01-16 15:04               ` David Laight
  -1 siblings, 0 replies; 24+ messages in thread
From: David Laight @ 2020-01-16 15:04 UTC (permalink / raw)
  To: 'Chris Wilson', 'Daniel Vetter'
  Cc: 'intel-gfx@lists.freedesktop.org',
	'dri-devel@lists.freedesktop.org'

From: David Laight
> Sent: 16 January 2020 14:41
> > I'll do some measurements later this afternoon.
> 
> This is an Ivy bridge cpu, so clflush (not clflushopt).
> With a cond_resched for every page I get:
> (Note these calls are every 10 seconds....)

For comparison, some timings booted with the original drm.ko:

 1) # 3125.116 us |  drm_clflush_sg [drm]();
 0) # 3181.705 us |  drm_clflush_sg [drm]();
 1) # 3108.863 us |  drm_clflush_sg [drm]();
 1) # 3051.926 us |  drm_clflush_sg [drm]();
 2) # 3088.468 us |  drm_clflush_sg [drm]();
 2) # 3012.729 us |  drm_clflush_sg [drm]();
 2) # 3191.268 us |  drm_clflush_sg [drm]();
 3) # 3044.294 us |  drm_clflush_sg [drm]();
 0) # 3163.916 us |  drm_clflush_sg [drm]();
 2) # 3029.307 us |  drm_clflush_sg [drm]();
 2) # 3116.360 us |  drm_clflush_sg [drm]();
 2) # 3031.620 us |  drm_clflush_sg [drm]();
 0) # 3349.706 us |  drm_clflush_sg [drm]();

Probably nothing really significant.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
@ 2020-01-18 10:45 ` Patchwork
  -1 siblings, 0 replies; 24+ messages in thread
From: Patchwork @ 2020-01-18 10:45 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm: Inject a cond_resched() into long drm_clflush_sg()
URL   : https://patchwork.freedesktop.org/series/72085/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7751_full -> Patchwork_16118_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_16118_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_busy@busy-vcs1:
    - shard-iclb:         [PASS][1] -> [SKIP][2] ([fdo#112080]) +6 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb2/igt@gem_busy@busy-vcs1.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb5/igt@gem_busy@busy-vcs1.html

  * igt@gem_ctx_persistence@vcs1-queued:
    - shard-iclb:         [PASS][3] -> [SKIP][4] ([fdo#109276] / [fdo#112080]) +3 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb1/igt@gem_ctx_persistence@vcs1-queued.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb5/igt@gem_ctx_persistence@vcs1-queued.html

  * igt@gem_ctx_shared@q-smoketest-all:
    - shard-tglb:         [PASS][5] -> [INCOMPLETE][6] ([fdo#111735] / [i915#472]) +1 similar issue
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb5/igt@gem_ctx_shared@q-smoketest-all.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb8/igt@gem_ctx_shared@q-smoketest-all.html

  * igt@gem_exec_await@wide-contexts:
    - shard-tglb:         [PASS][7] -> [INCOMPLETE][8] ([fdo#111736] / [i915#472])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb5/igt@gem_exec_await@wide-contexts.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb3/igt@gem_exec_await@wide-contexts.html

  * igt@gem_exec_create@forked:
    - shard-glk:          [PASS][9] -> [INCOMPLETE][10] ([i915#58] / [k.org#198133])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-glk5/igt@gem_exec_create@forked.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-glk9/igt@gem_exec_create@forked.html

  * igt@gem_exec_parallel@basic:
    - shard-tglb:         [PASS][11] -> [INCOMPLETE][12] ([i915#472] / [i915#476]) +1 similar issue
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb7/igt@gem_exec_parallel@basic.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb4/igt@gem_exec_parallel@basic.html

  * igt@gem_exec_schedule@preempt-contexts-bsd2:
    - shard-iclb:         [PASS][13] -> [SKIP][14] ([fdo#109276]) +20 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb2/igt@gem_exec_schedule@preempt-contexts-bsd2.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb6/igt@gem_exec_schedule@preempt-contexts-bsd2.html

  * igt@gem_exec_schedule@preemptive-hang-bsd:
    - shard-iclb:         [PASS][15] -> [SKIP][16] ([fdo#112146]) +9 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb6/igt@gem_exec_schedule@preemptive-hang-bsd.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb4/igt@gem_exec_schedule@preemptive-hang-bsd.html

  * igt@gem_exec_schedule@smoketest-all:
    - shard-tglb:         [PASS][17] -> [INCOMPLETE][18] ([i915#463] / [i915#472])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb8/igt@gem_exec_schedule@smoketest-all.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb1/igt@gem_exec_schedule@smoketest-all.html

  * igt@gem_exec_schedule@smoketest-bsd2:
    - shard-tglb:         [PASS][19] -> [INCOMPLETE][20] ([i915#472] / [i915#707])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb5/igt@gem_exec_schedule@smoketest-bsd2.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb6/igt@gem_exec_schedule@smoketest-bsd2.html

  * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive:
    - shard-tglb:         [PASS][21] -> [TIMEOUT][22] ([fdo#112126] / [fdo#112271] / [i915#530])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb7/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb4/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html
    - shard-skl:          [PASS][23] -> [TIMEOUT][24] ([fdo#112271] / [i915#530])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl4/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl9/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrash-inactive.html

  * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing:
    - shard-hsw:          [PASS][25] -> [TIMEOUT][26] ([fdo#112271] / [i915#530])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-hsw7/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-hsw5/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html

  * igt@gem_persistent_relocs@forked-interruptible-thrashing:
    - shard-apl:          [PASS][27] -> [TIMEOUT][28] ([fdo#112271] / [i915#530])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-apl1/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-apl4/igt@gem_persistent_relocs@forked-interruptible-thrashing.html

  * igt@gem_ppgtt@flink-and-close-vma-leak:
    - shard-glk:          [PASS][29] -> [FAIL][30] ([i915#644])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-glk9/igt@gem_ppgtt@flink-and-close-vma-leak.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-glk2/igt@gem_ppgtt@flink-and-close-vma-leak.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [PASS][31] -> [FAIL][32] ([i915#454])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb2/igt@i915_pm_dc@dc6-psr.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb6/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rpm@dpms-lpsp:
    - shard-skl:          [PASS][33] -> [SKIP][34] ([fdo#109271])
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl2/igt@i915_pm_rpm@dpms-lpsp.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl10/igt@i915_pm_rpm@dpms-lpsp.html

  * igt@i915_pm_rps@reset:
    - shard-iclb:         [PASS][35] -> [FAIL][36] ([i915#413])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb2/igt@i915_pm_rps@reset.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb6/igt@i915_pm_rps@reset.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-blt:
    - shard-tglb:         [PASS][37] -> [FAIL][38] ([i915#49])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb7/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-blt.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb5/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-blt.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-kbl:          [PASS][39] -> [DMESG-WARN][40] ([i915#180]) +2 similar issues
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-kbl4/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-kbl6/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_setmode@basic:
    - shard-skl:          [PASS][41] -> [FAIL][42] ([i915#31])
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl1/igt@kms_setmode@basic.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl2/igt@kms_setmode@basic.html

  
#### Possible fixes ####

  * igt@gem_ctx_isolation@vcs1-reset:
    - shard-iclb:         [SKIP][43] ([fdo#109276] / [fdo#112080]) -> [PASS][44] +1 similar issue
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb6/igt@gem_ctx_isolation@vcs1-reset.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb2/igt@gem_ctx_isolation@vcs1-reset.html

  * igt@gem_ctx_persistence@vecs0-mixed-process:
    - shard-apl:          [FAIL][45] ([i915#679]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-apl4/igt@gem_ctx_persistence@vecs0-mixed-process.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-apl6/igt@gem_ctx_persistence@vecs0-mixed-process.html

  * igt@gem_eio@wait-immediate:
    - shard-tglb:         [INCOMPLETE][47] ([i915#472]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb6/igt@gem_eio@wait-immediate.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb6/igt@gem_eio@wait-immediate.html

  * igt@gem_exec_gttfill@basic:
    - shard-tglb:         [INCOMPLETE][49] ([fdo#111593] / [i915#472]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb6/igt@gem_exec_gttfill@basic.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb4/igt@gem_exec_gttfill@basic.html

  * igt@gem_exec_schedule@pi-distinct-iova-bsd:
    - shard-iclb:         [SKIP][51] ([i915#677]) -> [PASS][52] +2 similar issues
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb1/igt@gem_exec_schedule@pi-distinct-iova-bsd.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb5/igt@gem_exec_schedule@pi-distinct-iova-bsd.html

  * igt@gem_exec_schedule@preempt-other-chain-bsd:
    - shard-iclb:         [SKIP][53] ([fdo#112146]) -> [PASS][54] +2 similar issues
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb2/igt@gem_exec_schedule@preempt-other-chain-bsd.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb6/igt@gem_exec_schedule@preempt-other-chain-bsd.html

  * igt@gem_exec_schedule@preempt-queue-contexts-chain-render:
    - shard-tglb:         [INCOMPLETE][55] ([fdo#111677] / [i915#472]) -> [PASS][56]
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb6/igt@gem_exec_schedule@preempt-queue-contexts-chain-render.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb8/igt@gem_exec_schedule@preempt-queue-contexts-chain-render.html

  * igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing:
    - shard-kbl:          [TIMEOUT][57] ([fdo#112271] / [i915#530]) -> [PASS][58]
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-kbl1/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-kbl1/igt@gem_persistent_relocs@forked-interruptible-faulting-reloc-thrashing.html

  * igt@gem_persistent_relocs@forked-interruptible-thrash-inactive:
    - shard-skl:          [TIMEOUT][59] ([fdo#112271] / [i915#530]) -> [PASS][60]
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl6/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl10/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html
    - shard-snb:          [TIMEOUT][61] ([fdo#112271] / [i915#530]) -> [PASS][62]
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-snb7/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-snb2/igt@gem_persistent_relocs@forked-interruptible-thrash-inactive.html

  * igt@gem_persistent_relocs@forked-interruptible-thrashing:
    - shard-iclb:         [FAIL][63] ([i915#520]) -> [PASS][64]
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb8/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb1/igt@gem_persistent_relocs@forked-interruptible-thrashing.html

  * igt@gem_persistent_relocs@forked-thrashing:
    - shard-kbl:          [INCOMPLETE][65] ([fdo#103665] / [i915#530]) -> [PASS][66] +1 similar issue
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-kbl3/igt@gem_persistent_relocs@forked-thrashing.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-kbl3/igt@gem_persistent_relocs@forked-thrashing.html

  * igt@gem_ppgtt@flink-and-close-vma-leak:
    - shard-apl:          [FAIL][67] ([i915#644]) -> [PASS][68]
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-apl6/igt@gem_ppgtt@flink-and-close-vma-leak.html
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-apl7/igt@gem_ppgtt@flink-and-close-vma-leak.html

  * igt@gem_workarounds@suspend-resume:
    - shard-apl:          [DMESG-WARN][69] ([i915#180]) -> [PASS][70] +1 similar issue
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-apl6/igt@gem_workarounds@suspend-resume.html
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-apl7/igt@gem_workarounds@suspend-resume.html

  * igt@kms_color@pipe-a-ctm-0-75:
    - shard-skl:          [DMESG-WARN][71] ([i915#109]) -> [PASS][72]
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl2/igt@kms_color@pipe-a-ctm-0-75.html
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl3/igt@kms_color@pipe-a-ctm-0-75.html

  * igt@kms_cursor_crc@pipe-b-cursor-suspend:
    - shard-skl:          [INCOMPLETE][73] ([i915#300]) -> [PASS][74]
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl8/igt@kms_cursor_crc@pipe-b-cursor-suspend.html
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl4/igt@kms_cursor_crc@pipe-b-cursor-suspend.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-skl:          [INCOMPLETE][75] ([i915#221]) -> [PASS][76]
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl2/igt@kms_flip@flip-vs-suspend.html
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl1/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-kbl:          [DMESG-WARN][77] ([i915#180]) -> [PASS][78] +3 similar issues
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-kbl2/igt@kms_flip@flip-vs-suspend-interruptible.html
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-kbl6/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-stridechange:
    - shard-tglb:         [FAIL][79] ([i915#49]) -> [PASS][80] +1 similar issue
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb1/igt@kms_frontbuffer_tracking@fbc-stridechange.html
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb1/igt@kms_frontbuffer_tracking@fbc-stridechange.html

  * igt@kms_plane_multiple@atomic-pipe-b-tiling-y:
    - shard-skl:          [DMESG-WARN][81] ([IGT#6]) -> [PASS][82]
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-skl2/igt@kms_plane_multiple@atomic-pipe-b-tiling-y.html
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-skl1/igt@kms_plane_multiple@atomic-pipe-b-tiling-y.html

  * igt@kms_psr@psr2_cursor_blt:
    - shard-iclb:         [SKIP][83] ([fdo#109441]) -> [PASS][84] +1 similar issue
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb6/igt@kms_psr@psr2_cursor_blt.html
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb2/igt@kms_psr@psr2_cursor_blt.html

  * igt@perf_pmu@busy-no-semaphores-vcs1:
    - shard-iclb:         [SKIP][85] ([fdo#112080]) -> [PASS][86] +10 similar issues
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb5/igt@perf_pmu@busy-no-semaphores-vcs1.html
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb1/igt@perf_pmu@busy-no-semaphores-vcs1.html

  * igt@prime_busy@hang-bsd2:
    - shard-iclb:         [SKIP][87] ([fdo#109276]) -> [PASS][88] +15 similar issues
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb8/igt@prime_busy@hang-bsd2.html
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb1/igt@prime_busy@hang-bsd2.html

  
#### Warnings ####

  * igt@gem_ctx_isolation@vcs1-nonpriv-switch:
    - shard-iclb:         [SKIP][89] ([fdo#109276] / [fdo#112080]) -> [FAIL][90] ([IGT#28])
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb5/igt@gem_ctx_isolation@vcs1-nonpriv-switch.html
   [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb1/igt@gem_ctx_isolation@vcs1-nonpriv-switch.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][91] ([i915#454]) -> [SKIP][92] ([i915#468])
   [91]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-tglb5/igt@i915_pm_dc@dc6-psr.html
   [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-tglb3/igt@i915_pm_dc@dc6-psr.html

  * igt@kms_dp_dsc@basic-dsc-enable-edp:
    - shard-iclb:         [SKIP][93] ([fdo#109349]) -> [DMESG-WARN][94] ([fdo#107724])
   [93]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-iclb6/igt@kms_dp_dsc@basic-dsc-enable-edp.html
   [94]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-iclb2/igt@kms_dp_dsc@basic-dsc-enable-edp.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes:
    - shard-kbl:          [DMESG-WARN][95] ([i915#56]) -> [DMESG-WARN][96] ([i915#180])
   [95]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7751/shard-kbl2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html
   [96]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/shard-kbl7/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-a-planes.html

  
  [IGT#28]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/28
  [IGT#6]: https://gitlab.freedesktop.org/drm/igt-gpu-tools/issues/6
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109349]: https://bugs.freedesktop.org/show_bug.cgi?id=109349
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#111593]: https://bugs.freedesktop.org/show_bug.cgi?id=111593
  [fdo#111677]: https://bugs.freedesktop.org/show_bug.cgi?id=111677
  [fdo#111735]: https://bugs.freedesktop.org/show_bug.cgi?id=111735
  [fdo#111736]: https://bugs.freedesktop.org/show_bug.cgi?id=111736
  [fdo#112080]: https://bugs.freedesktop.org/show_bug.cgi?id=112080
  [fdo#112126]: https://bugs.freedesktop.org/show_bug.cgi?id=112126
  [fdo#112146]: https://bugs.freedesktop.org/show_bug.cgi?id=112146
  [fdo#112271]: https://bugs.freedesktop.org/show_bug.cgi?id=112271
  [i915#109]: https://gitlab.freedesktop.org/drm/intel/issues/109
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#221]: https://gitlab.freedesktop.org/drm/intel/issues/221
  [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#413]: https://gitlab.freedesktop.org/drm/intel/issues/413
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#463]: https://gitlab.freedesktop.org/drm/intel/issues/463
  [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
  [i915#472]: https://gitlab.freedesktop.org/drm/intel/issues/472
  [i915#476]: https://gitlab.freedesktop.org/drm/intel/issues/476
  [i915#49]: https://gitlab.freedesktop.org/drm/intel/issues/49
  [i915#520]: https://gitlab.freedesktop.org/drm/intel/issues/520
  [i915#530]: https://gitlab.freedesktop.org/drm/intel/issues/530
  [i915#56]: https://gitlab.freedesktop.org/drm/intel/issues/56
  [i915#58]: https://gitlab.freedesktop.org/drm/intel/issues/58
  [i915#644]: https://gitlab.freedesktop.org/drm/intel/issues/644
  [i915#677]: https://gitlab.freedesktop.org/drm/intel/issues/677
  [i915#679]: https://gitlab.freedesktop.org/drm/intel/issues/679
  [i915#707]: https://gitlab.freedesktop.org/drm/intel/issues/707
  [k.org#198133]: https://bugzilla.kernel.org/show_bug.cgi?id=198133


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7751 -> Patchwork_16118

  CI-20190529: 20190529
  CI_DRM_7751: bffb5bf41a2e3d84ee5043dcccad49578656a012 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5367: 94af6de4f07487b93c4f5008f3ed04b5fc045200 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16118: f1c4f0dc10f6c419538bac61eeae8002e3491cf4 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16118/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 24+ messages in thread

* RE: [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg()
  2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
@ 2020-01-23 14:36   ` David Laight
  -1 siblings, 0 replies; 24+ messages in thread
From: David Laight @ 2020-01-23 14:36 UTC (permalink / raw)
  To: 'Chris Wilson', dri-devel; +Cc: intel-gfx

From: Chris Wilson <chris@chris-wilson.co.uk>
> Sent: 15 January 2020 20:53
> 
> Since we may try and flush the cachelines associated with large buffers
> (an 8K framebuffer is about 128MiB, even before we try HDR), this leads
> to unacceptably long latencies (when using a voluntary CONFIG_PREEMPT).
> If we call cond_resched() between each sg chunk, that is about every 128
> pages, we have a natural break point in which to check if the process
> needs to be rescheduled. Naturally, this means that drm_clflush_sg() can
> only be called from process context -- which is true at the moment. The
> other clflush routines remain usable from atomic context.
> 
> Even though flushing large objects takes a demonstrable amount of time
> to flush all the cachelines, clflush is still preferred over a
> system-wide wbinvd as the latter has unpredictable latencies affecting
> the whole system not just the local task.
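
The "about every 128 pages" in the quoted message falls out of the
scatterlist layout: chained entries are stored in page-sized blocks whose
last slot holds the chain pointer, so each block covers roughly 127
single-page entries. A minimal sketch of that arithmetic, using a
hypothetical stand-in struct with the layout assumed for x86-64 without
CONFIG_DEBUG_SG:

#include <stdio.h>

struct scatterlist_sketch {		/* hypothetical stand-in */
	unsigned long page_link;
	unsigned int offset;
	unsigned int length;
	unsigned long long dma_address;
	unsigned int dma_length;	/* assumes CONFIG_NEED_SG_DMA_LENGTH */
};

int main(void)
{
	const unsigned long page_size = 4096;
	/* The last slot of each page-sized block is the chain pointer. */
	const unsigned long entries =
		page_size / sizeof(struct scatterlist_sketch) - 1;

	printf("%lu entries per sg chunk\n", entries);	/* prints 127 */
	return 0;
}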

Any progress on this?
I think the patch itself has its whitespace messed up.

I've just done a measurement on a newer system that supports clflushopt.
drm_clflush_sg() took 400us for a 1920x1080 display.
No idea how fast the CPUs were running, somewhere between 800MHz and 4GHz
depending on the whim of the hardware (probably at the low end).
Considerably faster, and enough that calling cond_resched() every 4k page
is probably noticeable.
So every 128 pages is probably a reasonable compromise.
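
A quick sketch of the arithmetic behind that compromise (assuming 4 KiB
pages and a 32 bpp 1920x1080 framebuffer; only the 400 us figure comes
from the measurement above):

#include <stdio.h>

int main(void)
{
	const unsigned long fb_bytes = 1920UL * 1080 * 4; /* assumed 32 bpp */
	const unsigned long pages = fb_bytes / 4096;      /* ~2025 pages */
	const double per_page_us = 400.0 / pages;         /* ~0.2 us/page */

	/* cond_resched() per page means a check every ~200 ns of work;
	 * per 128 pages it is one check every ~25 us, negligible overhead
	 * while still bounding preemption latency. */
	printf("%lu pages, %.2f us/page, ~%.0f us per 128-page chunk\n",
	       pages, per_page_us, per_page_us * 128);
	return 0;
}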

	David


> Reported-by: David Laight <David.Laight@ACULAB.COM>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: David Laight <David.Laight@ACULAB.COM>
> ---
>  drivers/gpu/drm/drm_cache.c | 49 ++++++++++++++++++++++++++++++++++---
>  1 file changed, 45 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index 03e01b000f7a..fbd2bb644544 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -112,23 +112,64 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
>  }
>  EXPORT_SYMBOL(drm_clflush_pages);
> 
> +static __always_inline struct sgt_iter {
> +	struct scatterlist *sgp;
> +	unsigned long pfn;
> +	unsigned int curr;
> +	unsigned int max;
> +} __sgt_iter(struct scatterlist *sgl) {
> +	struct sgt_iter s = { .sgp = sgl };
> +
> +	if (s.sgp) {
> +		s.max = s.curr = s.sgp->offset;
> +		s.max += s.sgp->length;
> +		s.pfn = page_to_pfn(sg_page(s.sgp));
> +	}
> +
> +	return s;
> +}
> +
> +static inline struct scatterlist *__sg_next_resched(struct scatterlist *sg)
> +{
> +	if (sg_is_last(sg))
> +		return NULL;
> +
> +	++sg;
> +	if (unlikely(sg_is_chain(sg))) {
> +		sg = sg_chain_ptr(sg);
> +		cond_resched();
> +	}
> +	return sg;
> +}
> +
> +#define for_each_sgt_page(__pp, __iter, __sgt)				\
> +	for ((__iter) = __sgt_iter((__sgt)->sgl);			\
> +	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
> +	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
> +	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
> +	     (__iter) = __sgt_iter(__sg_next_resched((__iter).sgp)), 0 : 0)
> +
>  /**
>   * drm_clflush_sg - Flush dcache lines pointing to a scather-gather.
>   * @st: struct sg_table.
>   *
>   * Flush every data cache line entry that points to an address in the
> - * sg.
> + * sg. This may schedule between scatterlist chunks, in order to keep
> + * the system preemption-latency down for large buffers.
>   */
>  void
>  drm_clflush_sg(struct sg_table *st)
>  {
> +	might_sleep();
> +
>  #if defined(CONFIG_X86)
>  	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
> -		struct sg_page_iter sg_iter;
> +		struct sgt_iter sg_iter;
> +		struct page *page;
> 
>  		mb(); /*CLFLUSH is ordered only by using memory barriers*/
> -		for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
> -			drm_clflush_page(sg_page_iter_page(&sg_iter));
> +		for_each_sgt_page(page, sg_iter, st)
> +			drm_clflush_page(page);
>  		mb(); /*Make sure that all cache line entry is flushed*/
> 
>  		return;
> --
> 2.25.0
> 
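
For readers decoding the quoted for_each_sgt_page() macro: the ?: and
comma expressions just inline the iterator's advance step. Unrolled, it
behaves roughly like the sketch below (a paraphrase for readability, not
code from the patch; kernel context assumed):

static void flush_all_pages(struct sg_table *sgt)
{
	struct sgt_iter iter = __sgt_iter(sgt->sgl);
	struct page *page;

	while (iter.pfn) {	/* __sgt_iter(NULL) leaves pfn == 0 */
		page = pfn_to_page(iter.pfn + (iter.curr >> PAGE_SHIFT));

		drm_clflush_page(page);		/* the loop body */

		iter.curr += PAGE_SIZE;		/* curr started at sgp->offset */
		if (iter.curr >= iter.max)	/* chunk done: hop, maybe resched */
			iter = __sgt_iter(__sg_next_resched(iter.sgp));
	}
}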

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2020-01-24  8:52 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-15 20:52 [PATCH] drm: Inject a cond_resched() into long drm_clflush_sg() Chris Wilson
2020-01-15 20:52 ` [Intel-gfx] " Chris Wilson
2020-01-15 21:28 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2020-01-15 21:57 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-01-15 21:57 ` [Intel-gfx] ✗ Fi.CI.BUILD: warning " Patchwork
2020-01-16  6:52 ` [PATCH] " Daniel Vetter
2020-01-16  6:52   ` [Intel-gfx] " Daniel Vetter
2020-01-16  7:40   ` Chris Wilson
2020-01-16  7:40     ` [Intel-gfx] " Chris Wilson
2020-01-16 12:26     ` David Laight
2020-01-16 12:26       ` [Intel-gfx] " David Laight
2020-01-16 12:28       ` Chris Wilson
2020-01-16 12:28         ` [Intel-gfx] " Chris Wilson
2020-01-16 13:58         ` David Laight
2020-01-16 13:58           ` [Intel-gfx] " David Laight
2020-01-16 14:01           ` Chris Wilson
2020-01-16 14:01             ` [Intel-gfx] " Chris Wilson
2020-01-16 14:40           ` David Laight
2020-01-16 14:40             ` [Intel-gfx] " David Laight
2020-01-16 15:04             ` David Laight
2020-01-16 15:04               ` [Intel-gfx] " David Laight
2020-01-18 10:45 ` [Intel-gfx] ✓ Fi.CI.IGT: success for " Patchwork
2020-01-23 14:36 ` [PATCH] " David Laight
2020-01-23 14:36   ` [Intel-gfx] " David Laight

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.