* [PATCH] drm/i915: Split obj->cache_coherent to track r/w
@ 2017-08-10 16:20 Chris Wilson
2017-08-10 16:53 ` ✓ Fi.CI.BAT: success for " Patchwork
` (7 more replies)
0 siblings, 8 replies; 12+ messages in thread
From: Chris Wilson @ 2017-08-10 16:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Dongwon Kim
Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.
See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous step in this saga.
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 6 ----
drivers/gpu/drm/i915/i915_gem.c | 23 ++++++-------
drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
drivers/gpu/drm/i915/i915_gem_internal.c | 7 ++--
drivers/gpu/drm/i915/i915_gem_object.c | 44 ++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_object.h | 8 ++++-
drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +--
drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +--
drivers/gpu/drm/i915/selftests/huge_gem_object.c | 6 ++--
11 files changed, 76 insertions(+), 32 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_object.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..892f52b53060 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem_internal.o \
i915_gem.o \
+ i915_gem_object.o \
i915_gem_render_state.o \
i915_gem_request.o \
i915_gem_shrinker.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 907603cba447..6698c706118a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4317,10 +4317,4 @@ int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap);
-static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
-{
- return (obj->cache_level != I915_CACHE_NONE ||
- HAS_LLC(to_i915(obj->base.dev)));
-}
-
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 000a764ee8d9..3f7273f9370d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty)
return false;
- if (!obj->cache_coherent)
+ if (!obj->cache_coherent_w)
return true;
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !obj->cache_coherent)
+ !obj->cache_coherent_r)
drm_clflush_sg(pages);
__start_cpu_write(obj);
@@ -800,7 +800,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent_r || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -852,7 +852,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent_w || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -3673,8 +3673,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
- obj->cache_level = cache_level;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
@@ -4279,6 +4278,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
struct drm_i915_gem_object *obj;
struct address_space *mapping;
+ unsigned int cache_level;
gfp_t mask;
int ret;
@@ -4317,7 +4317,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- if (HAS_LLC(dev_priv)) {
+ if (HAS_LLC(dev_priv))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
@@ -4330,12 +4330,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
* However, we maintain the display planes as UC, and so
* need to rebind when first used as such.
*/
- obj->cache_level = I915_CACHE_LLC;
- } else
- obj->cache_level = I915_CACHE_NONE;
+ cache_level = I915_CACHE_LLC;
+ else
+ cache_level = I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
trace_i915_gem_object_create(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index 348b29a845c9..20778eff38c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -139,7 +139,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
+ if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent_r)
return false;
trace_i915_gem_object_clflush(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5fa44767c29e..c48bedebe275 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1842,7 +1842,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->request->capture_list = capture;
}
- if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+ if (unlikely(obj->cache_dirty && !obj->cache_coherent_w)) {
if (i915_gem_clflush_object(obj, 0))
entry->flags &= ~EXEC_OBJECT_ASYNC;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 568bf83af1f5..c1f64ddaf8aa 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -174,6 +174,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
@@ -190,9 +191,9 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/i915_gem_object.c
new file mode 100644
index 000000000000..94d21035cf8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+static inline bool
+i915_gem_object_is_coherent(struct drm_i915_gem_object *obj, bool write)
+{
+ if (obj->cache_level != I915_CACHE_NONE)
+ return true;
+
+ return !write && HAS_LLC(to_i915(obj->base.dev));
+}
+
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level)
+{
+ obj->cache_level = cache_level;
+ obj->cache_coherent_r = i915_gem_object_is_coherent(obj, false);
+ obj->cache_coherent_w = i915_gem_object_is_coherent(obj, true);
+ obj->cache_dirty = !obj->cache_coherent_w;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..73b76c0f8fc8 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -33,8 +33,11 @@
#include <drm/i915_drm.h>
+#include "i915_gem_request.h"
#include "i915_selftest.h"
+struct drm_i915_gem_object;
+
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -119,7 +122,8 @@ struct drm_i915_gem_object {
unsigned long gt_ro:1;
unsigned int cache_level:3;
unsigned int cache_dirty:1;
- unsigned int cache_coherent:1;
+ unsigned int cache_coherent_r:1;
+ unsigned int cache_coherent_w:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
@@ -391,6 +395,8 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine;
}
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index c11c915382e7..507c9f0d8df1 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -580,6 +580,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
struct drm_mm_node *stolen)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
obj = i915_gem_object_alloc(dev_priv);
if (obj == NULL)
@@ -590,8 +591,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
- obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
+ cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
if (i915_gem_object_pin_pages(obj))
goto cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ccd09e8419f5..f152a38d7079 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -804,9 +804,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
i915_gem_object_init(obj, &i915_gem_userptr_ops);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_LLC;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index caf76af36aba..c5c7e8efbdd3 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -111,6 +111,7 @@ huge_gem_object(struct drm_i915_private *i915,
dma_addr_t dma_size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!phys_size || phys_size > dma_size);
GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
@@ -128,9 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->scratch = phys_size;
return obj;
--
2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 12+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915: Split obj->cache_coherent to track r/w
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
@ 2017-08-10 16:53 ` Patchwork
2017-08-11 7:26 ` [PATCH] " Maarten Lankhorst
` (6 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-08-10 16:53 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Split obj->cache_coherent to track r/w
URL : https://patchwork.freedesktop.org/series/28641/
State : success
== Summary ==
Series 28641v1 drm/i915: Split obj->cache_coherent to track r/w
https://patchwork.freedesktop.org/api/1.0/series/28641/revisions/1/mbox/
Test gem_ringfill:
Subgroup basic-default:
pass -> SKIP (fi-bsw-n3050) fdo#101915
Test kms_cursor_legacy:
Subgroup basic-busy-flip-before-cursor-atomic:
fail -> PASS (fi-snb-2600) fdo#100215 +1
fdo#101915 https://bugs.freedesktop.org/show_bug.cgi?id=101915
fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fi-bdw-5557u total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:440s
fi-bdw-gvtdvm total:279 pass:265 dwarn:0 dfail:0 fail:0 skip:14 time:416s
fi-blb-e6850 total:279 pass:224 dwarn:1 dfail:0 fail:0 skip:54 time:359s
fi-bsw-n3050 total:279 pass:242 dwarn:0 dfail:0 fail:0 skip:37 time:494s
fi-bxt-j4205 total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:499s
fi-byt-j1900 total:279 pass:254 dwarn:1 dfail:0 fail:0 skip:24 time:519s
fi-byt-n2820 total:279 pass:251 dwarn:0 dfail:0 fail:0 skip:28 time:509s
fi-glk-2a total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:592s
fi-hsw-4770 total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:434s
fi-hsw-4770r total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:409s
fi-ilk-650 total:279 pass:229 dwarn:0 dfail:0 fail:0 skip:50 time:416s
fi-ivb-3520m total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:509s
fi-ivb-3770 total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:485s
fi-kbl-7500u total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:460s
fi-kbl-7560u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:569s
fi-kbl-r total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:577s
fi-pnv-d510 total:279 pass:223 dwarn:1 dfail:0 fail:0 skip:55 time:520s
fi-skl-6260u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:450s
fi-skl-6700k total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:649s
fi-skl-6770hq total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:463s
fi-skl-gvtdvm total:279 pass:266 dwarn:0 dfail:0 fail:0 skip:13 time:424s
fi-skl-x1585l total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:468s
fi-snb-2520m total:279 pass:251 dwarn:0 dfail:0 fail:0 skip:28 time:548s
fi-snb-2600 total:279 pass:249 dwarn:0 dfail:0 fail:1 skip:29 time:408s
0f3edcc5d40749f2ddddd26f5ada55a145e20350 drm-tip: 2017y-08m-10d-11h-21m-35s UTC integration manifest
2b246521c9a2 drm/i915: Split obj->cache_coherent to track r/w
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5368/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] drm/i915: Split obj->cache_coherent to track r/w
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
2017-08-10 16:53 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-08-11 7:26 ` Maarten Lankhorst
2017-08-11 8:08 ` Joonas Lahtinen
` (5 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: Maarten Lankhorst @ 2017-08-11 7:26 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Dongwon Kim
Op 10-08-17 om 18:20 schreef Chris Wilson:
> Another month, another story in the cache coherency saga. This time, we
> come to the realisation that i915_gem_object_is_coherent() has been
> reporting whether we can read from the target without requiring a cache
> invalidate; but we were using it in places for testing whether we could
> write into the object without requiring a cache flush. So split the
> tracking into two, one to decide before reads, one after writes.
>
> See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
> transition for CPU writes") for the previous step in this saga.
>
> Testcase: igt/kms_mmap_write_crc
> Testcase: igt/kms_pwrite_crc
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Dongwon Kim <dongwon.kim@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] drm/i915: Split obj->cache_coherent to track r/w
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
2017-08-10 16:53 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-08-11 7:26 ` [PATCH] " Maarten Lankhorst
@ 2017-08-11 8:08 ` Joonas Lahtinen
2017-08-11 10:11 ` [PATCH v2] " Chris Wilson
` (4 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: Joonas Lahtinen @ 2017-08-11 8:08 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Dongwon Kim
On to, 2017-08-10 at 17:20 +0100, Chris Wilson wrote:
> Another month, another story in the cache coherency saga. This time, we
> come to the realisation that i915_gem_object_is_coherent() has been
> reporting whether we can read from the target without requiring a cache
> invalidate; but we were using it in places for testing whether we could
> write into the object without requiring a cache flush. So split the
> tracking into two, one to decide before reads, one after writes.
>
> See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
> transition for CPU writes") for the previous step in this saga.
>
> Testcase: igt/kms_mmap_write_crc
> Testcase: igt/kms_pwrite_crc
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Dongwon Kim <dongwon.kim@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
<SNIP>
> +++ b/drivers/gpu/drm/i915/i915_gem_object.c
> @@ -0,0 +1,44 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +#include "i915_gem_object.h"
> +
Kerneldoc for the two following functions needed.
> +static inline bool
> +i915_gem_object_is_coherent(struct drm_i915_gem_object *obj, bool write)
> +{
> + if (obj->cache_level != I915_CACHE_NONE)
> + return true;
> +
> + return !write && HAS_LLC(to_i915(obj->base.dev));
> +}
> +
> +void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
> + unsigned int cache_level)
> +{
> + obj->cache_level = cache_level;
> + obj->cache_coherent_r = i915_gem_object_is_coherent(obj, false);
> + obj->cache_coherent_w = i915_gem_object_is_coherent(obj, true);
> + obj->cache_dirty = !obj->cache_coherent_w;
> +}
>
Other than that, it seems OK. Although READ / WRITE might be more
explicit than true / false.
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2] drm/i915: Split obj->cache_coherent to track r/w
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
` (2 preceding siblings ...)
2017-08-11 8:08 ` Joonas Lahtinen
@ 2017-08-11 10:11 ` Chris Wilson
2017-08-11 11:12 ` Chris Wilson
2017-08-11 10:23 ` [PATCH v3] " Chris Wilson
` (3 subsequent siblings)
7 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-08-11 10:11 UTC (permalink / raw)
To: intel-gfx; +Cc: Dongwon Kim
Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.
See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.
v2: Be verbose
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 6 ---
drivers/gpu/drm/i915/i915_gem.c | 25 +++++-----
drivers/gpu/drm/i915/i915_gem_clflush.c | 3 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +++-
drivers/gpu/drm/i915/i915_gem_internal.c | 7 +--
drivers/gpu/drm/i915/i915_gem_object.c | 58 ++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_object.h | 9 +++-
drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +-
drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-
drivers/gpu/drm/i915/selftests/huge_gem_object.c | 6 +--
11 files changed, 100 insertions(+), 32 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_object.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..892f52b53060 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem_internal.o \
i915_gem.o \
+ i915_gem_object.o \
i915_gem_render_state.o \
i915_gem_request.o \
i915_gem_shrinker.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 907603cba447..6698c706118a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4317,10 +4317,4 @@ int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap);
-static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
-{
- return (obj->cache_level != I915_CACHE_NONE ||
- HAS_LLC(to_i915(obj->base.dev)));
-}
-
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 000a764ee8d9..887fff281f4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty)
return false;
- if (!obj->cache_coherent)
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
return true;
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !obj->cache_coherent)
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
drm_clflush_sg(pages);
__start_cpu_write(obj);
@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -3673,8 +3675,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
- obj->cache_level = cache_level;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
struct drm_i915_gem_object *obj;
struct address_space *mapping;
+ unsigned int cache_level;
gfp_t mask;
int ret;
@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- if (HAS_LLC(dev_priv)) {
+ if (HAS_LLC(dev_priv))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
* However, we maintain the display planes as UC, and so
* need to rebind when first used as such.
*/
- obj->cache_level = I915_CACHE_LLC;
- } else
- obj->cache_level = I915_CACHE_NONE;
+ cache_level = I915_CACHE_LLC;
+ else
+ cache_level = I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
trace_i915_gem_object_create(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index 348b29a845c9..8a04d33055be 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -139,7 +139,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
+ if (!(flags & I915_CLFLUSH_FORCE) &&
+ obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
return false;
trace_i915_gem_object_clflush(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5fa44767c29e..9d808838a1ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1842,7 +1842,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->request->capture_list = capture;
}
- if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+ /*
+ * If the GPU is not _reading_ through the CPU cache, we need
+ * to make sure that any writes (both previous GPU writes from
+ * before a change in snooping levels and normal CPU writes)
+ * caught in that cache are flushed to main memory.
+ */
+ if (unlikely(obj->cache_coherent & obj->cache_dirty)) {
if (i915_gem_clflush_object(obj, 0))
entry->flags &= ~EXEC_OBJECT_ASYNC;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 568bf83af1f5..c1f64ddaf8aa 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -174,6 +174,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
@@ -190,9 +191,9 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/i915_gem_object.c
new file mode 100644
index 000000000000..67fdd611d262
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+static inline bool
+i915_gem_object_is_coherent(struct drm_i915_gem_object *obj, bool write)
+{
+ if (obj->cache_level != I915_CACHE_NONE)
+ return true;
+
+ return !write && HAS_LLC(to_i915(obj->base.dev));
+}
+
+/**
+ * Mark up the object's coherency levels for a given cache_level
+ * @obj: #drm_i915_gem_object
+ * @cache_level: cache level
+ */
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level)
+{
+ obj->cache_level = cache_level;
+
+ if (cache_level != I915_CACHE_NONE) {
+ obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+ I915_BO_CACHE_COHERENT_FOR_WRITE);
+ } else if (HAS_LLC(to_i915(obj->base.dev))) {
+ obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+ } else {
+ obj->cache_coherent = 0;
+ }
+
+ obj->cache_dirty =
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..3baa341432db 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -33,8 +33,11 @@
#include <drm/i915_drm.h>
+#include "i915_gem_request.h"
#include "i915_selftest.h"
+struct drm_i915_gem_object;
+
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -118,8 +121,10 @@ struct drm_i915_gem_object {
*/
unsigned long gt_ro:1;
unsigned int cache_level:3;
+ unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
unsigned int cache_dirty:1;
- unsigned int cache_coherent:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
@@ -391,6 +396,8 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine;
}
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index c11c915382e7..507c9f0d8df1 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -580,6 +580,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
struct drm_mm_node *stolen)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
obj = i915_gem_object_alloc(dev_priv);
if (obj == NULL)
@@ -590,8 +591,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
- obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
+ cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
if (i915_gem_object_pin_pages(obj))
goto cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ccd09e8419f5..f152a38d7079 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -804,9 +804,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
i915_gem_object_init(obj, &i915_gem_userptr_ops);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_LLC;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index caf76af36aba..c5c7e8efbdd3 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -111,6 +111,7 @@ huge_gem_object(struct drm_i915_private *i915,
dma_addr_t dma_size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!phys_size || phys_size > dma_size);
GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
@@ -128,9 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->scratch = phys_size;
return obj;
--
2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3] drm/i915: Split obj->cache_coherent to track r/w
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
` (3 preceding siblings ...)
2017-08-11 10:11 ` [PATCH v2] " Chris Wilson
@ 2017-08-11 10:23 ` Chris Wilson
2017-08-11 10:55 ` ✗ Fi.CI.BAT: failure for drm/i915: Split obj->cache_coherent to track r/w (rev3) Patchwork
` (2 subsequent siblings)
7 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-08-11 10:23 UTC (permalink / raw)
To: intel-gfx; +Cc: Dongwon Kim
Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.
See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.
v2: Be verbose
v3: Remove unused function (i915_gem_object_is_coherent)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 6 ---
drivers/gpu/drm/i915/i915_gem.c | 25 ++++++------
drivers/gpu/drm/i915/i915_gem_clflush.c | 3 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +++-
drivers/gpu/drm/i915/i915_gem_internal.c | 7 ++--
drivers/gpu/drm/i915/i915_gem_object.c | 48 ++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_object.h | 9 ++++-
drivers/gpu/drm/i915/i915_gem_stolen.c | 5 ++-
drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-
drivers/gpu/drm/i915/selftests/huge_gem_object.c | 6 +--
11 files changed, 90 insertions(+), 32 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_object.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..892f52b53060 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem_internal.o \
i915_gem.o \
+ i915_gem_object.o \
i915_gem_render_state.o \
i915_gem_request.o \
i915_gem_shrinker.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 907603cba447..6698c706118a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4317,10 +4317,4 @@ int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap);
-static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
-{
- return (obj->cache_level != I915_CACHE_NONE ||
- HAS_LLC(to_i915(obj->base.dev)));
-}
-
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 000a764ee8d9..887fff281f4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty)
return false;
- if (!obj->cache_coherent)
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
return true;
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !obj->cache_coherent)
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
drm_clflush_sg(pages);
__start_cpu_write(obj);
@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -3673,8 +3675,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
- obj->cache_level = cache_level;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
@@ -4279,6 +4280,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
struct drm_i915_gem_object *obj;
struct address_space *mapping;
+ unsigned int cache_level;
gfp_t mask;
int ret;
@@ -4317,7 +4319,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- if (HAS_LLC(dev_priv)) {
+ if (HAS_LLC(dev_priv))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
@@ -4330,12 +4332,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
* However, we maintain the display planes as UC, and so
* need to rebind when first used as such.
*/
- obj->cache_level = I915_CACHE_LLC;
- } else
- obj->cache_level = I915_CACHE_NONE;
+ cache_level = I915_CACHE_LLC;
+ else
+ cache_level = I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
trace_i915_gem_object_create(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index 348b29a845c9..8a04d33055be 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -139,7 +139,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
+ if (!(flags & I915_CLFLUSH_FORCE) &&
+ obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
return false;
trace_i915_gem_object_clflush(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5fa44767c29e..9d808838a1ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1842,7 +1842,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->request->capture_list = capture;
}
- if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+ /*
+ * If the GPU is not _reading_ through the CPU cache, we need
+ * to make sure that any writes (both previous GPU writes from
+ * before a change in snooping levels and normal CPU writes)
+ * caught in that cache are flushed to main memory.
+ */
+ if (unlikely(obj->cache_coherent & obj->cache_dirty)) {
if (i915_gem_clflush_object(obj, 0))
entry->flags &= ~EXEC_OBJECT_ASYNC;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 568bf83af1f5..c1f64ddaf8aa 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -174,6 +174,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
@@ -190,9 +191,9 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/i915_gem_object.c
new file mode 100644
index 000000000000..aab8cdd80e6d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+/**
+ * Mark up the object's coherency levels for a given cache_level
+ * @obj: #drm_i915_gem_object
+ * @cache_level: cache level
+ */
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level)
+{
+ obj->cache_level = cache_level;
+
+ if (cache_level != I915_CACHE_NONE)
+ obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+ I915_BO_CACHE_COHERENT_FOR_WRITE);
+ else if (HAS_LLC(to_i915(obj->base.dev)))
+ obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+ else
+ obj->cache_coherent = 0;
+
+ obj->cache_dirty =
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..3baa341432db 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -33,8 +33,11 @@
#include <drm/i915_drm.h>
+#include "i915_gem_request.h"
#include "i915_selftest.h"
+struct drm_i915_gem_object;
+
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -118,8 +121,10 @@ struct drm_i915_gem_object {
*/
unsigned long gt_ro:1;
unsigned int cache_level:3;
+ unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
unsigned int cache_dirty:1;
- unsigned int cache_coherent:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
@@ -391,6 +396,8 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine;
}
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index c11c915382e7..507c9f0d8df1 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -580,6 +580,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
struct drm_mm_node *stolen)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
obj = i915_gem_object_alloc(dev_priv);
if (obj == NULL)
@@ -590,8 +591,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
- obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
+ cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
if (i915_gem_object_pin_pages(obj))
goto cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ccd09e8419f5..f152a38d7079 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -804,9 +804,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
i915_gem_object_init(obj, &i915_gem_userptr_ops);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_LLC;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index caf76af36aba..c5c7e8efbdd3 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -111,6 +111,7 @@ huge_gem_object(struct drm_i915_private *i915,
dma_addr_t dma_size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!phys_size || phys_size > dma_size);
GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
@@ -128,9 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->scratch = phys_size;
return obj;
--
2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 12+ messages in thread
* ✗ Fi.CI.BAT: failure for drm/i915: Split obj->cache_coherent to track r/w (rev3)
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
` (4 preceding siblings ...)
2017-08-11 10:23 ` [PATCH v3] " Chris Wilson
@ 2017-08-11 10:55 ` Patchwork
2017-08-11 11:11 ` [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
2017-08-11 11:44 ` ✓ Fi.CI.BAT: success for drm/i915: Split obj->cache_coherent to track r/w (rev4) Patchwork
7 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-08-11 10:55 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Split obj->cache_coherent to track r/w (rev3)
URL : https://patchwork.freedesktop.org/series/28641/
State : failure
== Summary ==
Series 28641v3 drm/i915: Split obj->cache_coherent to track r/w
https://patchwork.freedesktop.org/api/1.0/series/28641/revisions/3/mbox/
Test core_auth:
Subgroup basic-auth:
pass -> INCOMPLETE (fi-bxt-j4205)
Test drv_getparams_basic:
Subgroup basic-eu-total:
pass -> INCOMPLETE (fi-pnv-d510)
Test drv_hangman:
Subgroup error-state-basic:
pass -> FAIL (fi-blb-e6850)
Test gem_busy:
Subgroup basic-busy-default:
pass -> FAIL (fi-ilk-650)
pass -> FAIL (fi-byt-n2820)
pass -> FAIL (fi-bsw-n3050)
pass -> SKIP (fi-glk-2a)
Subgroup basic-hang-default:
pass -> SKIP (fi-glk-2a)
Test gem_close_race:
Subgroup basic-threads:
pass -> DMESG-WARN (fi-blb-e6850)
Test gem_cpu_reloc:
Subgroup basic:
pass -> INCOMPLETE (fi-ilk-650)
pass -> FAIL (fi-byt-n2820)
pass -> INCOMPLETE (fi-bsw-n3050)
Test gem_cs_tlb:
Subgroup basic-default:
pass -> INCOMPLETE (fi-glk-2a)
Test gem_ctx_switch:
Subgroup basic-default-heavy:
pass -> FAIL (fi-byt-n2820)
Test gem_exec_create:
Subgroup basic:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Test gem_exec_fence:
Subgroup basic-busy-default:
pass -> FAIL (fi-byt-n2820)
Subgroup basic-wait-default:
pass -> FAIL (fi-byt-n2820)
Subgroup basic-await-default:
pass -> FAIL (fi-blb-e6850)
Subgroup nb-await-default:
pass -> FAIL (fi-blb-e6850)
Test gem_exec_flush:
Subgroup basic-batch-kernel-default-cmd:
pass -> FAIL (fi-byt-n2820)
Subgroup basic-batch-kernel-default-uc:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-snb-2600) fdo#100007
pass -> FAIL (fi-byt-n2820)
Subgroup basic-batch-kernel-default-wb:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Subgroup basic-uc-pro-default:
pass -> FAIL (fi-blb-e6850)
Subgroup basic-uc-prw-default:
pass -> FAIL (fi-byt-n2820)
Subgroup basic-uc-set-default:
pass -> FAIL (fi-byt-n2820)
Subgroup basic-wb-pro-default:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Subgroup basic-wb-ro-before-default:
pass -> FAIL (fi-blb-e6850)
Subgroup basic-wb-rw-before-default:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Subgroup basic-wb-rw-default:
pass -> FAIL (fi-blb-e6850)
Subgroup basic-wb-set-default:
pass -> FAIL (fi-byt-n2820)
Test gem_exec_gttfill:
Subgroup basic:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Test gem_exec_nop:
Subgroup basic-parallel:
pass -> FAIL (fi-blb-e6850)
Subgroup basic-series:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Test gem_exec_parallel:
Subgroup basic:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Test gem_exec_parse:
Subgroup basic-allowed:
pass -> FAIL (fi-byt-n2820)
Test gem_exec_reloc:
Subgroup basic-cpu-gtt:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Subgroup basic-write-gtt:
pass -> FAIL (fi-blb-e6850)
pass -> FAIL (fi-byt-n2820)
Subgroup basic-cpu-gtt-noreloc:
WARNING: Long output truncated
fbb8288699ef622bbfc6e10bdca6773a16f93fac drm-tip: 2017y-08m-11d-09h-03m-47s UTC integration manifest
cd60f3a64b03 drm/i915: Split obj->cache_coherent to track r/w
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5378/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
` (5 preceding siblings ...)
2017-08-11 10:55 ` ✗ Fi.CI.BAT: failure for drm/i915: Split obj->cache_coherent to track r/w (rev3) Patchwork
@ 2017-08-11 11:11 ` Chris Wilson
2017-08-15 14:34 ` Joonas Lahtinen
2017-08-11 11:44 ` ✓ Fi.CI.BAT: success for drm/i915: Split obj->cache_coherent to track r/w (rev4) Patchwork
7 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2017-08-11 11:11 UTC (permalink / raw)
To: intel-gfx; +Cc: Dongwon Kim
Another month, another story in the cache coherency saga. This time, we
come to the realisation that i915_gem_object_is_coherent() has been
reporting whether we can read from the target without requiring a cache
invalidate; but we were using it in places for testing whether we could
write into the object without requiring a cache flush. So split the
tracking into two, one to decide before reads, one after writes.
See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
transition for CPU writes") for the previous entry in this saga.
v2: Be verbose
v3: Remove unused function (i915_gem_object_is_coherent)
v4: Fix inverted coherency check prior to execbuf (from v2)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
Testcase: igt/kms_mmap_write_crc
Testcase: igt/kms_pwrite_crc
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 6 ---
drivers/gpu/drm/i915/i915_gem.c | 25 ++++++------
drivers/gpu/drm/i915/i915_gem_clflush.c | 3 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 +++-
drivers/gpu/drm/i915/i915_gem_internal.c | 7 ++--
drivers/gpu/drm/i915/i915_gem_object.c | 48 ++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_object.h | 9 ++++-
drivers/gpu/drm/i915/i915_gem_stolen.c | 5 ++-
drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-
drivers/gpu/drm/i915/selftests/huge_gem_object.c | 6 +--
11 files changed, 90 insertions(+), 32 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_object.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..892f52b53060 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -39,6 +39,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem_internal.o \
i915_gem.o \
+ i915_gem_object.o \
i915_gem_render_state.o \
i915_gem_request.o \
i915_gem_shrinker.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4f9f7b6ac276..85fdb884ef02 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4322,10 +4322,4 @@ int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap);
-static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
-{
- return (obj->cache_level != I915_CACHE_NONE ||
- HAS_LLC(to_i915(obj->base.dev)));
-}
-
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dbda7d078245..5a3f3bb3f21d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty)
return false;
- if (!obj->cache_coherent)
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
return true;
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !obj->cache_coherent)
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
drm_clflush_sg(pages);
__start_cpu_write(obj);
@@ -800,7 +800,8 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -852,7 +853,8 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -3677,8 +3679,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
- obj->cache_level = cache_level;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true; /* Always invalidate stale cachelines */
return 0;
@@ -4283,6 +4284,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
{
struct drm_i915_gem_object *obj;
struct address_space *mapping;
+ unsigned int cache_level;
gfp_t mask;
int ret;
@@ -4321,7 +4323,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
- if (HAS_LLC(dev_priv)) {
+ if (HAS_LLC(dev_priv))
/* On some devices, we can have the GPU use the LLC (the CPU
* cache) for about a 10% performance improvement
* compared to uncached. Graphics requests other than
@@ -4334,12 +4336,11 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
* However, we maintain the display planes as UC, and so
* need to rebind when first used as such.
*/
- obj->cache_level = I915_CACHE_LLC;
- } else
- obj->cache_level = I915_CACHE_NONE;
+ cache_level = I915_CACHE_LLC;
+ else
+ cache_level = I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
trace_i915_gem_object_create(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index 348b29a845c9..8a04d33055be 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -139,7 +139,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
+ if (!(flags & I915_CLFLUSH_FORCE) &&
+ obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
return false;
trace_i915_gem_object_clflush(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5fa44767c29e..cc67fa182ff8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1842,7 +1842,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
eb->request->capture_list = capture;
}
- if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
+ /*
+ * If the GPU is not _reading_ through the CPU cache, we need
+ * to make sure that any writes (both previous GPU writes from
+ * before a change in snooping levels and normal CPU writes)
+ * caught in that cache are flushed to main memory.
+ */
+ if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
if (i915_gem_clflush_object(obj, 0))
entry->flags &= ~EXEC_OBJECT_ASYNC;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 568bf83af1f5..c1f64ddaf8aa 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -174,6 +174,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
phys_addr_t size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
@@ -190,9 +191,9 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/i915_gem_object.c
new file mode 100644
index 000000000000..aab8cdd80e6d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_object.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+/**
+ * Mark up the object's coherency levels for a given cache_level
+ * @obj: #drm_i915_gem_object
+ * @cache_level: cache level
+ */
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level)
+{
+ obj->cache_level = cache_level;
+
+ if (cache_level != I915_CACHE_NONE)
+ obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+ I915_BO_CACHE_COHERENT_FOR_WRITE);
+ else if (HAS_LLC(to_i915(obj->base.dev)))
+ obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+ else
+ obj->cache_coherent = 0;
+
+ obj->cache_dirty =
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..3baa341432db 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -33,8 +33,11 @@
#include <drm/i915_drm.h>
+#include "i915_gem_request.h"
#include "i915_selftest.h"
+struct drm_i915_gem_object;
+
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -118,8 +121,10 @@ struct drm_i915_gem_object {
*/
unsigned long gt_ro:1;
unsigned int cache_level:3;
+ unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
unsigned int cache_dirty:1;
- unsigned int cache_coherent:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
@@ -391,6 +396,8 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine;
}
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index c11c915382e7..507c9f0d8df1 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -580,6 +580,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
struct drm_mm_node *stolen)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
obj = i915_gem_object_alloc(dev_priv);
if (obj == NULL)
@@ -590,8 +591,8 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
- obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
+ cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
if (i915_gem_object_pin_pages(obj))
goto cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index ccd09e8419f5..f152a38d7079 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -804,9 +804,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
i915_gem_object_init(obj, &i915_gem_userptr_ops);
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = I915_CACHE_LLC;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index caf76af36aba..c5c7e8efbdd3 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -111,6 +111,7 @@ huge_gem_object(struct drm_i915_private *i915,
dma_addr_t dma_size)
{
struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
GEM_BUG_ON(!phys_size || phys_size > dma_size);
GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
@@ -128,9 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_coherent = i915_gem_object_is_coherent(obj);
- obj->cache_dirty = !obj->cache_coherent;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
obj->scratch = phys_size;
return obj;
--
2.13.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2] drm/i915: Split obj->cache_coherent to track r/w
2017-08-11 10:11 ` [PATCH v2] " Chris Wilson
@ 2017-08-11 11:12 ` Chris Wilson
0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-08-11 11:12 UTC (permalink / raw)
To: intel-gfx; +Cc: Dongwon Kim
Quoting Chris Wilson (2017-08-11 11:11:31)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 5fa44767c29e..9d808838a1ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1842,7 +1842,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> eb->request->capture_list = capture;
> }
>
> - if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
> + /*
> + * If the GPU is not _reading_ through the CPU cache, we need
> + * to make sure that any writes (both previous GPU writes from
> + * before a change in snooping levels and normal CPU writes)
> + * caught in that cache are flushed to main memory.
> + */
> + if (unlikely(obj->cache_coherent & obj->cache_dirty)) {
Failure in caffeination.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915: Split obj->cache_coherent to track r/w (rev4)
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
` (6 preceding siblings ...)
2017-08-11 11:11 ` [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
@ 2017-08-11 11:44 ` Patchwork
7 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2017-08-11 11:44 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Split obj->cache_coherent to track r/w (rev4)
URL : https://patchwork.freedesktop.org/series/28641/
State : success
== Summary ==
Series 28641v4 drm/i915: Split obj->cache_coherent to track r/w
https://patchwork.freedesktop.org/api/1.0/series/28641/revisions/4/mbox/
fi-bdw-5557u total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:456s
fi-bdw-gvtdvm total:279 pass:265 dwarn:0 dfail:0 fail:0 skip:14 time:433s
fi-blb-e6850 total:279 pass:224 dwarn:1 dfail:0 fail:0 skip:54 time:354s
fi-bsw-n3050 total:279 pass:243 dwarn:0 dfail:0 fail:0 skip:36 time:542s
fi-bxt-j4205 total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:517s
fi-byt-j1900 total:279 pass:254 dwarn:1 dfail:0 fail:0 skip:24 time:525s
fi-byt-n2820 total:279 pass:250 dwarn:1 dfail:0 fail:0 skip:28 time:509s
fi-glk-2a total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:612s
fi-hsw-4770 total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:445s
fi-hsw-4770r total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:418s
fi-ilk-650 total:279 pass:229 dwarn:0 dfail:0 fail:0 skip:50 time:425s
fi-ivb-3520m total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:506s
fi-ivb-3770 total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:487s
fi-kbl-7500u total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:473s
fi-kbl-7560u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:590s
fi-kbl-r total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:592s
fi-pnv-d510 total:279 pass:223 dwarn:1 dfail:0 fail:0 skip:55 time:526s
fi-skl-6260u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:455s
fi-skl-6700k total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:472s
fi-skl-6770hq total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:480s
fi-skl-gvtdvm total:279 pass:266 dwarn:0 dfail:0 fail:0 skip:13 time:438s
fi-skl-x1585l total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:481s
fi-snb-2520m total:279 pass:251 dwarn:0 dfail:0 fail:0 skip:28 time:550s
fi-snb-2600 total:279 pass:250 dwarn:0 dfail:0 fail:0 skip:29 time:408s
fbb8288699ef622bbfc6e10bdca6773a16f93fac drm-tip: 2017y-08m-11d-09h-03m-47s UTC integration manifest
4c222cfa814d drm/i915: Split obj->cache_coherent to track r/w
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_5380/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w
2017-08-11 11:11 ` [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
@ 2017-08-15 14:34 ` Joonas Lahtinen
2017-08-15 14:51 ` Chris Wilson
0 siblings, 1 reply; 12+ messages in thread
From: Joonas Lahtinen @ 2017-08-15 14:34 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Dongwon Kim
On Fri, 2017-08-11 at 12:11 +0100, Chris Wilson wrote:
> Another month, another story in the cache coherency saga. This time, we
> come to the realisation that i915_gem_object_is_coherent() has been
> reporting whether we can read from the target without requiring a cache
> invalidate; but we were using it in places for testing whether we could
> write into the object without requiring a cache flush. So split the
> tracking into two, one to decide before reads, one after writes.
>
> See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
> transition for CPU writes") for the previous entry in this saga.
>
> v2: Be verbose
> v3: Remove unused function (i915_gem_object_is_coherent)
> v4: Fix inverted coherency check prior to execbuf (from v2)
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
> Testcase: igt/kms_mmap_write_crc
> Testcase: igt/kms_pwrite_crc
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Dongwon Kim <dongwon.kim@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
<SNIP>
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1842,7 +1842,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> eb->request->capture_list = capture;
> }
>
> - if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
> + /*
> + * If the GPU is not _reading_ through the CPU cache, we need
> + * to make sure that any writes (both previous GPU writes from
> + * before a change in snooping levels and normal CPU writes)
> + * caught in that cache are flushed to main memory.
> + */
> + if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
I'd rather see this as "obj->cache_dirty && !(obj->cache_coherent &
.._READ)" but if GCC is doing such a poor job, do add a comment here,
then this is:
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w
2017-08-15 14:34 ` Joonas Lahtinen
@ 2017-08-15 14:51 ` Chris Wilson
0 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2017-08-15 14:51 UTC (permalink / raw)
To: Joonas Lahtinen, intel-gfx; +Cc: Dongwon Kim
Quoting Joonas Lahtinen (2017-08-15 15:34:38)
> On Fri, 2017-08-11 at 12:11 +0100, Chris Wilson wrote:
> > Another month, another story in the cache coherency saga. This time, we
> > come to the realisation that i915_gem_object_is_coherent() has been
> > reporting whether we can read from the target without requiring a cache
> > invalidate; but we were using it in places for testing whether we could
> > write into the object without requiring a cache flush. So split the
> > tracking into two, one to decide before reads, one after writes.
> >
> > See commit e27ab73d17ef ("drm/i915: Mark CPU cache as dirty on every
> > transition for CPU writes") for the previous entry in this saga.
> >
> > v2: Be verbose
> > v3: Remove unused function (i915_gem_object_is_coherent)
> > v4: Fix inverted coherency check prior to execbuf (from v2)
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101109
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101555
> > Testcase: igt/kms_mmap_write_crc
> > Testcase: igt/kms_pwrite_crc
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > Cc: Dongwon Kim <dongwon.kim@intel.com>
> > Cc: Matt Roper <matthew.d.roper@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
> <SNIP>
>
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -1842,7 +1842,13 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> > eb->request->capture_list = capture;
> > }
> >
> > - if (unlikely(obj->cache_dirty && !obj->cache_coherent)) {
> > + /*
> > + * If the GPU is not _reading_ through the CPU cache, we need
> > + * to make sure that any writes (both previous GPU writes from
> > + * before a change in snooping levels and normal CPU writes)
> > + * caught in that cache are flushed to main memory.
> > + */
> > + if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
>
> I'd rather see this as "obj->cache_dirty && !(obj->cache_coherent &
> .._READ)" but if GCC is doing such a poor job, do add a comment here,
> then this is:
Added the comment, should tie nicely into any grepping if we need.
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Thanks, pushed. Now to poke you for some late execbuf patches...
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2017-08-15 14:51 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-10 16:20 [PATCH] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
2017-08-10 16:53 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-08-11 7:26 ` [PATCH] " Maarten Lankhorst
2017-08-11 8:08 ` Joonas Lahtinen
2017-08-11 10:11 ` [PATCH v2] " Chris Wilson
2017-08-11 11:12 ` Chris Wilson
2017-08-11 10:23 ` [PATCH v3] " Chris Wilson
2017-08-11 10:55 ` ✗ Fi.CI.BAT: failure for drm/i915: Split obj->cache_coherent to track r/w (rev3) Patchwork
2017-08-11 11:11 ` [PATCH v4] drm/i915: Split obj->cache_coherent to track r/w Chris Wilson
2017-08-15 14:34 ` Joonas Lahtinen
2017-08-15 14:51 ` Chris Wilson
2017-08-11 11:44 ` ✓ Fi.CI.BAT: success for drm/i915: Split obj->cache_coherent to track r/w (rev4) Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.