All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
@ 2017-03-06  9:29 Chris Wilson
  2017-03-06  9:29 ` [PATCH 2/3] drm/i915: Wake up all waiters before idling Chris Wilson
                   ` (5 more replies)
  0 siblings, 6 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-06  9:29 UTC (permalink / raw)
  To: intel-gfx

Before we instantiate/pin the backing store for our use, we
can prepopulate the shmemfs filp efficiently using the a
write into the pagecache. We avoid the penalty of instantiating
all the pages, important if the user is just writing to a few
and never uses the object on the GPU, and using a direct write
into shmemfs allows it to avoid the cost of retrieving a page
(either swapin or clearing-before-use) before it is overwritten.

This can be extended later to provide additional specialisation for
other backends (other than shmemfs).

References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c        | 78 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_object.h |  3 ++
 2 files changed, 81 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7c20601fe1de..73d5cee23dd7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1452,6 +1452,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
+	ret = -ENODEV;
+	if (obj->ops->pwrite)
+		ret = obj->ops->pwrite(obj, args);
+	if (ret != -ENODEV)
+		goto err;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
@@ -2575,6 +2581,75 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	goto out_unlock;
 }
 
+static int
+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
+			   const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using the a
+	 * write into the pagecache. We avoid the penalty of instantiating
+	 * all the pages, important if the user is just writing to a few
+	 * and never uses the object on the GPU, and using a direct write
+	 * into shmemfs allows it to avoid the cost of retrieving a page
+	 * (either swapin or clearing-before-use) before it is overwritten.
+	 */
+	if (READ_ONCE(obj->mm.pages))
+		return -ENODEV;
+
+	/* Before the pages are instantioted the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's perogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len,
+					    0, &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap(page);
+		unwritten = copy_from_user(vaddr + pg, user_data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(obj->base.filp, mapping, offset,
+					  len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		if (unwritten)
+			return -EFAULT;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
 static bool ban_context(const struct i915_gem_context *ctx)
 {
 	return (i915_gem_context_is_bannable(ctx) &&
@@ -3991,8 +4066,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
 	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
 		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
 	.get_pages = i915_gem_object_get_pages_gtt,
 	.put_pages = i915_gem_object_put_pages_gtt,
+
+	.pwrite = i915_gem_object_pwrite_gtt,
 };
 
 struct drm_i915_gem_object *
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 33b0dc4782a9..d26155e0a026 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -56,6 +56,9 @@ struct drm_i915_gem_object_ops {
 	struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
 
+	int (*pwrite)(struct drm_i915_gem_object *,
+		      const struct drm_i915_gem_pwrite *);
+
 	int (*dmabuf_export)(struct drm_i915_gem_object *);
 	void (*release)(struct drm_i915_gem_object *);
 };
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] drm/i915: Wake up all waiters before idling
  2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
@ 2017-03-06  9:29 ` Chris Wilson
  2017-03-06 11:02   ` Chris Wilson
  2017-03-06  9:29 ` [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind Chris Wilson
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-06  9:29 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

When we idle, we wakeup the first waiter (checking to see if it missed
an earlier wakeup) and disarm the breadcrumbs. However, we now assert
that there are no waiter when the interrupt is disabled, triggering an
assert if there were multiple waiters when we idled.

[  420.842275] invalid opcode: 0000 [#1] PREEMPT SMP
[  420.842285] Modules linked in: vgem snd_hda_codec_realtek x86_pkg_temp_thermal snd_hda_codec_generic intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep mei_me snd_hda_core mei snd_pcm lpc_ich i915 r8169 mii prime_numbers
[  420.842357] CPU: 4 PID: 8714 Comm: kms_pipe_crc_ba Tainted: G     U  W       4.10.0-CI-CI_DRM_2280+ #1
[  420.842377] Hardware name: Hewlett-Packard HP Pro 3500 Series/2ABF, BIOS 8.11 10/24/2012
[  420.842395] task: ffff880117ddce40 task.stack: ffffc90001114000
[  420.842439] RIP: 0010:__intel_engine_remove_wait+0x1f4/0x200 [i915]
[  420.842454] RSP: 0018:ffffc90001117b18 EFLAGS: 00010046
[  420.842467] RAX: 0000000000000000 RBX: ffff88010c25c2a8 RCX: 0000000000000001
[  420.842481] RDX: 0000000000000001 RSI: 00000000ffffffff RDI: ffffc90001117c50
[  420.842495] RBP: ffffc90001117b58 R08: 0000000011e52352 R09: c4d16acc00000000
[  420.842511] R10: ffffffff82789eb0 R11: ffff880117ddce40 R12: ffffc90001117c50
[  420.842525] R13: ffffc90001117c50 R14: 0000000000000078 R15: 0000000000000000
[  420.842540] FS:  00007fe47dda0a40(0000) GS:ffff88011fb00000(0000) knlGS:0000000000000000
[  420.842559] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  420.842571] CR2: 00007fd6c0a2cec4 CR3: 000000010a5e5000 CR4: 00000000001406e0
[  420.842586] Call Trace:
[  420.842595]  ? do_raw_spin_lock+0xad/0xb0
[  420.842635]  intel_engine_remove_wait.part.3+0x26/0x40 [i915]
[  420.842678]  intel_engine_remove_wait+0xe/0x20 [i915]
[  420.842721]  i915_wait_request+0x4f0/0x8c0 [i915]
[  420.842736]  ? wake_up_q+0x70/0x70
[  420.842747]  ? wake_up_q+0x70/0x70
[  420.842787]  i915_gem_object_wait_fence+0x7d/0x1a0 [i915]
[  420.842829]  i915_gem_object_wait+0x30d/0x520 [i915]
[  420.842842]  ? __this_cpu_preempt_check+0x13/0x20
[  420.842884]  i915_gem_wait_ioctl+0x12e/0x2e0 [i915]
[  420.842924]  ? i915_gem_wait_ioctl+0x22/0x2e0 [i915]
[  420.842939]  drm_ioctl+0x200/0x450
[  420.842976]  ? i915_gem_set_wedged+0x90/0x90 [i915]
[  420.842993]  do_vfs_ioctl+0x90/0x6e0
[  420.843003]  ? entry_SYSCALL_64_fastpath+0x5/0xb1
[  420.843017]  ? __this_cpu_preempt_check+0x13/0x20
[  420.843030]  ? trace_hardirqs_on_caller+0xe7/0x200
[  420.843042]  SyS_ioctl+0x3c/0x70
[  420.843054]  entry_SYSCALL_64_fastpath+0x1c/0xb1
[  420.843065] RIP: 0033:0x7fe47c4b9357
[  420.843075] RSP: 002b:00007ffc3c0633c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  420.843094] RAX: ffffffffffffffda RBX: ffffffff81482393 RCX: 00007fe47c4b9357
[  420.843109] RDX: 00007ffc3c063400 RSI: 00000000c010646c RDI: 0000000000000004
[  420.843123] RBP: ffffc90001117f88 R08: 0000000000000008 R09: 0000000000000000
[  420.843137] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[  420.843151] R13: 0000000000000004 R14: 00000000c010646c R15: 0000000000000000
[  420.843168]  ? __this_cpu_preempt_check+0x13/0x20
[  420.843180] Code: 81 48 c7 c1 40 6a 16 a0 48 c7 c2 47 29 15 a0 be 17 01 00 00 48 c7 c7 10 6a 16 a0 e8 c7 ea fe e0 e9 5d ff ff ff 0f 0b 0f 0b 0f 0b <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 e8 67 41 7e e1
[  420.843325] RIP: __intel_engine_remove_wait+0x1f4/0x200 [i915] RSP: ffffc90001117b18

Fixes: b66255f0f779 ("drm/i915: Refactor wakeup of the next breadcrumb waiter")
Fixes: 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 2393bb9fe665..cf402cafa3ac 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -168,6 +168,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
 	lockdep_assert_held(&b->irq_lock);
+	GEM_BUG_ON(b->irq_wait);
 
 	if (b->irq_enabled) {
 		irq_disable(engine);
@@ -180,23 +181,30 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	unsigned long flags;
+	struct intel_wait *wait, *n;
 
 	if (!b->irq_armed)
 		return;
 
-	spin_lock_irqsave(&b->irq_lock, flags);
-
 	/* We only disarm the irq when we are idle (all requests completed),
 	 * so if there remains a sleeping waiter, it missed the request
 	 * completion.
 	 */
-	if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP)
-		missed_breadcrumb(engine);
 
+	spin_lock_irq(&b->rb_lock);
+	rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
+		RB_CLEAR_NODE(&wait->node);
+		if (wake_up_process(wait->tsk) && wait == b->irq_wait)
+			missed_breadcrumb(engine);
+	}
+	b->waiters = RB_ROOT;
+
+	spin_lock(&b->irq_lock);
+	b->irq_wait = NULL;
 	__intel_engine_disarm_breadcrumbs(engine);
+	spin_unlock(&b->irq_lock);
 
-	spin_unlock_irqrestore(&b->irq_lock, flags);
+	spin_unlock_irq(&b->rb_lock);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind
  2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
  2017-03-06  9:29 ` [PATCH 2/3] drm/i915: Wake up all waiters before idling Chris Wilson
@ 2017-03-06  9:29 ` Chris Wilson
  2017-03-06 14:03   ` Tvrtko Ursulin
  2017-03-06 12:24 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-06  9:29 UTC (permalink / raw)
  To: intel-gfx

Unbind the vma may happen at any time, outside of the normal GT wakeref.
As such it relies on having a wakeref of its own. However, we can forgo
clearing the register whilst the device is asleep and just mark it as
unused - so that when we do wake up the device, we will clear the unused
fence register (see i915_gem_restore_fences).

[22423.944631] WARNING: CPU: 3 PID: 26178 at drivers/gpu/drm/i915/intel_drv.h:1739 i915_vma_put_fence+0xf3/0x100 [i915]
[22423.946053] RPM wakelock ref not held during HW access
[22423.946056] Modules linked in: vgem(E) i915(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) x86_pkg_temp_thermal(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) intel_gtt(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) evdev(E) aesni_intel(E) aes_x86_64(E) crypto_simd(E) cryptd(E) glue_helper(E) sysimgblt(E) fb_sys_fops(E) prime_numbers(E) drm(E) efivars(E) mei_me(E) lpc_ich(E) mei(E) mfd_core(E) battery(E) video(E) acpi_pad(E) button(E) tpm_tis(E) tpm_tis_core(E) tpm(E) autofs4(E) i2c_i801(E) thermal(E) fan(E) i2c_designware_platform(E) i2c_designware_core(E)
[22423.946438] CPU: 2 PID: 26178 Comm: gem_concurrent_ Tainted: G            E   4.10.0+ #101
[22423.946513] Hardware name: ��������������������������������� ���������������������������������/���������������������������������, BIOS RYBDWi35.86A.0246.2
[22423.946600] Call Trace:
[22423.946641]  dump_stack+0x68/0x9f
[22423.946703]  __warn+0x107/0x130
[22423.946763]  warn_slowpath_fmt+0xa8/0xe0
[22423.946825]  ? __warn+0x130/0x130
[22423.946868]  ? free_hot_cold_page_list+0x53/0x70
[22423.946942]  ? mark_lock+0xcc/0x7f0
[22423.946997]  ? __lock_is_held+0x84/0x100
[22423.947115]  ? i915_vma_put_fence+0x64/0x100 [i915]
[22423.947224]  i915_vma_put_fence+0xf3/0x100 [i915]
[22423.947335]  i915_vma_unbind+0x4da/0x560 [i915]
[22423.947387]  ? rb_erase+0x812/0x8a0
[22423.947439]  ? kfree+0xa2/0xd0
[22423.947562]  i915_vma_close+0x159/0x180 [i915]
[22423.947674]  intel_ring_free+0x31/0x50 [i915]
[22423.947776]  i915_gem_context_free+0x1ff/0x3d0 [i915]
[22423.947887]  context_close+0x106/0x110 [i915]
[22423.947989]  context_idr_cleanup+0xc/0x10 [i915]
[22423.948041]  idr_for_each+0x14d/0x1d0
[22423.948158]  ? context_close+0x110/0x110 [i915]
[22423.948206]  ? get_from_free_list+0x70/0x70
[22423.948261]  ? __lock_is_held+0x84/0x100
[22423.948325]  ? __mutex_unlock_slowpath+0xd4/0x400
[22423.948448]  i915_gem_context_close+0x4b/0x90 [i915]
[22423.948544]  i915_driver_preclose+0x28/0x50 [i915]
[22423.948620]  drm_release+0x175/0x690 [drm]
[22423.948681]  ? fcntl_setlk+0x5e0/0x5e0
[22423.948746]  __fput+0x17d/0x300
[22423.948807]  ____fput+0x9/0x10
[22423.948859]  task_work_run+0xa7/0xe0
[22423.948924]  do_exit+0x4d2/0x13e0
[22423.948986]  ? mm_update_next_owner+0x320/0x320
[22423.949051]  ? __do_page_fault+0x209/0x5c0
[22423.949110]  ? mark_held_locks+0x23/0xc0
[22423.949166]  ? entry_SYSCALL_64_fastpath+0x5/0xb1
[22423.949232]  do_group_exit+0x93/0x160
[22423.949289]  SyS_exit_group+0x18/0x20
[22423.949350]  entry_SYSCALL_64_fastpath+0x1c/0xb1
[22423.949403] RIP: 0033:0x7f9cc2e154c8
[22423.949484] RSP: 002b:00007ffd7e81b448 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
[22423.949557] RAX: ffffffffffffffda RBX: ffffffff810ef1f0 RCX: 00007f9cc2e154c8
[22423.949617] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
[22423.949677] RBP: ffff880367e9ff98 R08: 00000000000000e7 R09: ffffffffffffff88
[22423.949741] R10: 00007f9cc1d5c000 R11: 0000000000000246 R12: 00007f9cc30f6c30
[22423.949798] R13: 0000000000000000 R14: 00007f9cc30f6c20 R15: 0000000000000003
[22423.949868]  ? trace_hardirqs_off_caller+0xc0/0x110

v2: Move the rpm check down a layer so that we still perform the
vma/fence update required for the deferred mmio write on resume.
v3: Don't touch i915_gem_object_set_cache_level() and leave the rpm to
the low level routines (such as i915_vma_put_fence).
v4: vma may be null in fence_write, so extract drm_i915_private from
fence->i915

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_fence_reg.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index fadbe8f4c745..5fe2cd8c8f28 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -248,7 +248,14 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 		list_move(&fence->link, &fence->i915->mm.fence_list);
 	}
 
-	fence_write(fence, vma);
+	/* We only need to update the register itself if the device is awake.
+	 * If the device is currently powered down, we will defer the write
+	 * to the runtime resume, see i915_gem_restore_fences().
+	 */
+	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
+		fence_write(fence, vma);
+		intel_runtime_pm_put(fence->i915);
+	}
 
 	if (vma) {
 		if (fence->vma != vma) {
@@ -278,8 +285,6 @@ i915_vma_put_fence(struct i915_vma *vma)
 {
 	struct drm_i915_fence_reg *fence = vma->fence;
 
-	assert_rpm_wakelock_held(vma->vm->i915);
-
 	if (!fence)
 		return 0;
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] drm/i915: Wake up all waiters before idling
  2017-03-06  9:29 ` [PATCH 2/3] drm/i915: Wake up all waiters before idling Chris Wilson
@ 2017-03-06 11:02   ` Chris Wilson
  2017-03-06 13:47     ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-06 11:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

On Mon, Mar 06, 2017 at 09:29:15AM +0000, Chris Wilson wrote:
>  void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>  {
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -	unsigned long flags;
> +	struct intel_wait *wait, *n;
>  
>  	if (!b->irq_armed)
>  		return;
>  
> -	spin_lock_irqsave(&b->irq_lock, flags);
> -
>  	/* We only disarm the irq when we are idle (all requests completed),
>  	 * so if there remains a sleeping waiter, it missed the request

*so if the bottom-half remains asleep, it missed the request

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
  2017-03-06  9:29 ` [PATCH 2/3] drm/i915: Wake up all waiters before idling Chris Wilson
  2017-03-06  9:29 ` [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind Chris Wilson
@ 2017-03-06 12:24 ` Patchwork
  2017-03-06 12:24 ` [PATCH 1/3] " Tvrtko Ursulin
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 20+ messages in thread
From: Patchwork @ 2017-03-06 12:24 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
URL   : https://patchwork.freedesktop.org/series/20747/
State : failure

== Summary ==

Series 20747v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/20747/revisions/1/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                fail       -> PASS       (fi-snb-2600) fdo#100007
Test kms_cursor_legacy:
        Subgroup basic-flip-after-cursor-varying-size:
                pass       -> INCOMPLETE (fi-bxt-t5700)

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:278  pass:239  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:203  pass:189  dwarn:0   dfail:0   fail:0   skip:13 
fi-byt-j1900     total:278  pass:251  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:278  pass:247  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16 
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:278  pass:259  dwarn:1   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29 

2b4b4e5e7eab214579146e59214ccd7684955cca drm-tip: 2017y-03m-06d-10h-57m-03s UTC integration manifest
4736213 drm/i915: Take rpm wakelock for releasing the fence on unbind
a1f7cd0 drm/i915: Wake up all waiters before idling
7ce9756 drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4069/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
                   ` (2 preceding siblings ...)
  2017-03-06 12:24 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Patchwork
@ 2017-03-06 12:24 ` Tvrtko Ursulin
  2017-03-06 12:48   ` Chris Wilson
  2017-03-06 19:48 ` Joonas Lahtinen
  2017-03-07  9:55 ` Petri Latvala
  5 siblings, 1 reply; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-06 12:24 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/03/2017 09:29, Chris Wilson wrote:
> Before we instantiate/pin the backing store for our use, we
> can prepopulate the shmemfs filp efficiently using the a
> write into the pagecache. We avoid the penalty of instantiating
> all the pages, important if the user is just writing to a few
> and never uses the object on the GPU, and using a direct write
> into shmemfs allows it to avoid the cost of retrieving a page
> (either swapin or clearing-before-use) before it is overwritten.
>
> This can be extended later to provide additional specialisation for
> other backends (other than shmemfs).
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.william.auld@gmail.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c        | 78 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_object.h |  3 ++
>  2 files changed, 81 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7c20601fe1de..73d5cee23dd7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1452,6 +1452,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>
>  	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
>
> +	ret = -ENODEV;
> +	if (obj->ops->pwrite)
> +		ret = obj->ops->pwrite(obj, args);

ops->pwrite_nowait or some good name to signify the conditions?

I am not sure that the obj->mm.pages check can be in the vfunc since 
this call happens before the i915_gem_object_wait. Which to me sounds 
like we should be explicit at this level that the ops->pwrite is not 
allowed to run if obj->mm.pages has already been instantiated.

Or if this is only true for the normal objects, and the future 
specialisation may become full, then the standard pattern of

	if (obj->ops->pwrite)
		ret = obj->ops->pwrite();
	else
		ret = default_obj_pwrite();

?

In which case ops->pwrite would be the correct name to keep.

The ops->pwrite for the shmemfs objects would then become 
i915_gem_object_pwrite_gtt from below followed by default_obj_pwrite.

I don't know, maybe I am complicating it too much?

> +	if (ret != -ENODEV)
> +		goto err;
> +
>  	ret = i915_gem_object_wait(obj,
>  				   I915_WAIT_INTERRUPTIBLE |
>  				   I915_WAIT_ALL,
> @@ -2575,6 +2581,75 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>  	goto out_unlock;
>  }
>
> +static int
> +i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,

_shmem ? Or _nowait_shmem depending on the outcome of the above. Just 
think _gtt is wrong. Or keep it for consistency since get/put pages for 
shmem objects are also called _gtt at the moment, so until that is changed?

> +			   const struct drm_i915_gem_pwrite *arg)
> +{
> +	struct address_space *mapping = obj->base.filp->f_mapping;
> +	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
> +	u64 remain, offset;
> +	unsigned int pg;
> +
> +	/* Before we instantiate/pin the backing store for our use, we
> +	 * can prepopulate the shmemfs filp efficiently using the a
> +	 * write into the pagecache. We avoid the penalty of instantiating
> +	 * all the pages, important if the user is just writing to a few
> +	 * and never uses the object on the GPU, and using a direct write
> +	 * into shmemfs allows it to avoid the cost of retrieving a page
> +	 * (either swapin or clearing-before-use) before it is overwritten.
> +	 */
> +	if (READ_ONCE(obj->mm.pages))
> +		return -ENODEV;
> +
> +	/* Before the pages are instantioted the object is treated as being

Typo in instantiated.

> +	 * in the CPU domain. The pages will be clflushed as required before
> +	 * use, and we can freely write into the pages directly. If userspace
> +	 * races pwrite with any other operation; corruption will ensue -
> +	 * that is userspace's perogative!

Typo in prerogative. (Thanks to automatic spell checker, have to say I 
thought it was spelled perogative myself!)

> +	 */
> +
> +	remain = arg->size;
> +	offset = arg->offset;
> +	pg = offset_in_page(offset);
> +
> +	do {
> +		unsigned int len, unwritten;
> +		struct page *page;
> +		void *data, *vaddr;
> +		int err;
> +
> +		len = PAGE_SIZE - pg;
> +		if (len > remain)
> +			len = remain;
> +
> +		err = pagecache_write_begin(obj->base.filp, mapping,
> +					    offset, len,
> +					    0, &page, &data);
> +		if (err < 0)
> +			return err;
> +
> +		vaddr = kmap(page);
> +		unwritten = copy_from_user(vaddr + pg, user_data, len);
> +		kunmap(page);
> +
> +		err = pagecache_write_end(obj->base.filp, mapping, offset,
> +					  len, len - unwritten,
> +					  page, data);
> +		if (err < 0)
> +			return err;
> +
> +		if (unwritten)
> +			return -EFAULT;
> +
> +		remain -= len;
> +		user_data += len;
> +		offset += len;
> +		pg = 0;
> +	} while (remain);
> +
> +	return 0;
> +}
> +
>  static bool ban_context(const struct i915_gem_context *ctx)
>  {
>  	return (i915_gem_context_is_bannable(ctx) &&
> @@ -3991,8 +4066,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
>  	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
>  		 I915_GEM_OBJECT_IS_SHRINKABLE,
> +
>  	.get_pages = i915_gem_object_get_pages_gtt,
>  	.put_pages = i915_gem_object_put_pages_gtt,
> +
> +	.pwrite = i915_gem_object_pwrite_gtt,
>  };
>
>  struct drm_i915_gem_object *
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 33b0dc4782a9..d26155e0a026 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -56,6 +56,9 @@ struct drm_i915_gem_object_ops {
>  	struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
>  	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
>
> +	int (*pwrite)(struct drm_i915_gem_object *,
> +		      const struct drm_i915_gem_pwrite *);
> +
>  	int (*dmabuf_export)(struct drm_i915_gem_object *);
>  	void (*release)(struct drm_i915_gem_object *);
>  };
>

Code looks OK, just would like to discuss the design of the vfunc a bit.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06 12:24 ` [PATCH 1/3] " Tvrtko Ursulin
@ 2017-03-06 12:48   ` Chris Wilson
  2017-03-06 13:49     ` Tvrtko Ursulin
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-06 12:48 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Mar 06, 2017 at 12:24:46PM +0000, Tvrtko Ursulin wrote:
> 
> On 06/03/2017 09:29, Chris Wilson wrote:
> >Before we instantiate/pin the backing store for our use, we
> >can prepopulate the shmemfs filp efficiently using the a
> >write into the pagecache. We avoid the penalty of instantiating
> >all the pages, important if the user is just writing to a few
> >and never uses the object on the GPU, and using a direct write
> >into shmemfs allows it to avoid the cost of retrieving a page
> >(either swapin or clearing-before-use) before it is overwritten.
> >
> >This can be extended later to provide additional specialisation for
> >other backends (other than shmemfs).
> >
> >References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Matthew Auld <matthew.william.auld@gmail.com>
> >Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> >---
> > drivers/gpu/drm/i915/i915_gem.c        | 78 ++++++++++++++++++++++++++++++++++
> > drivers/gpu/drm/i915/i915_gem_object.h |  3 ++
> > 2 files changed, 81 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 7c20601fe1de..73d5cee23dd7 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -1452,6 +1452,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >
> > 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
> >
> >+	ret = -ENODEV;
> >+	if (obj->ops->pwrite)
> >+		ret = obj->ops->pwrite(obj, args);
> 
> ops->pwrite_nowait or some good name to signify the conditions?
> 
> I am not sure that the obj->mm.pages check can be in the vfunc since
> this call happens before the i915_gem_object_wait. Which to me
> sounds like we should be explicit at this level that the ops->pwrite
> is not allowed to run if obj->mm.pages has already been
> instantiated.

That doesn't make sense to me. There's no reason that the backend can't
handle the wait, or understand special cases for when there are pages.
 
> Or if this is only true for the normal objects, and the future
> specialisation may become full, then the standard pattern of
> 
> 	if (obj->ops->pwrite)
> 		ret = obj->ops->pwrite();
> 	else
> 		ret = default_obj_pwrite();
> 
> ?

Yeah, is kind of where I think we will end up. However, pwrite is of
limited usefulness that I think we should do this case by case. Getting
rid the the shmem/phys special cases would be nice.

The sketch I have is that each backend has something like:

	err = fast_pwrite();
	if (err != -ENODEV)
		return err;

	err = default_gtt_pwrite();
	if (err != -ENODEV)
		return err;

	return slow_pwrite();

The challenge is trying to limit the duplication of the common steps to
avoid struct_mutex. (Ideally by eliminating struct_mutex entirely from
this path.)

> In which case ops->pwrite would be the correct name to keep.
> 
> The ops->pwrite for the shmemfs objects would then become
> i915_gem_object_pwrite_gtt from below followed by
> default_obj_pwrite.
> 
> I don't know, maybe I am complicating it too much?

For the moment, yes :) This was a quick hack to hide a regression - the
real bug fix will be breaking struct_mutex out of the shrinker, I think,
and there's some nasty behaviour to resolve when we do hit the shrinker
the whole object page-in/page-out behaviour is much, much slower than
what should be the equivalent with individual pages via shmemfs. The
bonus here is that shmemfs can avoid clearing/swapping-in the page if
knows it will be completely overwritten, which makes the patch useful on
its own merit.

> >+	if (ret != -ENODEV)
> >+		goto err;
> >+
> > 	ret = i915_gem_object_wait(obj,
> > 				   I915_WAIT_INTERRUPTIBLE |
> > 				   I915_WAIT_ALL,
> >@@ -2575,6 +2581,75 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
> > 	goto out_unlock;
> > }
> >
> >+static int
> >+i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
> 
> _shmem ? Or _nowait_shmem depending on the outcome of the above.
> Just think _gtt is wrong. Or keep it for consistency since get/put
> pages for shmem objects are also called _gtt at the moment, so until
> that is changed?

Right, until we actually move this to i915_gem_object_shmemfs.c (or just
i915_gem_shmemfs) I choose _gtt to match the existing nomenclature.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] drm/i915: Wake up all waiters before idling
  2017-03-06 11:02   ` Chris Wilson
@ 2017-03-06 13:47     ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-06 13:47 UTC (permalink / raw)
  To: intel-gfx, mika.kuoppala, Mika Kuoppala, Tvrtko Ursulin

On Mon, Mar 06, 2017 at 11:02:03AM +0000, Chris Wilson wrote:
> On Mon, Mar 06, 2017 at 09:29:15AM +0000, Chris Wilson wrote:
> >  void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> >  {
> >  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > -	unsigned long flags;
> > +	struct intel_wait *wait, *n;
> >  
> >  	if (!b->irq_armed)
> >  		return;
> >  
> > -	spin_lock_irqsave(&b->irq_lock, flags);
> > -
> >  	/* We only disarm the irq when we are idle (all requests completed),
> >  	 * so if there remains a sleeping waiter, it missed the request
> 
> *so if the bottom-half remains asleep, it missed the request
> 
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

Pushed this patch as it should prevent some of the sporadic oopses in
CI.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06 12:48   ` Chris Wilson
@ 2017-03-06 13:49     ` Tvrtko Ursulin
  2017-03-06 14:14       ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-06 13:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/03/2017 12:48, Chris Wilson wrote:
> On Mon, Mar 06, 2017 at 12:24:46PM +0000, Tvrtko Ursulin wrote:
>>
>> On 06/03/2017 09:29, Chris Wilson wrote:
>>> Before we instantiate/pin the backing store for our use, we
>>> can prepopulate the shmemfs filp efficiently using the a
>>> write into the pagecache. We avoid the penalty of instantiating
>>> all the pages, important if the user is just writing to a few
>>> and never uses the object on the GPU, and using a direct write
>>> into shmemfs allows it to avoid the cost of retrieving a page
>>> (either swapin or clearing-before-use) before it is overwritten.
>>>
>>> This can be extended later to provide additional specialisation for
>>> other backends (other than shmemfs).
>>>
>>> References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Matthew Auld <matthew.william.auld@gmail.com>
>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_gem.c        | 78 ++++++++++++++++++++++++++++++++++
>>> drivers/gpu/drm/i915/i915_gem_object.h |  3 ++
>>> 2 files changed, 81 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 7c20601fe1de..73d5cee23dd7 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -1452,6 +1452,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>>>
>>> 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
>>>
>>> +	ret = -ENODEV;
>>> +	if (obj->ops->pwrite)
>>> +		ret = obj->ops->pwrite(obj, args);
>>
>> ops->pwrite_nowait or some good name to signify the conditions?
>>
>> I am not sure that the obj->mm.pages check can be in the vfunc since
>> this call happens before the i915_gem_object_wait. Which to me
>> sounds like we should be explicit at this level that the ops->pwrite
>> is not allowed to run if obj->mm.pages has already been
>> instantiated.
>
> That doesn't make sense to me. There's no reason that the backend can't
> handle the wait, or understand special cases for when there are pages.

It doesn't but I thought it is weird and confusing to have a vfunc with 
a completely generic name but very specialized behaviour.

>> Or if this is only true for the normal objects, and the future
>> specialisation may become full, then the standard pattern of
>>
>> 	if (obj->ops->pwrite)
>> 		ret = obj->ops->pwrite();
>> 	else
>> 		ret = default_obj_pwrite();
>>
>> ?
>
> Yeah, is kind of where I think we will end up. However, pwrite is of
> limited usefulness that I think we should do this case by case. Getting
> rid the the shmem/phys special cases would be nice.
>
> The sketch I have is that each backend has something like:
>
> 	err = fast_pwrite();
> 	if (err != -ENODEV)
> 		return err;
>
> 	err = default_gtt_pwrite();
> 	if (err != -ENODEV)
> 		return err;
>
> 	return slow_pwrite();
>
> The challenge is trying to limit the duplication of the common steps to
> avoid struct_mutex. (Ideally by eliminating struct_mutex entirely from
> this path.)
>
>> In which case ops->pwrite would be the correct name to keep.
>>
>> The ops->pwrite for the shmemfs objects would then become
>> i915_gem_object_pwrite_gtt from below followed by
>> default_obj_pwrite.
>>
>> I don't know, maybe I am complicating it too much?
>
> For the moment, yes :) This was a quick hack to hide a regression - the

Oh it was a real regression and not just an optimisation for a strange 
client behaviour? You should add a Fixes: tag then. And explain in the 
commit what it was (the regression).

> real bug fix will be breaking struct_mutex out of the shrinker, I think,
> and there's some nasty behaviour to resolve when we do hit the shrinker
> the whole object page-in/page-out behaviour is much, much slower than
> what should be the equivalent with individual pages via shmemfs. The
> bonus here is that shmemfs can avoid clearing/swapping-in the page if
> knows it will be completely overwritten, which makes the patch useful on
> its own merit.

So in this particular case is that becuase it is swapping out even the 
untouched pages? And it started doing that recently after some commit or 
what?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind
  2017-03-06  9:29 ` [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind Chris Wilson
@ 2017-03-06 14:03   ` Tvrtko Ursulin
  2017-03-06 14:47     ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-06 14:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/03/2017 09:29, Chris Wilson wrote:
> Unbind the vma may happen at any time, outside of the normal GT wakeref.
> As such it relies on having a wakeref of its own. However, we can forgo
> clearing the register whilst the device is asleep and just mark it as
> unused - so that when we do wake up the device, we will clear the unused
> fence register (see i915_gem_restore_fences).
>
> [22423.944631] WARNING: CPU: 3 PID: 26178 at drivers/gpu/drm/i915/intel_drv.h:1739 i915_vma_put_fence+0xf3/0x100 [i915]
> [22423.946053] RPM wakelock ref not held during HW access
> [22423.946056] Modules linked in: vgem(E) i915(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) x86_pkg_temp_thermal(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) intel_gtt(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) evdev(E) aesni_intel(E) aes_x86_64(E) crypto_simd(E) cryptd(E) glue_helper(E) sysimgblt(E) fb_sys_fops(E) prime_numbers(E) drm(E) efivars(E) mei_me(E) lpc_ich(E) mei(E) mfd_core(E) battery(E) video(E) acpi_pad(E) button(E) tpm_tis(E) tpm_tis_core(E) tpm(E) autofs4(E) i2c_i801(E) thermal(E) fan(E) i2c_designware_platform(E) i2c_designware_core(E)
> [22423.946438] CPU: 2 PID: 26178 Comm: gem_concurrent_ Tainted: G            E   4.10.0+ #101
> [22423.946513] Hardware name: ��������������������������������� ���������������������������������/���������������������������������, BIOS RYBDWi35.86A.0246.2
> [22423.946600] Call Trace:
> [22423.946641]  dump_stack+0x68/0x9f
> [22423.946703]  __warn+0x107/0x130
> [22423.946763]  warn_slowpath_fmt+0xa8/0xe0
> [22423.946825]  ? __warn+0x130/0x130
> [22423.946868]  ? free_hot_cold_page_list+0x53/0x70
> [22423.946942]  ? mark_lock+0xcc/0x7f0
> [22423.946997]  ? __lock_is_held+0x84/0x100
> [22423.947115]  ? i915_vma_put_fence+0x64/0x100 [i915]
> [22423.947224]  i915_vma_put_fence+0xf3/0x100 [i915]
> [22423.947335]  i915_vma_unbind+0x4da/0x560 [i915]
> [22423.947387]  ? rb_erase+0x812/0x8a0
> [22423.947439]  ? kfree+0xa2/0xd0
> [22423.947562]  i915_vma_close+0x159/0x180 [i915]
> [22423.947674]  intel_ring_free+0x31/0x50 [i915]
> [22423.947776]  i915_gem_context_free+0x1ff/0x3d0 [i915]
> [22423.947887]  context_close+0x106/0x110 [i915]
> [22423.947989]  context_idr_cleanup+0xc/0x10 [i915]
> [22423.948041]  idr_for_each+0x14d/0x1d0
> [22423.948158]  ? context_close+0x110/0x110 [i915]
> [22423.948206]  ? get_from_free_list+0x70/0x70
> [22423.948261]  ? __lock_is_held+0x84/0x100
> [22423.948325]  ? __mutex_unlock_slowpath+0xd4/0x400
> [22423.948448]  i915_gem_context_close+0x4b/0x90 [i915]
> [22423.948544]  i915_driver_preclose+0x28/0x50 [i915]
> [22423.948620]  drm_release+0x175/0x690 [drm]
> [22423.948681]  ? fcntl_setlk+0x5e0/0x5e0
> [22423.948746]  __fput+0x17d/0x300
> [22423.948807]  ____fput+0x9/0x10
> [22423.948859]  task_work_run+0xa7/0xe0
> [22423.948924]  do_exit+0x4d2/0x13e0
> [22423.948986]  ? mm_update_next_owner+0x320/0x320
> [22423.949051]  ? __do_page_fault+0x209/0x5c0
> [22423.949110]  ? mark_held_locks+0x23/0xc0
> [22423.949166]  ? entry_SYSCALL_64_fastpath+0x5/0xb1
> [22423.949232]  do_group_exit+0x93/0x160
> [22423.949289]  SyS_exit_group+0x18/0x20
> [22423.949350]  entry_SYSCALL_64_fastpath+0x1c/0xb1
> [22423.949403] RIP: 0033:0x7f9cc2e154c8
> [22423.949484] RSP: 002b:00007ffd7e81b448 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
> [22423.949557] RAX: ffffffffffffffda RBX: ffffffff810ef1f0 RCX: 00007f9cc2e154c8
> [22423.949617] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
> [22423.949677] RBP: ffff880367e9ff98 R08: 00000000000000e7 R09: ffffffffffffff88
> [22423.949741] R10: 00007f9cc1d5c000 R11: 0000000000000246 R12: 00007f9cc30f6c30
> [22423.949798] R13: 0000000000000000 R14: 00007f9cc30f6c20 R15: 0000000000000003
> [22423.949868]  ? trace_hardirqs_off_caller+0xc0/0x110
>
> v2: Move the rpm check down a layer so that we still perform the
> vma/fence update required for the deferred mmio write on resume.
> v3: Don't touch i915_gem_object_set_cache_level() and leave the rpm to
> the low level routines (such as i915_vma_put_fence).
> v4: vma may be null in fence_write, so extract drm_i915_private from
> fence->i915
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index fadbe8f4c745..5fe2cd8c8f28 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -248,7 +248,14 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  		list_move(&fence->link, &fence->i915->mm.fence_list);
>  	}
>
> -	fence_write(fence, vma);
> +	/* We only need to update the register itself if the device is awake.
> +	 * If the device is currently powered down, we will defer the write
> +	 * to the runtime resume, see i915_gem_restore_fences().
> +	 */
> +	if (intel_runtime_pm_get_if_in_use(fence->i915)) {
> +		fence_write(fence, vma);
> +		intel_runtime_pm_put(fence->i915);
> +	}
>
>  	if (vma) {
>  		if (fence->vma != vma) {
> @@ -278,8 +285,6 @@ i915_vma_put_fence(struct i915_vma *vma)
>  {
>  	struct drm_i915_fence_reg *fence = vma->fence;
>
> -	assert_rpm_wakelock_held(vma->vm->i915);
> -
>  	if (!fence)
>  		return 0;
>
>

Looks OK. At least my power management untrained eyes. :) But I've 
traced the code paths and all and it looks simple.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06 13:49     ` Tvrtko Ursulin
@ 2017-03-06 14:14       ` Chris Wilson
  2017-03-06 16:32         ` Tvrtko Ursulin
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-06 14:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Mar 06, 2017 at 01:49:33PM +0000, Tvrtko Ursulin wrote:
> 
> On 06/03/2017 12:48, Chris Wilson wrote:
> >For the moment, yes :) This was a quick hack to hide a regression - the
> 
> Oh it was a real regression and not just an optimisation for a
> strange client behaviour? You should add a Fixes: tag then. And
> explain in the commit what it was (the regression).

I was having that debate with myself in another thread. userspace is
clearly broken, but this excerbates the damage.
 
> >real bug fix will be breaking struct_mutex out of the shrinker, I think,
> >and there's some nasty behaviour to resolve when we do hit the shrinker
> >the whole object page-in/page-out behaviour is much, much slower than
> >what should be the equivalent with individual pages via shmemfs. The
> >bonus here is that shmemfs can avoid clearing/swapping-in the page if
> >knows it will be completely overwritten, which makes the patch useful on
> >its own merit.
> 
> So in this particular case is that becuase it is swapping out even
> the untouched pages? And it started doing that recently after some
> commit or what?

Remember when I said that nobody would touch pages without using them,
(and so could defer the update for the shrinker until we had the
struct_mutex) and certainly not 16GiB of written-but-unused pages on a
small box? libva happened.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind
  2017-03-06 14:03   ` Tvrtko Ursulin
@ 2017-03-06 14:47     ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-06 14:47 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Mar 06, 2017 at 02:03:26PM +0000, Tvrtko Ursulin wrote:
> 
> On 06/03/2017 09:29, Chris Wilson wrote:
> >Unbind the vma may happen at any time, outside of the normal GT wakeref.
> >As such it relies on having a wakeref of its own. However, we can forgo
> >clearing the register whilst the device is asleep and just mark it as
> >unused - so that when we do wake up the device, we will clear the unused
> >fence register (see i915_gem_restore_fences).
> >
> 
> Looks OK. At least my power management untrained eyes. :) But I've
> traced the code paths and all and it looks simple.

Ta. Too bad CI didn't hit this first so had I a bugzilla to cite.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06 14:14       ` Chris Wilson
@ 2017-03-06 16:32         ` Tvrtko Ursulin
  2017-03-06 21:49           ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-06 16:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/03/2017 14:14, Chris Wilson wrote:
> On Mon, Mar 06, 2017 at 01:49:33PM +0000, Tvrtko Ursulin wrote:
>>
>> On 06/03/2017 12:48, Chris Wilson wrote:
>>> For the moment, yes :) This was a quick hack to hide a regression - the
>>
>> Oh it was a real regression and not just an optimisation for a
>> strange client behaviour? You should add a Fixes: tag then. And
>> explain in the commit what it was (the regression).
>
> I was having that debate with myself in another thread. userspace is
> clearly broken, but this excerbates the damage.
>
>>> real bug fix will be breaking struct_mutex out of the shrinker, I think,
>>> and there's some nasty behaviour to resolve when we do hit the shrinker
>>> the whole object page-in/page-out behaviour is much, much slower than
>>> what should be the equivalent with individual pages via shmemfs. The
>>> bonus here is that shmemfs can avoid clearing/swapping-in the page if
>>> knows it will be completely overwritten, which makes the patch useful on
>>> its own merit.
>>
>> So in this particular case is that becuase it is swapping out even
>> the untouched pages? And it started doing that recently after some
>> commit or what?
>
> Remember when I said that nobody would touch pages without using them,
> (and so could defer the update for the shrinker until we had the
> struct_mutex) and certainly not 16GiB of written-but-unused pages on a
> small box? libva happened.

Oh dear.. Ok, going back to the previous reply..

I can see the benefit of avoiding the shrinker and struct mutex but 
haven't found that other benefit.

I've been rummaging in the shmem.c & co but so far haven't found 
anything to explain me the possibility of it avoiding 
clearing/swapping-in the pages. It looks like both our normal page 
allocation and this new one boil down to the same shmem_getpage.

Could you explain what I am missing?

Also, would we have any IGT coverage for this new path? And keep a solid 
amount of coverage for the old paths as well.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
                   ` (3 preceding siblings ...)
  2017-03-06 12:24 ` [PATCH 1/3] " Tvrtko Ursulin
@ 2017-03-06 19:48 ` Joonas Lahtinen
  2017-03-07  9:55 ` Petri Latvala
  5 siblings, 0 replies; 20+ messages in thread
From: Joonas Lahtinen @ 2017-03-06 19:48 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ma, 2017-03-06 at 09:29 +0000, Chris Wilson wrote:
> Before we instantiate/pin the backing store for our use, we
> can prepopulate the shmemfs filp efficiently using the a
> write into the pagecache. We avoid the penalty of instantiating
> all the pages, important if the user is just writing to a few
> and never uses the object on the GPU, and using a direct write
> into shmemfs allows it to avoid the cost of retrieving a page
> (either swapin or clearing-before-use) before it is overwritten.
> 
> This can be extended later to provide additional specialisation for
> other backends (other than shmemfs).
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=99107
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.william.auld@gmail.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

<SNIP>

> +static int
> +i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
> +			   const struct drm_i915_gem_pwrite *arg)
> +{
> +	struct address_space *mapping = obj->base.filp->f_mapping;
> +	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
> +	u64 remain, offset;
> +	unsigned int pg;
> +
> +	/* Before we instantiate/pin the backing store for our use, we
> +	 * can prepopulate the shmemfs filp efficiently using the a
> +	 * write into the pagecache. We avoid the penalty of instantiating
> +	 * all the pages, important if the user is just writing to a few
> +	 * and never uses the object on the GPU, and using a direct write
> +	 * into shmemfs allows it to avoid the cost of retrieving a page
> +	 * (either swapin or clearing-before-use) before it is overwritten.
> +	 */
> +	if (READ_ONCE(obj->mm.pages))
> +		return -ENODEV;
> +
> +	/* Before the pages are instantioted the object is treated as being

*instantiated

<SNIP>

> +		err = pagecache_write_end(obj->base.filp, mapping, offset,
> +					  len, len - unwritten,
> +					  page, data);

Did you deliberately make the arguments different from write_begin? :)

If this solves a problem;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

But should we instead fix libva?

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06 16:32         ` Tvrtko Ursulin
@ 2017-03-06 21:49           ` Chris Wilson
  2017-03-07  7:30             ` Tvrtko Ursulin
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-06 21:49 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Mar 06, 2017 at 04:32:45PM +0000, Tvrtko Ursulin wrote:
> 
> On 06/03/2017 14:14, Chris Wilson wrote:
> >Remember when I said that nobody would touch pages without using them,
> >(and so could defer the update for the shrinker until we had the
> >struct_mutex) and certainly not 16GiB of written-but-unused pages on a
> >small box? libva happened.
> 
> Oh dear.. Ok, going back to the previous reply..
> 
> I can see the benefit of avoiding the shrinker and struct mutex but
> haven't found that other benefit.
> 
> I've been rummaging in the shmem.c & co but so far haven't found
> anything to explain me the possibility of it avoiding
> clearing/swapping-in the pages. It looks like both our normal page
> allocation and this new one boil down to the same shmem_getpage.
> 
> Could you explain what I am missing?

Normally we use shmem_getpages(SGP_CACHE). write_begin uses SGP_WRITE.
That has the clear avoidance, but alas I can't see it taking advantage
of avoiding a swapin - probably because SGP_WRITE doesn't differentiate
between a full or partial page write at that point, though it has the
information to do so. (swapin avoidance is then just a pipe dream.) For
bonus points would be handling high-order pages...

> Also, would we have any IGT coverage for this new path? And keep a
> solid amount of coverage for the old paths as well.

Virtually every test has some form of
gem_pwrite(gem_create(4096), 0, &bbe, sizeof(bbe));
and this path is extensively tested by gem_concurrent_blit, gem_pwrite
which should both exercise initial pwrites plus pwrites following
shrinking, as well as the ordinary pwrite with obj->mm.pages.

In real workloads though, while pwrite is fairly widespread in mesa for
uploading the batch buffer (on !llc at least), the userspace bo cache
means that the number of times we can expect to hit this path are rare.
Which just leaves the confusing case of libva.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06 21:49           ` Chris Wilson
@ 2017-03-07  7:30             ` Tvrtko Ursulin
  0 siblings, 0 replies; 20+ messages in thread
From: Tvrtko Ursulin @ 2017-03-07  7:30 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 06/03/2017 21:49, Chris Wilson wrote:
> On Mon, Mar 06, 2017 at 04:32:45PM +0000, Tvrtko Ursulin wrote:
>>
>> On 06/03/2017 14:14, Chris Wilson wrote:
>>> Remember when I said that nobody would touch pages without using them,
>>> (and so could defer the update for the shrinker until we had the
>>> struct_mutex) and certainly not 16GiB of written-but-unused pages on a
>>> small box? libva happened.
>>
>> Oh dear.. Ok, going back to the previous reply..
>>
>> I can see the benefit of avoiding the shrinker and struct mutex but
>> haven't found that other benefit.
>>
>> I've been rummaging in the shmem.c & co but so far haven't found
>> anything to explain me the possibility of it avoiding
>> clearing/swapping-in the pages. It looks like both our normal page
>> allocation and this new one boil down to the same shmem_getpage.
>>
>> Could you explain what I am missing?
>
> Normally we use shmem_getpages(SGP_CACHE). write_begin uses SGP_WRITE.
> That has the clear avoidance, but alas I can't see it taking advantage
> of avoiding a swapin - probably because SGP_WRITE doesn't differentiate
> between a full or partial page write at that point, though it has the
> information to do so. (swapin avoidance is then just a pipe dream.) For
> bonus points would be handling high-order pages...

I've looked in that code but it's too deep in page handling for me at 
the moment to say one way or the other...

>> Also, would we have any IGT coverage for this new path? And keep a
>> solid amount of coverage for the old paths as well.
>
> Virtually every test has some form of
> gem_pwrite(gem_create(4096), 0, &bbe, sizeof(bbe));
> and this path is extensively tested by gem_concurrent_blit, gem_pwrite
> which should both exercise initial pwrites plus pwrites following
> shrinking, as well as the ordinary pwrite with obj->mm.pages.
>
> In real workloads though, while pwrite is fairly widespread in mesa for
> uploading the batch buffer (on !llc at least), the userspace bo cache
> means that the number of times we can expect to hit this path are rare.
> Which just leaves the confusing case of libva.

.. but since it has the benefit of avoiding the shrinker and you 
describe the IGT coverage is good:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
                   ` (4 preceding siblings ...)
  2017-03-06 19:48 ` Joonas Lahtinen
@ 2017-03-07  9:55 ` Petri Latvala
  2017-03-07 10:22   ` Chris Wilson
  5 siblings, 1 reply; 20+ messages in thread
From: Petri Latvala @ 2017-03-07  9:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


For the record, manually launched extended test run on SKL
resulted in:

commit f88cf1067cc499f4c9236c4e3ff7e410f9cc76d7
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Sat Mar 4 10:35:32 2017 +0000

    drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl


igt@gem_madvise@dontneed-before-exec: pass -> {'fail', 'incomplete'}
igt@gem_madvise@dontneed-before-pwrite: pass -> fail




--
Petri Latvala
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-07  9:55 ` Petri Latvala
@ 2017-03-07 10:22   ` Chris Wilson
  2017-03-07 11:46     ` Petri Latvala
  0 siblings, 1 reply; 20+ messages in thread
From: Chris Wilson @ 2017-03-07 10:22 UTC (permalink / raw)
  To: Petri Latvala; +Cc: intel-gfx

On Tue, Mar 07, 2017 at 11:55:20AM +0200, Petri Latvala wrote:
> 
> For the record, manually launched extended test run on SKL
> resulted in:
> 
> commit f88cf1067cc499f4c9236c4e3ff7e410f9cc76d7
> Author: Chris Wilson <chris@chris-wilson.co.uk>
> Date:   Sat Mar 4 10:35:32 2017 +0000
> 
>     drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
> 
> 
> igt@gem_madvise@dontneed-before-exec: pass -> {'fail', 'incomplete'}
That test is expected to fail, that it ever passed is a fluke.
incomplete there should be a failure in the runner? That should quite
happily spot the purged object in execbuf.

> igt@gem_madvise@dontneed-before-pwrite: pass -> fail
Hmm. Here we are relying on shmemfs reporting the write to the truncated
object as invalid. Looks like we need to do the filtering ourselves. It
is not a huge problem as we still prevent any later instantiation of
pages so just a temporary slip. Setting obj->mm.pages = -EFAULT; is
likely a fun solution.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-07 10:22   ` Chris Wilson
@ 2017-03-07 11:46     ` Petri Latvala
  2017-03-07 12:02       ` Chris Wilson
  0 siblings, 1 reply; 20+ messages in thread
From: Petri Latvala @ 2017-03-07 11:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On Tue, Mar 07, 2017 at 10:22:09AM +0000, Chris Wilson wrote:
> > igt@gem_madvise@dontneed-before-exec: pass -> {'fail', 'incomplete'}
> That test is expected to fail, that it ever passed is a fluke.

That subtest should be removed from IGT then?


> incomplete there should be a failure in the runner? That should quite
> happily spot the purged object in execbuf.

The test executed right before was igt@drv_missed_irq and the used
versions of IGT and kernel did not have the recent fixes related to
that test. Quite possible that the incomplete result was due to that
one. 'incomplete' here means that the test didn't finish and the
machine was killed a watchdog. 'fail' also being in the set is because
the test order was different in that run.


-- 
Petri Latvala
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl
  2017-03-07 11:46     ` Petri Latvala
@ 2017-03-07 12:02       ` Chris Wilson
  0 siblings, 0 replies; 20+ messages in thread
From: Chris Wilson @ 2017-03-07 12:02 UTC (permalink / raw)
  To: Petri Latvala; +Cc: intel-gfx

On Tue, Mar 07, 2017 at 01:46:36PM +0200, Petri Latvala wrote:
> On Tue, Mar 07, 2017 at 10:22:09AM +0000, Chris Wilson wrote:
> > > igt@gem_madvise@dontneed-before-exec: pass -> {'fail', 'incomplete'}
> > That test is expected to fail, that it ever passed is a fluke.
> 
> That subtest should be removed from IGT then?

Or updated for consistency. It's the odd-one-out in that test set atm.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-03-07 12:02 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-06  9:29 [PATCH 1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Chris Wilson
2017-03-06  9:29 ` [PATCH 2/3] drm/i915: Wake up all waiters before idling Chris Wilson
2017-03-06 11:02   ` Chris Wilson
2017-03-06 13:47     ` Chris Wilson
2017-03-06  9:29 ` [PATCH 3/3] drm/i915: Take rpm wakelock for releasing the fence on unbind Chris Wilson
2017-03-06 14:03   ` Tvrtko Ursulin
2017-03-06 14:47     ` Chris Wilson
2017-03-06 12:24 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Patchwork
2017-03-06 12:24 ` [PATCH 1/3] " Tvrtko Ursulin
2017-03-06 12:48   ` Chris Wilson
2017-03-06 13:49     ` Tvrtko Ursulin
2017-03-06 14:14       ` Chris Wilson
2017-03-06 16:32         ` Tvrtko Ursulin
2017-03-06 21:49           ` Chris Wilson
2017-03-07  7:30             ` Tvrtko Ursulin
2017-03-06 19:48 ` Joonas Lahtinen
2017-03-07  9:55 ` Petri Latvala
2017-03-07 10:22   ` Chris Wilson
2017-03-07 11:46     ` Petri Latvala
2017-03-07 12:02       ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.