[PATCH 01/11] drm/i915/selftests: Prevent background reaping of active objects

* [PATCH 01/11] drm/i915/selftests: Prevent background reaping of active objects
@ 2018-07-09 13:01 Chris Wilson
  2018-07-09 13:01 ` [PATCH 02/11] drm/i915: Only reset hangcheck at the start of an activity cycle Chris Wilson
                   ` (14 more replies)
  0 siblings, 15 replies; 25+ messages in thread
From: Chris Wilson @ 2018-07-09 13:01 UTC (permalink / raw)
  To: intel-gfx

igt_mmap_offset_exhaustion() wants to test what happens when the mmap
space is filled with zombie objects, objects discarded by userspace but
still active on the GPU. As they are only protected by the active
reference, we have to be certain that active reference is kept while we
peek into our dangling pointer. That active reference should not be
freed until we retire, but we do that retirement from a background
thread. This leaves us with a subtle timing problem, exacerbated and
highlighted by KASAN:

<3>[  132.380399] BUG: KASAN: use-after-free in drm_gem_create_mmap_offset+0x8c/0xd0
<3>[  132.380430] Read of size 8 at addr ffff8801e13245f8 by task drv_selftest/5822

<4>[  132.380470] CPU: 0 PID: 5822 Comm: drv_selftest Tainted: G     U            4.18.0-rc3-g7ae7763aa2be-kasan_48+ #1
<4>[  132.380473] Hardware name: Dell Inc. XPS 8300  /0Y2MRG, BIOS A06 10/17/2011
<4>[  132.380475] Call Trace:
<4>[  132.380481]  dump_stack+0x7c/0xbb
<4>[  132.380487]  print_address_description+0x65/0x270
<4>[  132.380493]  kasan_report+0x25b/0x380
<4>[  132.380497]  ? drm_gem_create_mmap_offset+0x8c/0xd0
<4>[  132.380503]  drm_gem_create_mmap_offset+0x8c/0xd0
<4>[  132.380584]  i915_gem_object_create_mmap_offset+0x6d/0x100 [i915]
<4>[  132.380650]  igt_mmap_offset_exhaustion+0x462/0x940 [i915]
<4>[  132.380714]  ? i915_gem_close_object+0x740/0x740 [i915]
<4>[  132.380784]  ? igt_gem_huge+0x269/0x3d0 [i915]
<4>[  132.380865]  __i915_subtests+0x5a/0x160 [i915]
<4>[  132.380936]  __run_selftests+0x1a2/0x2f0 [i915]
<4>[  132.381008]  i915_live_selftests+0x4e/0x80 [i915]
<4>[  132.381071]  i915_pci_probe+0xd8/0x1b0 [i915]
<4>[  132.381077]  pci_device_probe+0x1c5/0x3a0
<4>[  132.381087]  driver_probe_device+0x6b6/0xcb0
<4>[  132.381094]  __driver_attach+0x22d/0x2c0
<4>[  132.381100]  ? driver_probe_device+0xcb0/0xcb0
<4>[  132.381103]  bus_for_each_dev+0x113/0x1a0
<4>[  132.381108]  ? check_flags.part.24+0x450/0x450
<4>[  132.381112]  ? subsys_dev_iter_exit+0x10/0x10
<4>[  132.381123]  bus_add_driver+0x38b/0x6e0
<4>[  132.381131]  driver_register+0x189/0x400
<4>[  132.381136]  ? 0xffffffffc12d8000
<4>[  132.381140]  do_one_initcall+0xa0/0x4c0
<4>[  132.381145]  ? initcall_blacklisted+0x180/0x180
<4>[  132.381152]  ? do_init_module+0x4a/0x54c
<4>[  132.381156]  ? rcu_lockdep_current_cpu_online+0xdc/0x130
<4>[  132.381161]  ? kasan_unpoison_shadow+0x30/0x40
<4>[  132.381169]  do_init_module+0x1b5/0x54c
<4>[  132.381177]  load_module+0x619e/0x9b70
<4>[  132.381202]  ? module_frob_arch_sections+0x20/0x20
<4>[  132.381211]  ? vfs_read+0x257/0x2f0
<4>[  132.381214]  ? vfs_read+0x257/0x2f0
<4>[  132.381221]  ? kernel_read+0x8b/0x130
<4>[  132.381231]  ? copy_strings_kernel+0x120/0x120
<4>[  132.381244]  ? __se_sys_finit_module+0x17c/0x1a0
<4>[  132.381248]  __se_sys_finit_module+0x17c/0x1a0
<4>[  132.381252]  ? __ia32_sys_init_module+0xa0/0xa0
<4>[  132.381261]  ? __se_sys_newstat+0x77/0xd0
<4>[  132.381265]  ? cp_new_stat+0x590/0x590
<4>[  132.381269]  ? kmem_cache_free+0x2f0/0x340
<4>[  132.381285]  do_syscall_64+0x97/0x400
<4>[  132.381292]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4>[  132.381295] RIP: 0033:0x7eff4af46839
<4>[  132.381297] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1f f6 2c 00 f7 d8 64 89 01 48
<4>[  132.381426] RSP: 002b:00007ffcd84f4cf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
<4>[  132.381432] RAX: ffffffffffffffda RBX: 000055dfdeb429a0 RCX: 00007eff4af46839
<4>[  132.381435] RDX: 0000000000000000 RSI: 000055dfdeb43670 RDI: 0000000000000004
<4>[  132.381437] RBP: 000055dfdeb43670 R08: 0000000000000004 R09: 0000000000000000
<4>[  132.381440] R10: 00007ffcd84f4e60 R11: 0000000000000246 R12: 0000000000000000
<4>[  132.381442] R13: 000055dfdeb3bec0 R14: 0000000000000000 R15: 000000000000003b

<3>[  132.381466] Allocated by task 5822:
<4>[  132.381485]  kmem_cache_alloc+0xdf/0x2e0
<4>[  132.381546]  i915_gem_object_create_internal+0x24/0x1e0 [i915]
<4>[  132.381609]  igt_mmap_offset_exhaustion+0x257/0x940 [i915]
<4>[  132.381677]  __i915_subtests+0x5a/0x160 [i915]
<4>[  132.381742]  __run_selftests+0x1a2/0x2f0 [i915]
<4>[  132.381806]  i915_live_selftests+0x4e/0x80 [i915]
<4>[  132.381865]  i915_pci_probe+0xd8/0x1b0 [i915]
<4>[  132.381868]  pci_device_probe+0x1c5/0x3a0
<4>[  132.381871]  driver_probe_device+0x6b6/0xcb0
<4>[  132.381874]  __driver_attach+0x22d/0x2c0
<4>[  132.381877]  bus_for_each_dev+0x113/0x1a0
<4>[  132.381880]  bus_add_driver+0x38b/0x6e0
<4>[  132.381884]  driver_register+0x189/0x400
<4>[  132.381886]  do_one_initcall+0xa0/0x4c0
<4>[  132.381889]  do_init_module+0x1b5/0x54c
<4>[  132.381892]  load_module+0x619e/0x9b70
<4>[  132.381895]  __se_sys_finit_module+0x17c/0x1a0
<4>[  132.381898]  do_syscall_64+0x97/0x400
<4>[  132.381901]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

<3>[  132.381914] Freed by task 150:
<4>[  132.381931]  kmem_cache_free+0xb7/0x340
<4>[  132.381995]  __i915_gem_free_objects+0x875/0xf50 [i915]
<4>[  132.382054]  __i915_gem_free_work+0x69/0xb0 [i915]
<4>[  132.382058]  process_one_work+0x78b/0x1740
<4>[  132.382061]  worker_thread+0x82/0xb80
<4>[  132.382064]  kthread+0x30c/0x3d0
<4>[  132.382067]  ret_from_fork+0x3a/0x50

<3>[  132.382081] The buggy address belongs to the object at ffff8801e1324500
                   which belongs to the cache drm_i915_gem_object of size 1168
<3>[  132.382133] The buggy address is located 248 bytes inside of
                   1168-byte region [ffff8801e1324500, ffff8801e1324990)
<3>[  132.382179] The buggy address belongs to the page:
<0>[  132.382202] page:ffffea000784c800 count:1 mapcount:0 mapping:ffff8801dedf6500 index:0xffff8801e1323ec0 compound_mapcount: 0
<0>[  132.382251] flags: 0x8000000000008100(slab|head)
<1>[  132.382274] raw: 8000000000008100 ffff8801d6317440 ffff8801d6317440 ffff8801dedf6500
<1>[  132.382307] raw: ffff8801e1323ec0 0000000000140013 00000001ffffffff 0000000000000000
<1>[  132.382339] page dumped because: kasan: bad access detected

<3>[  132.382373] Memory state around the buggy address:
<3>[  132.382395]  ffff8801e1324480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
<3>[  132.382426]  ffff8801e1324500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
<3>[  132.382457] >ffff8801e1324580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
<3>[  132.382488]                                                                 ^
<3>[  132.382517]  ffff8801e1324600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
<3>[  132.382548]  ffff8801e1324680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

This patch tricks the system into running without the background retire
thread, until after we finish the test. The only reaping should then be
performed by the mmap offset routine to reclaim the space as required.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/selftests/i915_gem_object.c  | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index f4a5099c75b5..d77acf4cc439 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -507,6 +507,15 @@ static int igt_mmap_offset_exhaustion(void *arg)
 	u64 hole_start, hole_end;
 	int loop, err;
 
+	/* Disable background reaper */
+	mutex_lock(&i915->drm.struct_mutex);
+	if (!i915->gt.active_requests++)
+		i915_gem_unpark(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+	cancel_delayed_work_sync(&i915->gt.retire_work);
+	cancel_delayed_work_sync(&i915->gt.idle_work);
+	GEM_BUG_ON(!i915->gt.awake);
+
 	/* Trim the device mmap space to only a page */
 	memset(&resv, 0, sizeof(resv));
 	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
@@ -515,7 +524,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		err = drm_mm_reserve_node(mm, &resv);
 		if (err) {
 			pr_err("Failed to trim VMA manager, err=%d\n", err);
-			return err;
+			goto out_park;
 		}
 		break;
 	}
@@ -576,6 +585,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto err_obj;
 		}
 
+		/* NB we rely on the _active_ reference to access obj now */
 		GEM_BUG_ON(!i915_gem_object_is_active(obj));
 		err = i915_gem_object_create_mmap_offset(obj);
 		if (err) {
@@ -587,6 +597,13 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 out:
 	drm_mm_remove_node(&resv);
+out_park:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (--i915->gt.active_requests)
+		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
+	else
+		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 err_obj:
 	i915_gem_object_put(obj);
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread