All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Kumar, Janga Rahul" <janga.rahul.kumar@intel.com>
To: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
	"igt-dev@lists.freedesktop.org" <igt-dev@lists.freedesktop.org>
Subject: Re: [igt-dev] [PATCH i-g-t] tests/xe: Add a test for corner cases of eviction.
Date: Wed, 31 May 2023 15:58:44 +0000	[thread overview]
Message-ID: <MW3PR11MB4748892C517A0AC411059E4BAE489@MW3PR11MB4748.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20230529124824.1876093-1-maarten.lankhorst@linux.intel.com>



> -----Original Message-----
> From: igt-dev <igt-dev-bounces@lists.freedesktop.org> On Behalf Of Maarten
> Lankhorst
> Sent: 29 May 2023 18:18
> To: igt-dev@lists.freedesktop.org
> Subject: [igt-dev] [PATCH i-g-t] tests/xe: Add a test for corner cases of eviction.
> 
> Gitlab issues 194 and 239 show some corner cases. In particular, signal handling,
> pt eviction and losing track of external objects on the list.
> 
> The patch series to fix those was sent as
> https://patchwork.freedesktop.org/series/118428/
> 
> But because it's tricky to reproduce those issues, I had to add some custom igt's
> to handle those.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  tests/meson.build     |   1 +
>  tests/xe/xe_evicted.c | 313 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 314 insertions(+)
>  create mode 100644 tests/xe/xe_evicted.c
> 
> diff --git a/tests/meson.build b/tests/meson.build index 6551194fe..166937b6c
> 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -248,6 +248,7 @@ xe_progs = [
>  	'xe_dma_buf_sync',
>  	'xe_debugfs',
>  	'xe_evict',
> +	'xe_evicted',
>  	'xe_exec_balancer',
>  	'xe_exec_basic',
>  	'xe_exec_compute_mode',
> diff --git a/tests/xe/xe_evicted.c b/tests/xe/xe_evicted.c new file mode 100644
> index 000000000..3f5e6a82c
> --- /dev/null
> +++ b/tests/xe/xe_evicted.c
> @@ -0,0 +1,313 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +/**
> + * TEST: Test various eviction and ENOMEM corner cases.
> + * Category: Software building block
> + * Sub-category: VM_BIND / eviction
> + * Test category: functionality test
> + */
> +
> +#include "igt.h"
> +#include "lib/igt_syncobj.h"
> +#include "lib/intel_reg.h"
> +#include "xe_drm.h"
> +
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
> +#include "xe/xe_spin.h"
> +#include <string.h>
> +
> +static void evict_mem(struct xe_device *xe) {
> +	if (xe->has_vram)
> +		igt_debugfs_write(xe->fd, "vram0_evict", "1");
> +	else
> +		igt_debugfs_write(xe->fd, "gtt_evict", "1"); }
> +
> +static void execstress(struct xe_device *xe, uint32_t engine, uint32_t
> +addr) {
> +	struct timespec start, end;
> +
> +	igt_fork(child, 2) {
> +		struct drm_xe_sync sync = {
> +			.flags = DRM_XE_SYNC_SYNCOBJ |
> DRM_XE_SYNC_SIGNAL,
> +			.handle = syncobj_create(xe->fd, 0),
> +		};
> +
> +		clock_gettime(CLOCK_MONOTONIC, &start);
> +
> +		do {
> +			if (!child)
> +				evict_mem(xe);
> +			else
> +				xe_exec_sync(xe->fd, engine, addr, &sync, 1);
> +
> +			clock_gettime(CLOCK_MONOTONIC, &end);
> +		} while (end.tv_sec - start.tv_sec <= 3);
> +
> +		if (child)
> +			igt_assert(syncobj_wait(xe->fd, &sync.handle, 1,
> INT64_MAX, 0, NULL));
> +		syncobj_destroy(xe->fd, sync.handle);
> +	}
> +
> +	igt_waitchildren();
> +
> +	/* Leave function in a known state */
> +	evict_mem(xe);
> +}
> +
> +/* See issue 194, unbinding first VMA may cause BO not to be locked
> +correctly */ static void test_multimap(struct xe_device *xe, bool
> +external) {
> +	uint64_t size = xe->default_alignment;
> +	uint32_t vm = xe_vm_create(xe->fd,
> DRM_XE_VM_CREATE_SCRATCH_PAGE, 0);
> +	uint32_t bo = xe_bo_create(xe->fd, 0, external ? 0 : vm, size);
> +	uint32_t engine = xe_engine_create_class(xe->fd, vm,
> DRM_XE_ENGINE_CLASS_COPY);
> +	uint32_t *map = xe_bo_map(xe->fd, bo, size);
> +
> +	*map = MI_BATCH_BUFFER_END;
> +	munmap(map, size);
> +
> +	/* Create 2 mappings */
> +	xe_vm_bind_sync(xe->fd, vm, bo, 0, 0, size);
> +	xe_vm_bind_sync(xe->fd, vm, bo, 0, size, size);
> +
> +	execstress(xe, engine, size);
> +
> +	/* Unbind the first one, to possibly confuse stuff */
> +	xe_vm_unbind_sync(xe->fd, vm, 0, 0, size);
> +
> +	execstress(xe, engine, size);
> +
> +	/* Teardown.. */
> +	xe_engine_destroy(xe->fd, engine);
> +	xe_vm_destroy(xe->fd, vm);
> +	gem_close(xe->fd, bo);
> +}
> +
> +static uint64_t avail_vram(struct xe_device **xe) {
> +	int fd = (*xe)->fd;
> +	int region_idx;
> +
> +	/* Refresh to get up-to-date values */
> +	xe_device_put(fd);
> +	*xe = xe_device_get(fd);
> +
> +	region_idx = ffs((*xe)->gts->gts[0].native_mem_regions) - 1;
> +
> +	return (*xe)->mem_usage->regions[region_idx].total_size -
> +	       (*xe)->mem_usage->regions[region_idx].used;
> +}
> +
> +static void test_pt_eviction(struct xe_device *xe, bool external) {
> +	uint32_t bo, i;
> +	uint64_t size, full_size, full_addr, addr, avail;
> +	uint32_t vm;
> +	uint32_t engine;
> +	uint32_t *map;
> +	struct drm_xe_exec exec = {};
> +	igt_require(xe->has_vram);
> +
> +	vm = xe_vm_create(xe->fd, 0, 0);
> +	engine = xe_engine_create_class(xe->fd, vm,
> DRM_XE_ENGINE_CLASS_COPY);
> +
> +	/* Refresh xe_device size after creating vm + engine */
> +	evict_mem(xe);
> +	full_size = (avail_vram(&xe) - (2 << 20ULL)) & ~((2 << 20ULL) - 1);
> +
> +	bo = xe_bo_create(xe->fd, 0, external ? 0 : vm, full_size);
> +
> +	/* Only map what we need.. */
> +	map = xe_bo_map(xe->fd, bo, xe->default_alignment);
> +	*map = MI_BATCH_BUFFER_END;
> +	munmap(map, xe->default_alignment);
> +
> +	/* Stuff bo at the end, so we can use start for partial bo's */
> +	full_addr = (1ULL << (uint64_t)xe_va_bits(xe->fd)) - full_size;
> +	xe_vm_bind_sync(xe->fd, vm, bo, 0, full_addr, full_size);
> +
> +	exec.engine_id = engine;
> +	exec.num_batch_buffer = 1;
> +	exec.address = full_addr;
> +
> +	/* Now map partially at 0, and see how many pagetables we can still
> fill.. */
> +	addr = 0;
> +	size = xe->default_alignment;
> +	xe_vm_bind_sync(xe->fd, vm, bo, 0, addr, size);
> +
> +	xe_exec_sync(xe->fd, engine, full_addr, NULL, 0);
> +
> +	avail = full_size >> 12;
> +	while (1) {
> +		uint64_t new_avail = avail_vram(&xe) >> 12;
> +
> +		if (new_avail >= avail)
> +			igt_info("TTM delayed destroy may have freed some
> memory\n");
> +		avail = new_avail;
> +
> +		igt_info("%"PRIu64" available 4K pages left\n", avail);
> +		if (!avail)
> +			break;
> +
> +		if (avail == 1) {
> +			xe_vm_bind_sync(xe->fd, vm, bo, 0, 2 << 20ULL, size);
> +			break;
> +		}
> +
> +		addr += 1 << 30ULL;
> +
> +		/* Remove 1 for second level PT */
> +		avail--;
> +
> +		for (i = 0; i < min((uint64_t)512, avail); i++) {
> +			uint64_t this_addr = addr + (i << 21);
> +
> +			xe_vm_bind_sync(xe->fd, vm, bo, 0, this_addr, size);
> +		}
> +	}
> +
> +	/* Verify that with 0 memory available, exec still completes */
> +	xe_exec_sync(xe->fd, engine, full_addr, NULL, 0);
> +
> +	/* Bind 1 more to go over the edge, this should never succeed.. */
> +	if (__xe_vm_bind(xe->fd, vm, 0, bo, 0, 4 << 20ULL, size,
> +			 XE_VM_BIND_OP_MAP, NULL, 0, 0, 0) == 0) {
> +
> +		/* Boom! */
> +		igt_assert(igt_ioctl(xe->fd, DRM_IOCTL_XE_EXEC, &exec) < 0
> &&
> +			   errno == ENOMEM);
> +
> +		xe_vm_unbind_sync(xe->fd, vm, 0, 4 << 20ULL, size);
> +	} else {
> +		igt_assert(errno == ENOMEM);
> +	}
> +
> +	/*
> +	 * After evicting, we may end up creating new page tables,
> +	 * so this should fail..
> +	 */
> +	evict_mem(xe);
> +	igt_assert(igt_ioctl(xe->fd, DRM_IOCTL_XE_EXEC, &exec) < 0 &&
> +		   errno == ENOMEM);
> +
> +	/* Cleanup */
> +	xe_engine_destroy(xe->fd, engine);
> +	xe_vm_destroy(xe->fd, vm);
> +	gem_close(xe->fd, bo);
> +}
> +
> +/* Test OOM with signal handler enabled, to mimic X.org (issue #239) */
When you are checking these OOM tests, Can the igt process get killed due to OOM killer ? how are you preventing it in this test, is this test passing on any platform.
> +static void test_signal_oom(struct xe_device *xe, bool external) {
> +	uint32_t bo[13], num_bo = 0, i;
> +	uint64_t binds, size = xe->has_vram ? xe->vram_size[0] :
> igt_get_avail_ram_mb();
> +	uint32_t vm = xe_vm_create(xe->fd,
> DRM_XE_VM_CREATE_SCRATCH_PAGE, 0);
> +	uint32_t engine = xe_engine_create_class(xe->fd, vm,
> DRM_XE_ENGINE_CLASS_COPY);
> +	int err;
> +	struct drm_xe_gem_create create = {
> +		.flags = vram_if_possible(xe->fd, 0),
> +	};
> +	struct drm_xe_sync sync[2] = {
> +		{
> +			.flags = DRM_XE_SYNC_SYNCOBJ |
> DRM_XE_SYNC_SIGNAL,
> +			.handle = syncobj_create(xe->fd, 0),
> +		},
> +		{
> +			.flags = DRM_XE_SYNC_SYNCOBJ |
> DRM_XE_SYNC_SIGNAL,
> +			.handle = syncobj_create(xe->fd, 0),
> +		},
> +	};
> +	struct drm_xe_exec exec = {
> +		.engine_id = engine,
> +		.syncs = (uintptr_t)sync,
> +		.num_syncs = ARRAY_SIZE(sync),
> +		.address = 0,
> +		.num_batch_buffer = 1,
> +	};
> +
> +	size /= 12;
> +	size &= ~(xe->default_alignment - 1);
> +
> +	create.size = size;
> +	if (!external)
> +		create.vm_id = vm;
> +
> +	/* Create as many bo's as we can without OOMs */
> +	while (num_bo < ARRAY_SIZE(bo)) {
> +		igt_while_interruptible(true) {
> +			if (num_bo >= ARRAY_SIZE(bo))
> +				continue;
> +
> +			create.handle = 0;
> +			err = igt_ioctl(xe->fd, DRM_IOCTL_XE_GEM_CREATE,
> &create);
> +			if (!err)
> +				bo[num_bo++] = create.handle;
> +		}
> +
> +		igt_assert(err == 0 || errno == ENOMEM);
> +		if (err) {
> +			igt_assert(!xe->has_vram || num_bo > 9);
			Can you comment why you are checking with num_bo with 9 ? Instead of defining bo array size as 13, if you are trying to check the vram eviction, can you use the available System memory and VRAM ,  allowed memory limit for eviction and derive these limits based on calculation so that they can work for all platforms.
> +			break;
> +		}
> +	}
> +
> +	igt_info("Created %u bo's (total: %"PRIu64" MB)\n",
> +		 num_bo, (num_bo * size) >> 20ULL);
> +
> +	binds = 0;
> +	for (i = 0; i < num_bo; i++) {
> +		evict_mem(xe);
> +		igt_debug("Binding bo %i, total attempts %"PRIi64"\n", i, binds);
> +
> +		igt_while_interruptible(true)
> +			xe_vm_bind(xe->fd, vm, bo[i], 0, ((binds++) << 32ULL),
> size, sync, 1);
> +	}
> +
> +	sync[0].flags &= ~DRM_XE_SYNC_SIGNAL;
> +
> +	/* Try a few times, why not? */
> +	for (i = 0; i < 4; i++) {
> +		igt_while_interruptible(true) {
> +			err = igt_ioctl(xe->fd, DRM_IOCTL_XE_EXEC, &exec);
> +			igt_assert(err < 0 && errno == ENOMEM);
> +		}
> +	}
> +
> +	xe_vm_destroy(xe->fd, vm);
> +	while (num_bo--)
> +		gem_close(xe->fd, bo[num_bo]);
> +	syncobj_destroy(xe->fd, sync[1].handle);
> +	syncobj_destroy(xe->fd, sync[0].handle); }
> +
> +igt_main
> +{
> +	int fd;
> +
> +	igt_fixture
> +		fd = drm_open_driver(DRIVER_XE);
> +
> +	igt_subtest("multimap-internal")
> +		test_multimap(xe_device_get(fd), false);
> +
> +	igt_subtest("multimap-external")
> +		test_multimap(xe_device_get(fd), true);
> +
> +	igt_subtest("pt-eviction-internal")
> +		test_pt_eviction(xe_device_get(fd), false);
> +
> +	igt_subtest("pt-eviction-external")
> +		test_pt_eviction(xe_device_get(fd), false);
		For testing external , change above line to test_pt_eviction(xe_device_get(fd), true);
> +
> +	igt_subtest("signal-oom-internal")
> +		test_signal_oom(xe_device_get(fd), false);
> +
> +	igt_subtest("signal-oom-external")
> +		test_signal_oom(xe_device_get(fd), false);
		For testing external , change above line to test_signal_oom(xe_device_get(fd), true);

Thanks,
Rahul
 }
> --
> 2.34.1


  parent reply	other threads:[~2023-05-31 15:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-29 12:48 [igt-dev] [PATCH i-g-t] tests/xe: Add a test for corner cases of eviction Maarten Lankhorst
2023-05-30 13:33 ` [igt-dev] ✗ Fi.CI.BUILD: failure for " Patchwork
2023-05-30 14:55 ` [igt-dev] [PATCH i-g-t] " Maarten Lankhorst
2023-05-31  9:53   ` Thomas Hellström
2023-05-31 18:18     ` Maarten Lankhorst
2023-06-02 10:09       ` Thomas Hellström
2023-05-31 15:58 ` Kumar, Janga Rahul [this message]
2023-06-01 11:53   ` Maarten Lankhorst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MW3PR11MB4748892C517A0AC411059E4BAE489@MW3PR11MB4748.namprd11.prod.outlook.com \
    --to=janga.rahul.kumar@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=maarten.lankhorst@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.