From: Zbigniew Kempczyński
Date: Wed, 29 Dec 2021 14:57:40 +0100
Message-Id: <20211229135742.37177-2-zbigniew.kempczynski@intel.com>
In-Reply-To: <20211229135742.37177-1-zbigniew.kempczynski@intel.com>
References: <20211229135742.37177-1-zbigniew.kempczynski@intel.com>
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
Subject: [igt-dev] [PATCH i-g-t 1/3] lib/intel_memory_region: Add start offset and alignment detection
To: igt-dev@lists.freedesktop.org
Cc: Petri Latvala

With the era of new gens we are forced to use no-reloc (softpin). This
brings a few problems, like vm range limitations, which the kernel used
to handle for us. They can be handled in userspace code as well, either
by adding gen-related conditionals or by trying to detect the
constraints. Let's do the detection dynamically and find a safe start
offset and alignment for each memory region we have. This should be a
universal solution regardless of hw limitations and bugs.

As such detection is not a lightweight technique, also add some caching
structures to handle consecutive calls for the same data.

Signed-off-by: Zbigniew Kempczyński
Cc: Petri Latvala
Cc: Ashutosh Dixit
---
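For reviewers, a rough usage sketch of the new helpers in a test
(illustrative only, not part of the diff below; the wrapper name
pin_at_safe_offset() and the object size are made up):

	static void pin_at_safe_offset(int i915)
	{
		/* lowest offset and largest alignment accepted by all regions */
		uint64_t start = gem_get_safe_start_offset(i915);
		uint64_t align = gem_get_safe_alignment(i915);
		struct drm_i915_gem_exec_object2 obj = {
			.handle = gem_create(i915, 4096),
			.offset = ALIGN(start, align),
			.flags = EXEC_OBJECT_PINNED,
		};

		/* ... build and submit the execbuf with obj softpinned as usual ... */
	}
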
 lib/i915/intel_memory_region.c | 268 +++++++++++++++++++++++++++++++++
 lib/i915/intel_memory_region.h |   5 +
 2 files changed, 273 insertions(+)

diff --git a/lib/i915/intel_memory_region.c b/lib/i915/intel_memory_region.c
index dfbb8acf0..4bf116dc9 100644
--- a/lib/i915/intel_memory_region.c
+++ b/lib/i915/intel_memory_region.c
@@ -28,11 +28,13 @@
 #include
 #include
 #include
+#include <pthread.h>
 
 #include "i915/gem_create.h"
 #include "intel_reg.h"
 #include "drmtest.h"
 #include "ioctl_wrappers.h"
+#include "igt_aux.h"
 #include "igt_dummyload.h"
 #include "igt_gt.h"
 #include "igt_params.h"
@@ -40,6 +42,7 @@
 #include "intel_chipset.h"
 #include "igt_collection.h"
 #include "igt_device.h"
+#include "gem_mman.h"
 
 #include "i915/intel_memory_region.h"
 
@@ -480,3 +483,268 @@ uint64_t gpu_meminfo_region_available(const struct drm_i915_query_memory_regions
 
 	return 0;
 }
+
+#define PAGE_SIZE 4096
+#define START_OFFSET PAGE_SIZE
+
+struct devid_start {
+	uint16_t devid;
+	uint64_t start;
+	uint32_t region;
+	struct igt_list_head link;
+};
+
+static IGT_LIST_HEAD(start_cache);
+static pthread_mutex_t start_cache_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static uint64_t detect_start_offset(int i915, uint32_t region_bb)
+{
+	struct drm_i915_gem_exec_object2 obj;
+	struct drm_i915_gem_execbuffer2 eb;
+	uint64_t start_offset = START_OFFSET;
+	uint64_t bb_size = PAGE_SIZE;
+	uint32_t *batch;
+	uint16_t devid = intel_get_drm_devid(i915);
+	struct devid_start *entry;
+
+	pthread_mutex_lock(&start_cache_mutex);
+	igt_list_for_each_entry(entry, &start_cache, link) {
+		if (entry->devid == devid && entry->region == region_bb)
+			goto out;
+	}
+	pthread_mutex_unlock(&start_cache_mutex);
+
+	memset(&obj, 0, sizeof(obj));
+	memset(&eb, 0, sizeof(eb));
+
+	eb.buffers_ptr = to_user_pointer(&obj);
+	eb.buffer_count = 1;
+	eb.flags = I915_EXEC_DEFAULT;
+	igt_assert(__gem_create_in_memory_regions(i915, &obj.handle, &bb_size, region_bb) == 0);
+	obj.flags = EXEC_OBJECT_PINNED;
+
+	batch = gem_mmap__device_coherent(i915, obj.handle, 0, bb_size, PROT_WRITE);
+	*batch = MI_BATCH_BUFFER_END;
+	munmap(batch, bb_size);
+
+	while (1) {
+		obj.offset = start_offset;
+
+		if (__gem_execbuf(i915, &eb) == 0)
+			break;
+
+		start_offset <<= 1;
+
+		igt_assert(start_offset <= 1ull << 32);
+	}
+	gem_close(i915, obj.handle);
+
+	/* Check whether another thread did the job before us */
+	pthread_mutex_lock(&start_cache_mutex);
+	igt_list_for_each_entry(entry, &start_cache, link) {
+		if (entry->devid == devid && entry->region == region_bb)
+			goto out;
+	}
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		pthread_mutex_unlock(&start_cache_mutex);
+	igt_assert(entry);
+	entry->devid = devid;
+	entry->start = start_offset;
+	entry->region = region_bb;
+	igt_list_add(&entry->link, &start_cache);
+
+out:
+	pthread_mutex_unlock(&start_cache_mutex);
+
+	return entry->start;
+}
+
+/**
+ * gem_get_start_offset_for_region:
+ * @i915: drm fd
+ * @region: memory region
+ *
+ * Returns: the start offset at which the kernel allows placing objects for
+ * the memory region.
+ */
+uint64_t gem_get_start_offset_for_region(int i915, uint32_t region)
+{
+	return detect_start_offset(i915, region);
+}
+
+/**
+ * gem_get_safe_start_offset:
+ * @i915: drm fd
+ *
+ * Returns: the lowest start offset which can be used for any memory
+ * region. Useful if for some reason some regions don't allow placing
+ * objects at offset 0x0.
+ */
+uint64_t gem_get_safe_start_offset(int i915)
+{
+	struct drm_i915_query_memory_regions *query_info;
+	struct igt_collection *regions, *set;
+	uint32_t region;
+	uint64_t offset = 0;
+
+	query_info = gem_get_query_memory_regions(i915);
+	igt_assert(query_info);
+
+	set = get_memory_region_set(query_info,
+				    I915_SYSTEM_MEMORY,
+				    I915_DEVICE_MEMORY);
+
+	for_each_combination(regions, 1, set) {
+		region = igt_collection_get_value(regions, 0);
+		offset = max(offset, gem_get_start_offset_for_region(i915, region));
+	}
+	free(query_info);
+	igt_collection_destroy(set);
+
+	return offset;
+}
+
+static uint64_t detect_alignment(int i915, uint32_t region_bb, uint32_t region_obj)
+{
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_execbuffer2 eb;
+	uint64_t default_alignment = PAGE_SIZE;
+	uint64_t bb_size = PAGE_SIZE, obj_size = PAGE_SIZE;
+	uint32_t *batch;
+
+	memset(obj, 0, sizeof(obj));
+	memset(&eb, 0, sizeof(eb));
+
+	/* Establish the bb offset first */
+	eb.buffers_ptr = to_user_pointer(obj);
+	eb.buffer_count = 1;
+	eb.flags = I915_EXEC_BATCH_FIRST | I915_EXEC_DEFAULT;
+	igt_assert(__gem_create_in_memory_regions(i915, &obj[0].handle, &bb_size, region_bb) == 0);
+	obj[0].flags = EXEC_OBJECT_PINNED;
+
+	batch = gem_mmap__device_coherent(i915, obj[0].handle, 0, bb_size, PROT_WRITE);
+	*batch = MI_BATCH_BUFFER_END;
+	munmap(batch, bb_size);
+
+	obj[0].offset = detect_start_offset(i915, region_bb);
+
+	/* Find the appropriate alignment for the object */
+	eb.buffer_count = ARRAY_SIZE(obj);
+	igt_assert(__gem_create_in_memory_regions(i915, &obj[1].handle, &obj_size, region_obj) == 0);
+	obj[1].flags = EXEC_OBJECT_PINNED;
+	while (1) {
+		obj[1].offset = ALIGN(obj[0].offset + bb_size, default_alignment);
+		igt_assert(obj[1].offset <= 1ull << 32);
+
+		if (__gem_execbuf(i915, &eb) == 0)
+			break;
+
+		default_alignment <<= 1;
+	}
+
+	gem_close(i915, obj[0].handle);
+	gem_close(i915, obj[1].handle);
+
+	return default_alignment;
+}
+
+/**
+ * gem_get_alignment_for_regions:
+ * @i915: drm fd
+ * @region1: first region
+ * @region2: second region
+ *
+ * Returns: the alignment which must be used when objects from @region1 and
+ * @region2 are going to interact.
+ */
+uint64_t gem_get_alignment_for_regions(int i915, uint32_t region1, uint32_t region2)
+{
+	return detect_alignment(i915, region1, region2);
+}
+
+struct devid_align {
+	uint16_t devid;
+	uint64_t alignment;
+	struct igt_list_head link;
+};
+
+static IGT_LIST_HEAD(alignment_cache);
+static pthread_mutex_t alignment_cache_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/**
+ * gem_get_safe_alignment:
+ * @i915: drm fd
+ *
+ * Returns: a safe (maximum) alignment for all memory regions on the @i915 device.
+ */
+uint64_t gem_get_safe_alignment(int i915)
+{
+	struct drm_i915_query_memory_regions *query_info;
+	struct igt_collection *regions, *set;
+	uint64_t default_alignment = 0;
+	uint32_t region_bb, region_obj;
+	uint16_t devid = intel_get_drm_devid(i915);
+	struct devid_align *entry;
+
+	/* non-discrete uses 4K page size */
+	if (!gem_has_lmem(i915))
+		return PAGE_SIZE;
+
+	pthread_mutex_lock(&alignment_cache_mutex);
+	igt_list_for_each_entry(entry, &alignment_cache, link) {
+		if (entry->devid == devid)
+			goto out;
+	}
+	/*
+	 * Unlock the mutex in the hope that parallel alignment detection
+	 * will happen for different devids.
+	 */
+	pthread_mutex_unlock(&alignment_cache_mutex);
+
+	query_info = gem_get_query_memory_regions(i915);
+	igt_assert(query_info);
+
+	set = get_memory_region_set(query_info,
+				    I915_SYSTEM_MEMORY,
+				    I915_DEVICE_MEMORY);
+
+	for_each_variation_r(regions, 2, set) {
+		uint64_t alignment;
+
+		region_bb = igt_collection_get_value(regions, 0);
+		region_obj = igt_collection_get_value(regions, 1);
+
+		/* Alignment is symmetric, so the triangular matrix is enough */
+		if (region_bb > region_obj)
+			continue;
+
+		alignment = detect_alignment(i915, region_bb, region_obj);
+		if (default_alignment < alignment)
+			default_alignment = alignment;
+	}
+
+	free(query_info);
+	igt_collection_destroy(set);
+
+	/* Check again whether the cache was updated in the meantime */
+	pthread_mutex_lock(&alignment_cache_mutex);
+	igt_list_for_each_entry(entry, &alignment_cache, link) {
+		if (entry->devid == devid)
+			goto out;
+	}
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		pthread_mutex_unlock(&alignment_cache_mutex);
+	igt_assert(entry);
+	entry->devid = devid;
+	entry->alignment = default_alignment;
+	igt_list_add(&entry->link, &alignment_cache);
+
+out:
+	pthread_mutex_unlock(&alignment_cache_mutex);
+
+	return entry->alignment;
+}
diff --git a/lib/i915/intel_memory_region.h b/lib/i915/intel_memory_region.h
index 8b427b7e7..4d994f1ad 100644
--- a/lib/i915/intel_memory_region.h
+++ b/lib/i915/intel_memory_region.h
@@ -129,4 +129,9 @@ uint64_t gpu_meminfo_region_available(const struct drm_i915_query_memory_regions
				      uint16_t memory_class,
				      uint16_t memory_instance);
 
+uint64_t gem_get_start_offset_for_region(int i915, uint32_t region);
+uint64_t gem_get_safe_start_offset(int i915);
+uint64_t gem_get_alignment_for_regions(int i915, uint32_t region1, uint32_t region2);
+uint64_t gem_get_safe_alignment(int i915);
+
 #endif /* INTEL_MEMORY_REGION_H */
-- 
2.32.0