* [PATCH 01/20] drm: improve drm_buddy_alloc function
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-26 18:03 ` [Intel-gfx] " Jani Nikula
2022-01-26 15:21 ` [PATCH 02/20] drm: implement top-down allocation method Matthew Auld
` (18 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: dri-devel, Arunpravin
From: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
- Make drm_buddy_alloc a single function to handle
range allocation and non-range allocation demands
- Implemented a new function alloc_range() which allocates
the requested power-of-two block complying with range limitations
- Moved order computation and memory alignment logic from
i915 driver to drm buddy
v2:
merged below changes to keep the build unbroken
- drm_buddy_alloc_range() becomes obsolete and may be removed
- enable ttm range allocation (fpfn / lpfn) support in i915 driver
- apply enhanced drm_buddy_alloc() function to i915 driver
v3(Matthew Auld):
- Fix alignment issues and remove unnecessary list_empty check
- add more validation checks for input arguments
- make alloc_range() block allocations as bottom-up
- optimize order computation logic
- replace uint64_t with u64, which is preferred in the kernel
v4(Matthew Auld):
- keep drm_buddy_alloc_range() function implementation for generic
actual range allocations
- keep alloc_range() implementation for end bias allocations
v5(Matthew Auld):
- modify drm_buddy_alloc() passing argument place->lpfn to lpfn
as place->lpfn will currently always be zero for i915
v6(Matthew Auld):
- fixup potential uaf - If we are unlucky and can't allocate
enough memory when splitting blocks, where we temporarily
end up with the given block and its buddy on the respective
free list, then we need to ensure we delete both blocks,
and not just the buddy, before potentially freeing them
- fix warnings reported by kernel test robot <lkp@intel.com>
Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 326 +++++++++++++-----
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 67 ++--
drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 2 +
include/drm/drm_buddy.h | 22 +-
4 files changed, 293 insertions(+), 124 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index d60878bc9c20..954e31962c74 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -282,23 +282,99 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
}
EXPORT_SYMBOL(drm_buddy_free_list);
-/**
- * drm_buddy_alloc_blocks - allocate power-of-two blocks
- *
- * @mm: DRM buddy manager to allocate from
- * @order: size of the allocation
- *
- * The order value here translates to:
- *
- * 0 = 2^0 * mm->chunk_size
- * 1 = 2^1 * mm->chunk_size
- * 2 = 2^2 * mm->chunk_size
- *
- * Returns:
- * allocated ptr to the &drm_buddy_block on success
- */
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
+static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+ return s1 <= e2 && e1 >= s2;
+}
+
+static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
+{
+ return s1 <= s2 && e1 >= e2;
+}
+
+static struct drm_buddy_block *
+alloc_range_bias(struct drm_buddy *mm,
+ u64 start, u64 end,
+ unsigned int order)
+{
+ struct drm_buddy_block *block;
+ struct drm_buddy_block *buddy;
+ LIST_HEAD(dfs);
+ int err;
+ int i;
+
+ end = end - 1;
+
+ for (i = 0; i < mm->n_roots; ++i)
+ list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+ do {
+ u64 block_start;
+ u64 block_end;
+
+ block = list_first_entry_or_null(&dfs,
+ struct drm_buddy_block,
+ tmp_link);
+ if (!block)
+ break;
+
+ list_del(&block->tmp_link);
+
+ if (drm_buddy_block_order(block) < order)
+ continue;
+
+ block_start = drm_buddy_block_offset(block);
+ block_end = block_start + drm_buddy_block_size(mm, block) - 1;
+
+ if (!overlaps(start, end, block_start, block_end))
+ continue;
+
+ if (drm_buddy_block_is_allocated(block))
+ continue;
+
+ if (contains(start, end, block_start, block_end) &&
+ order == drm_buddy_block_order(block)) {
+ /*
+ * Find the free block within the range.
+ */
+ if (drm_buddy_block_is_free(block))
+ return block;
+
+ continue;
+ }
+
+ if (!drm_buddy_block_is_split(block)) {
+ err = split_block(mm, block);
+ if (unlikely(err))
+ goto err_undo;
+ }
+
+ list_add(&block->right->tmp_link, &dfs);
+ list_add(&block->left->tmp_link, &dfs);
+ } while (1);
+
+ return ERR_PTR(-ENOSPC);
+
+err_undo:
+ /*
+ * We really don't want to leave around a bunch of split blocks, since
+ * bigger is better, so make sure we merge everything back before we
+ * free the allocated blocks.
+ */
+ buddy = get_buddy(block);
+ if (buddy &&
+ (drm_buddy_block_is_free(block) &&
+ drm_buddy_block_is_free(buddy))) {
+ list_del(&block->link);
+ __drm_buddy_free(mm, block);
+ }
+ return ERR_PTR(err);
+}
+
+static struct drm_buddy_block *
+alloc_from_freelist(struct drm_buddy *mm,
+ unsigned int order,
+ unsigned long flags)
{
struct drm_buddy_block *block = NULL;
unsigned int i;
@@ -320,78 +396,30 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
while (i != order) {
err = split_block(mm, block);
if (unlikely(err))
- goto out_free;
+ goto err_undo;
- /* Go low */
- block = block->left;
+ block = block->right;
i--;
}
-
- mark_allocated(block);
- mm->avail -= drm_buddy_block_size(mm, block);
- kmemleak_update_trace(block);
return block;
-out_free:
- if (i != order)
+err_undo:
+ if (i != order) {
+ list_del(&block->link);
__drm_buddy_free(mm, block);
+ }
return ERR_PTR(err);
}
-EXPORT_SYMBOL(drm_buddy_alloc_blocks);
-
-static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
-{
- return s1 <= e2 && e1 >= s2;
-}
-static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
-{
- return s1 <= s2 && e1 >= e2;
-}
-
-/**
- * drm_buddy_alloc_range - allocate range
- *
- * @mm: DRM buddy manager to allocate from
- * @blocks: output list head to add allocated blocks
- * @start: start of the allowed range for this block
- * @size: size of the allocation
- *
- * Intended for pre-allocating portions of the address space, for example to
- * reserve a block for the initial framebuffer or similar, hence the expectation
- * here is that drm_buddy_alloc_blocks() is still the main vehicle for
- * allocations, so if that's not the case then the drm_mm range allocator is
- * probably a much better fit, and so you should probably go use that instead.
- *
- * Note that it's safe to chain together multiple alloc_ranges
- * with the same blocks list
- *
- * Returns:
- * 0 on success, error code on failure.
- */
-int drm_buddy_alloc_range(struct drm_buddy *mm,
- struct list_head *blocks,
- u64 start, u64 size)
+static int __alloc_range(struct drm_buddy *mm,
+ struct list_head *dfs,
+ u64 start, u64 size,
+ struct list_head *blocks)
{
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
- LIST_HEAD(allocated);
- LIST_HEAD(dfs);
u64 end;
int err;
- int i;
-
- if (size < mm->chunk_size)
- return -EINVAL;
-
- if (!IS_ALIGNED(size | start, mm->chunk_size))
- return -EINVAL;
-
- if (range_overflows(start, size, mm->size))
- return -EINVAL;
-
- for (i = 0; i < mm->n_roots; ++i)
- list_add_tail(&mm->roots[i]->tmp_link, &dfs);
end = start + size - 1;
@@ -399,7 +427,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
u64 block_start;
u64 block_end;
- block = list_first_entry_or_null(&dfs,
+ block = list_first_entry_or_null(dfs,
struct drm_buddy_block,
tmp_link);
if (!block)
@@ -426,7 +454,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
mark_allocated(block);
mm->avail -= drm_buddy_block_size(mm, block);
- list_add_tail(&block->link, &allocated);
+ list_add_tail(&block->link, blocks);
continue;
}
@@ -436,11 +464,10 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
goto err_undo;
}
- list_add(&block->right->tmp_link, &dfs);
- list_add(&block->left->tmp_link, &dfs);
+ list_add(&block->right->tmp_link, dfs);
+ list_add(&block->left->tmp_link, dfs);
} while (1);
- list_splice_tail(&allocated, blocks);
return 0;
err_undo:
@@ -452,14 +479,149 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
buddy = get_buddy(block);
if (buddy &&
(drm_buddy_block_is_free(block) &&
- drm_buddy_block_is_free(buddy)))
+ drm_buddy_block_is_free(buddy))) {
+ list_del(&block->link);
__drm_buddy_free(mm, block);
+ }
+
+err_free:
+ drm_buddy_free_list(mm, blocks);
+ return err;
+}
+
+/**
+ * __drm_buddy_alloc_range - actual range allocation
+ *
+ * @mm: DRM buddy manager to allocate from
+ * @start: start of the allowed range for this block
+ * @size: size of the allocation
+ * @blocks: output list head to add allocated blocks
+ *
+ * Intended for pre-allocating portions of the address space, for example to
+ * reserve a block for the initial framebuffer or similar
+ *
+ * Note that it's safe to chain together multiple alloc_ranges
+ * with the same blocks list
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+static int __drm_buddy_alloc_range(struct drm_buddy *mm,
+ u64 start,
+ u64 size,
+ struct list_head *blocks)
+{
+ LIST_HEAD(dfs);
+ int i;
+
+ for (i = 0; i < mm->n_roots; ++i)
+ list_add_tail(&mm->roots[i]->tmp_link, &dfs);
+
+ return __alloc_range(mm, &dfs, start, size, blocks);
+}
+
+/**
+ * drm_buddy_alloc_blocks - allocate power-of-two blocks
+ *
+ * @mm: DRM buddy manager to allocate from
+ * @start: start of the allowed range for this block
+ * @end: end of the allowed range for this block
+ * @size: size of the allocation
+ * @min_page_size: alignment of the allocation
+ * @blocks: output list head to add allocated blocks
+ * @flags: DRM_BUDDY_*_ALLOCATION flags
+ *
+ * alloc_range_bias() called on range limitations, which traverses
+ * the tree and returns the desired block.
+ *
+ * alloc_from_freelist() called when *no* range restrictions
+ * are enforced, which picks the block from the freelist.
+ *
+ * blocks are allocated in order, the order value here translates to:
+ *
+ * 0 = 2^0 * mm->chunk_size
+ * 1 = 2^1 * mm->chunk_size
+ * 2 = 2^2 * mm->chunk_size
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+ u64 start, u64 end, u64 size,
+ u64 min_page_size,
+ struct list_head *blocks,
+ unsigned long flags)
+{
+ struct drm_buddy_block *block = NULL;
+ unsigned int min_order, order;
+ unsigned long pages;
+ LIST_HEAD(allocated);
+ int err;
+
+ if (size < mm->chunk_size)
+ return -EINVAL;
+
+ if (min_page_size < mm->chunk_size)
+ return -EINVAL;
+
+ if (!is_power_of_2(min_page_size))
+ return -EINVAL;
+
+ if (!IS_ALIGNED(start | end | size, mm->chunk_size))
+ return -EINVAL;
+
+ if (check_range_overflow(start, end, size, mm->size))
+ return -EINVAL;
+
+ /* Actual range allocation */
+ if (start + size == end)
+ return __drm_buddy_alloc_range(mm, start, size, blocks);
+
+ pages = size >> ilog2(mm->chunk_size);
+ order = fls(pages) - 1;
+ min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+
+ do {
+ order = min(order, (unsigned int)fls(pages) - 1);
+ BUG_ON(order > mm->max_order);
+ BUG_ON(order < min_order);
+
+ do {
+ if (flags & DRM_BUDDY_RANGE_ALLOCATION)
+ /* Allocate traversing within the range */
+ block = alloc_range_bias(mm, start, end, order);
+ else
+ /* Allocate from freelist */
+ block = alloc_from_freelist(mm, order, flags);
+
+ if (!IS_ERR(block))
+ break;
+
+ if (order-- == min_order) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+ } while (1);
+
+ mark_allocated(block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ kmemleak_update_trace(block);
+ list_add_tail(&block->link, &allocated);
+
+ pages -= BIT(order);
+
+ if (!pages)
+ break;
+ } while (1);
+
+ list_splice_tail(&allocated, blocks);
+ return 0;
err_free:
drm_buddy_free_list(mm, &allocated);
return err;
}
-EXPORT_SYMBOL(drm_buddy_alloc_range);
+EXPORT_SYMBOL(drm_buddy_alloc_blocks);
/**
* drm_buddy_block_print - print block information
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 6ba314f9836a..1411f4cf1f21 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
struct i915_ttm_buddy_resource *bman_res;
struct drm_buddy *mm = &bman->mm;
- unsigned long n_pages;
- unsigned int min_order;
+ unsigned long n_pages, lpfn;
u64 min_page_size;
u64 size;
int err;
- GEM_BUG_ON(place->fpfn || place->lpfn);
+ lpfn = place->lpfn;
+ if (!lpfn)
+ lpfn = man->size;
bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
if (!bman_res)
@@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
INIT_LIST_HEAD(&bman_res->blocks);
bman_res->mm = mm;
+ if (place->fpfn || lpfn != man->size)
+ bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
GEM_BUG_ON(!bman_res->base.num_pages);
size = bman_res->base.num_pages << PAGE_SHIFT;
@@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
min_page_size = bo->page_alignment << PAGE_SHIFT;
GEM_BUG_ON(min_page_size < mm->chunk_size);
- min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
+
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ unsigned long pages;
+
size = roundup_pow_of_two(size);
- min_order = ilog2(size) - ilog2(mm->chunk_size);
+ min_page_size = size;
+
+ pages = size >> ilog2(mm->chunk_size);
+ if (pages > lpfn)
+ lpfn = pages;
}
if (size > mm->size) {
@@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
n_pages = size >> ilog2(mm->chunk_size);
- do {
- struct drm_buddy_block *block;
- unsigned int order;
-
- order = fls(n_pages) - 1;
- GEM_BUG_ON(order > mm->max_order);
- GEM_BUG_ON(order < min_order);
-
- do {
- mutex_lock(&bman->lock);
- block = drm_buddy_alloc_blocks(mm, order);
- mutex_unlock(&bman->lock);
- if (!IS_ERR(block))
- break;
-
- if (order-- == min_order) {
- err = -ENOSPC;
- goto err_free_blocks;
- }
- } while (1);
-
- n_pages -= BIT(order);
-
- list_add_tail(&block->link, &bman_res->blocks);
-
- if (!n_pages)
- break;
- } while (1);
+ mutex_lock(&bman->lock);
+ err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+ (u64)lpfn << PAGE_SHIFT,
+ (u64)n_pages << PAGE_SHIFT,
+ min_page_size,
+ &bman_res->blocks,
+ bman_res->flags);
+ mutex_unlock(&bman->lock);
+ if (unlikely(err))
+ goto err_free_blocks;
*res = &bman_res->base;
return 0;
@@ -266,10 +258,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
{
struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
struct drm_buddy *mm = &bman->mm;
+ unsigned long flags = 0;
int ret;
+ flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
mutex_lock(&bman->lock);
- ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
+ ret = drm_buddy_alloc_blocks(mm, start,
+ start + size,
+ size, mm->chunk_size,
+ &bman->reserved,
+ flags);
mutex_unlock(&bman->lock);
return ret;
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
index 312077941411..72c90b432e87 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
@@ -20,6 +20,7 @@ struct drm_buddy;
*
* @base: struct ttm_resource base class we extend
* @blocks: the list of struct i915_buddy_block for this resource/allocation
+ * @flags: DRM_BUDDY_*_ALLOCATION flags
* @mm: the struct i915_buddy_mm for this resource
*
* Extends the struct ttm_resource to manage an address space allocation with
@@ -28,6 +29,7 @@ struct drm_buddy;
struct i915_ttm_buddy_resource {
struct ttm_resource base;
struct list_head blocks;
+ unsigned long flags;
struct drm_buddy *mm;
};
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index f524db152413..865664b90a8a 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -13,15 +13,22 @@
#include <drm/drm_print.h>
-#define range_overflows(start, size, max) ({ \
+#define check_range_overflow(start, end, size, max) ({ \
typeof(start) start__ = (start); \
+ typeof(end) end__ = (end);\
typeof(size) size__ = (size); \
typeof(max) max__ = (max); \
(void)(&start__ == &size__); \
(void)(&start__ == &max__); \
- start__ >= max__ || size__ > max__ - start__; \
+ (void)(&start__ == &end__); \
+ (void)(&end__ == &size__); \
+ (void)(&end__ == &max__); \
+ start__ >= max__ || end__ > max__ || \
+ size__ > end__ - start__; \
})
+#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
+
struct drm_buddy_block {
#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
#define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
@@ -131,12 +138,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
void drm_buddy_fini(struct drm_buddy *mm);
-struct drm_buddy_block *
-drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
-
-int drm_buddy_alloc_range(struct drm_buddy *mm,
- struct list_head *blocks,
- u64 start, u64 size);
+int drm_buddy_alloc_blocks(struct drm_buddy *mm,
+ u64 start, u64 end, u64 size,
+ u64 min_page_size,
+ struct list_head *blocks,
+ unsigned long flags);
void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [Intel-gfx] [PATCH 01/20] drm: improve drm_buddy_alloc function
2022-01-26 15:21 ` [PATCH 01/20] drm: improve drm_buddy_alloc function Matthew Auld
@ 2022-01-26 18:03 ` Jani Nikula
0 siblings, 0 replies; 50+ messages in thread
From: Jani Nikula @ 2022-01-26 18:03 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel, Arunpravin
On Wed, 26 Jan 2022, Matthew Auld <matthew.auld@intel.com> wrote:
> From: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>
> - Make drm_buddy_alloc a single function to handle
> range allocation and non-range allocation demands
>
> - Implemented a new function alloc_range() which allocates
> the requested power-of-two block complying with range limitations
>
> - Moved order computation and memory alignment logic from
> i915 driver to drm buddy
>
> v2:
> merged below changes to keep the build unbroken
> - drm_buddy_alloc_range() becomes obsolete and may be removed
> - enable ttm range allocation (fpfn / lpfn) support in i915 driver
> - apply enhanced drm_buddy_alloc() function to i915 driver
>
> v3(Matthew Auld):
> - Fix alignment issues and remove unnecessary list_empty check
> - add more validation checks for input arguments
> - make alloc_range() block allocations as bottom-up
> - optimize order computation logic
> - replace uint64_t with u64, which is preferred in the kernel
>
> v4(Matthew Auld):
> - keep drm_buddy_alloc_range() function implementation for generic
> actual range allocations
> - keep alloc_range() implementation for end bias allocations
>
> v5(Matthew Auld):
> - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
> as place->lpfn will currently always be zero for i915
>
> v6(Matthew Auld):
> - fixup potential uaf - If we are unlucky and can't allocate
> enough memory when splitting blocks, where we temporarily
> end up with the given block and its buddy on the respective
> free list, then we need to ensure we delete both blocks,
> and not just the buddy, before potentially freeing them
>
> - fix warnings reported by kernel test robot <lkp@intel.com>
>
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
> drivers/gpu/drm/drm_buddy.c | 326 +++++++++++++-----
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 67 ++--
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 2 +
> include/drm/drm_buddy.h | 22 +-
> 4 files changed, 293 insertions(+), 124 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index d60878bc9c20..954e31962c74 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -282,23 +282,99 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
> }
> EXPORT_SYMBOL(drm_buddy_free_list);
>
> -/**
> - * drm_buddy_alloc_blocks - allocate power-of-two blocks
> - *
> - * @mm: DRM buddy manager to allocate from
> - * @order: size of the allocation
> - *
> - * The order value here translates to:
> - *
> - * 0 = 2^0 * mm->chunk_size
> - * 1 = 2^1 * mm->chunk_size
> - * 2 = 2^2 * mm->chunk_size
> - *
> - * Returns:
> - * allocated ptr to the &drm_buddy_block on success
> - */
> -struct drm_buddy_block *
> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
> +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> +{
> + return s1 <= e2 && e1 >= s2;
> +}
> +
> +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
> +{
> + return s1 <= s2 && e1 >= e2;
> +}
> +
> +static struct drm_buddy_block *
> +alloc_range_bias(struct drm_buddy *mm,
> + u64 start, u64 end,
> + unsigned int order)
> +{
> + struct drm_buddy_block *block;
> + struct drm_buddy_block *buddy;
> + LIST_HEAD(dfs);
> + int err;
> + int i;
> +
> + end = end - 1;
> +
> + for (i = 0; i < mm->n_roots; ++i)
> + list_add_tail(&mm->roots[i]->tmp_link, &dfs);
> +
> + do {
> + u64 block_start;
> + u64 block_end;
> +
> + block = list_first_entry_or_null(&dfs,
> + struct drm_buddy_block,
> + tmp_link);
> + if (!block)
> + break;
> +
> + list_del(&block->tmp_link);
> +
> + if (drm_buddy_block_order(block) < order)
> + continue;
> +
> + block_start = drm_buddy_block_offset(block);
> + block_end = block_start + drm_buddy_block_size(mm, block) - 1;
> +
> + if (!overlaps(start, end, block_start, block_end))
> + continue;
> +
> + if (drm_buddy_block_is_allocated(block))
> + continue;
> +
> + if (contains(start, end, block_start, block_end) &&
> + order == drm_buddy_block_order(block)) {
> + /*
> + * Find the free block within the range.
> + */
> + if (drm_buddy_block_is_free(block))
> + return block;
> +
> + continue;
> + }
> +
> + if (!drm_buddy_block_is_split(block)) {
> + err = split_block(mm, block);
> + if (unlikely(err))
> + goto err_undo;
> + }
> +
> + list_add(&block->right->tmp_link, &dfs);
> + list_add(&block->left->tmp_link, &dfs);
> + } while (1);
> +
> + return ERR_PTR(-ENOSPC);
> +
> +err_undo:
> + /*
> + * We really don't want to leave around a bunch of split blocks, since
> + * bigger is better, so make sure we merge everything back before we
> + * free the allocated blocks.
> + */
> + buddy = get_buddy(block);
> + if (buddy &&
> + (drm_buddy_block_is_free(block) &&
> + drm_buddy_block_is_free(buddy))) {
> + list_del(&block->link);
> + __drm_buddy_free(mm, block);
> + }
> + return ERR_PTR(err);
> +}
> +
> +static struct drm_buddy_block *
> +alloc_from_freelist(struct drm_buddy *mm,
> + unsigned int order,
> + unsigned long flags)
> {
> struct drm_buddy_block *block = NULL;
> unsigned int i;
> @@ -320,78 +396,30 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
> while (i != order) {
> err = split_block(mm, block);
> if (unlikely(err))
> - goto out_free;
> + goto err_undo;
>
> - /* Go low */
> - block = block->left;
> + block = block->right;
> i--;
> }
> -
> - mark_allocated(block);
> - mm->avail -= drm_buddy_block_size(mm, block);
> - kmemleak_update_trace(block);
> return block;
>
> -out_free:
> - if (i != order)
> +err_undo:
> + if (i != order) {
> + list_del(&block->link);
> __drm_buddy_free(mm, block);
> + }
> return ERR_PTR(err);
> }
> -EXPORT_SYMBOL(drm_buddy_alloc_blocks);
> -
> -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
> -{
> - return s1 <= e2 && e1 >= s2;
> -}
>
> -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
> -{
> - return s1 <= s2 && e1 >= e2;
> -}
> -
> -/**
> - * drm_buddy_alloc_range - allocate range
> - *
> - * @mm: DRM buddy manager to allocate from
> - * @blocks: output list head to add allocated blocks
> - * @start: start of the allowed range for this block
> - * @size: size of the allocation
> - *
> - * Intended for pre-allocating portions of the address space, for example to
> - * reserve a block for the initial framebuffer or similar, hence the expectation
> - * here is that drm_buddy_alloc_blocks() is still the main vehicle for
> - * allocations, so if that's not the case then the drm_mm range allocator is
> - * probably a much better fit, and so you should probably go use that instead.
> - *
> - * Note that it's safe to chain together multiple alloc_ranges
> - * with the same blocks list
> - *
> - * Returns:
> - * 0 on success, error code on failure.
> - */
> -int drm_buddy_alloc_range(struct drm_buddy *mm,
> - struct list_head *blocks,
> - u64 start, u64 size)
> +static int __alloc_range(struct drm_buddy *mm,
> + struct list_head *dfs,
> + u64 start, u64 size,
> + struct list_head *blocks)
> {
> struct drm_buddy_block *block;
> struct drm_buddy_block *buddy;
> - LIST_HEAD(allocated);
> - LIST_HEAD(dfs);
> u64 end;
> int err;
> - int i;
> -
> - if (size < mm->chunk_size)
> - return -EINVAL;
> -
> - if (!IS_ALIGNED(size | start, mm->chunk_size))
> - return -EINVAL;
> -
> - if (range_overflows(start, size, mm->size))
> - return -EINVAL;
> -
> - for (i = 0; i < mm->n_roots; ++i)
> - list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>
> end = start + size - 1;
>
> @@ -399,7 +427,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
> u64 block_start;
> u64 block_end;
>
> - block = list_first_entry_or_null(&dfs,
> + block = list_first_entry_or_null(dfs,
> struct drm_buddy_block,
> tmp_link);
> if (!block)
> @@ -426,7 +454,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>
> mark_allocated(block);
> mm->avail -= drm_buddy_block_size(mm, block);
> - list_add_tail(&block->link, &allocated);
> + list_add_tail(&block->link, blocks);
> continue;
> }
>
> @@ -436,11 +464,10 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
> goto err_undo;
> }
>
> - list_add(&block->right->tmp_link, &dfs);
> - list_add(&block->left->tmp_link, &dfs);
> + list_add(&block->right->tmp_link, dfs);
> + list_add(&block->left->tmp_link, dfs);
> } while (1);
>
> - list_splice_tail(&allocated, blocks);
> return 0;
>
> err_undo:
> @@ -452,14 +479,149 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
> buddy = get_buddy(block);
> if (buddy &&
> (drm_buddy_block_is_free(block) &&
> - drm_buddy_block_is_free(buddy)))
> + drm_buddy_block_is_free(buddy))) {
> + list_del(&block->link);
> __drm_buddy_free(mm, block);
> + }
> +
> +err_free:
> + drm_buddy_free_list(mm, blocks);
> + return err;
> +}
> +
> +/**
> + * __drm_buddy_alloc_range - actual range allocation
> + *
> + * @mm: DRM buddy manager to allocate from
> + * @start: start of the allowed range for this block
> + * @size: size of the allocation
> + * @blocks: output list head to add allocated blocks
> + *
> + * Intended for pre-allocating portions of the address space, for example to
> + * reserve a block for the initial framebuffer or similar
> + *
> + * Note that it's safe to chain together multiple alloc_ranges
> + * with the same blocks list
> + *
> + * Returns:
> + * 0 on success, error code on failure.
> + */
> +static int __drm_buddy_alloc_range(struct drm_buddy *mm,
> + u64 start,
> + u64 size,
> + struct list_head *blocks)
> +{
> + LIST_HEAD(dfs);
> + int i;
> +
> + for (i = 0; i < mm->n_roots; ++i)
> + list_add_tail(&mm->roots[i]->tmp_link, &dfs);
> +
> + return __alloc_range(mm, &dfs, start, size, blocks);
> +}
> +
> +/**
> + * drm_buddy_alloc_blocks - allocate power-of-two blocks
> + *
> + * @mm: DRM buddy manager to allocate from
> + * @start: start of the allowed range for this block
> + * @end: end of the allowed range for this block
> + * @size: size of the allocation
> + * @min_page_size: alignment of the allocation
> + * @blocks: output list head to add allocated blocks
> + * @flags: DRM_BUDDY_*_ALLOCATION flags
> + *
> + * alloc_range_bias() called on range limitations, which traverses
> + * the tree and returns the desired block.
> + *
> + * alloc_from_freelist() called when *no* range restrictions
> + * are enforced, which picks the block from the freelist.
> + *
> + * blocks are allocated in order, the order value here translates to:
> + *
> + * 0 = 2^0 * mm->chunk_size
> + * 1 = 2^1 * mm->chunk_size
> + * 2 = 2^2 * mm->chunk_size
> + *
> + * Returns:
> + * 0 on success, error code on failure.
> + */
> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> + u64 start, u64 end, u64 size,
> + u64 min_page_size,
> + struct list_head *blocks,
> + unsigned long flags)
> +{
> + struct drm_buddy_block *block = NULL;
> + unsigned int min_order, order;
> + unsigned long pages;
> + LIST_HEAD(allocated);
> + int err;
> +
> + if (size < mm->chunk_size)
> + return -EINVAL;
> +
> + if (min_page_size < mm->chunk_size)
> + return -EINVAL;
> +
> + if (!is_power_of_2(min_page_size))
> + return -EINVAL;
> +
> + if (!IS_ALIGNED(start | end | size, mm->chunk_size))
> + return -EINVAL;
> +
> + if (check_range_overflow(start, end, size, mm->size))
> + return -EINVAL;
> +
> + /* Actual range allocation */
> + if (start + size == end)
> + return __drm_buddy_alloc_range(mm, start, size, blocks);
> +
> + pages = size >> ilog2(mm->chunk_size);
> + order = fls(pages) - 1;
> + min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> +
> + do {
> + order = min(order, (unsigned int)fls(pages) - 1);
> + BUG_ON(order > mm->max_order);
> + BUG_ON(order < min_order);
> +
> + do {
> + if (flags & DRM_BUDDY_RANGE_ALLOCATION)
> + /* Allocate traversing within the range */
> + block = alloc_range_bias(mm, start, end, order);
> + else
> + /* Allocate from freelist */
> + block = alloc_from_freelist(mm, order, flags);
> +
> + if (!IS_ERR(block))
> + break;
> +
> + if (order-- == min_order) {
> + err = -ENOSPC;
> + goto err_free;
> + }
> + } while (1);
> +
> + mark_allocated(block);
> + mm->avail -= drm_buddy_block_size(mm, block);
> + kmemleak_update_trace(block);
> + list_add_tail(&block->link, &allocated);
> +
> + pages -= BIT(order);
> +
> + if (!pages)
> + break;
> + } while (1);
> +
> + list_splice_tail(&allocated, blocks);
> + return 0;
>
> err_free:
> drm_buddy_free_list(mm, &allocated);
> return err;
> }
> -EXPORT_SYMBOL(drm_buddy_alloc_range);
> +EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>
> /**
> * drm_buddy_block_print - print block information
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 6ba314f9836a..1411f4cf1f21 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
> struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
> struct i915_ttm_buddy_resource *bman_res;
> struct drm_buddy *mm = &bman->mm;
> - unsigned long n_pages;
> - unsigned int min_order;
> + unsigned long n_pages, lpfn;
> u64 min_page_size;
> u64 size;
> int err;
>
> - GEM_BUG_ON(place->fpfn || place->lpfn);
> + lpfn = place->lpfn;
> + if (!lpfn)
> + lpfn = man->size;
>
> bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
> if (!bman_res)
> @@ -52,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
> INIT_LIST_HEAD(&bman_res->blocks);
> bman_res->mm = mm;
>
> + if (place->fpfn || lpfn != man->size)
> + bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
> GEM_BUG_ON(!bman_res->base.num_pages);
> size = bman_res->base.num_pages << PAGE_SHIFT;
>
> @@ -60,10 +64,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
> min_page_size = bo->page_alignment << PAGE_SHIFT;
>
> GEM_BUG_ON(min_page_size < mm->chunk_size);
> - min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
> +
> if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> + unsigned long pages;
> +
> size = roundup_pow_of_two(size);
> - min_order = ilog2(size) - ilog2(mm->chunk_size);
> + min_page_size = size;
> +
> + pages = size >> ilog2(mm->chunk_size);
> + if (pages > lpfn)
> + lpfn = pages;
> }
>
> if (size > mm->size) {
> @@ -73,34 +83,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>
> n_pages = size >> ilog2(mm->chunk_size);
>
> - do {
> - struct drm_buddy_block *block;
> - unsigned int order;
> -
> - order = fls(n_pages) - 1;
> - GEM_BUG_ON(order > mm->max_order);
> - GEM_BUG_ON(order < min_order);
> -
> - do {
> - mutex_lock(&bman->lock);
> - block = drm_buddy_alloc_blocks(mm, order);
> - mutex_unlock(&bman->lock);
> - if (!IS_ERR(block))
> - break;
> -
> - if (order-- == min_order) {
> - err = -ENOSPC;
> - goto err_free_blocks;
> - }
> - } while (1);
> -
> - n_pages -= BIT(order);
> -
> - list_add_tail(&block->link, &bman_res->blocks);
> -
> - if (!n_pages)
> - break;
> - } while (1);
> + mutex_lock(&bman->lock);
> + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> + (u64)lpfn << PAGE_SHIFT,
> + (u64)n_pages << PAGE_SHIFT,
> + min_page_size,
> + &bman_res->blocks,
> + bman_res->flags);
> + mutex_unlock(&bman->lock);
> + if (unlikely(err))
> + goto err_free_blocks;
>
> *res = &bman_res->base;
> return 0;
> @@ -266,10 +258,17 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
> {
> struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
> struct drm_buddy *mm = &bman->mm;
> + unsigned long flags = 0;
> int ret;
>
> + flags |= DRM_BUDDY_RANGE_ALLOCATION;
> +
> mutex_lock(&bman->lock);
> - ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
> + ret = drm_buddy_alloc_blocks(mm, start,
> + start + size,
> + size, mm->chunk_size,
> + &bman->reserved,
> + flags);
> mutex_unlock(&bman->lock);
>
> return ret;
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
> index 312077941411..72c90b432e87 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
> @@ -20,6 +20,7 @@ struct drm_buddy;
> *
> * @base: struct ttm_resource base class we extend
> * @blocks: the list of struct i915_buddy_block for this resource/allocation
> + * @flags: DRM_BUDDY_*_ALLOCATION flags
> * @mm: the struct i915_buddy_mm for this resource
> *
> * Extends the struct ttm_resource to manage an address space allocation with
> @@ -28,6 +29,7 @@ struct drm_buddy;
> struct i915_ttm_buddy_resource {
> struct ttm_resource base;
> struct list_head blocks;
> + unsigned long flags;
> struct drm_buddy *mm;
> };
>
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index f524db152413..865664b90a8a 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -13,15 +13,22 @@
>
> #include <drm/drm_print.h>
>
> -#define range_overflows(start, size, max) ({ \
> +#define check_range_overflow(start, end, size, max) ({ \
Random drive-by comment, why the rename? The old one is understandable,
it's a bool, while "check", to me, implies it might *do* something in
case of overflow.
> typeof(start) start__ = (start); \
> + typeof(end) end__ = (end);\
> typeof(size) size__ = (size); \
> typeof(max) max__ = (max); \
> (void)(&start__ == &size__); \
> (void)(&start__ == &max__); \
> - start__ >= max__ || size__ > max__ - start__; \
> + (void)(&start__ == &end__); \
> + (void)(&end__ == &size__); \
> + (void)(&end__ == &max__); \
> + start__ >= max__ || end__ > max__ || \
> + size__ > end__ - start__; \
> })
>
> +#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
> +
> struct drm_buddy_block {
> #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
> #define DRM_BUDDY_HEADER_STATE GENMASK_ULL(11, 10)
> @@ -131,12 +138,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
>
> void drm_buddy_fini(struct drm_buddy *mm);
>
> -struct drm_buddy_block *
> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
> -
> -int drm_buddy_alloc_range(struct drm_buddy *mm,
> - struct list_head *blocks,
> - u64 start, u64 size);
> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
> + u64 start, u64 end, u64 size,
> + u64 min_page_size,
> + struct list_head *blocks,
> + unsigned long flags);
>
> void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
--
Jani Nikula, Intel Open Source Graphics Center
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 02/20] drm: implement top-down allocation method
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
2022-01-26 15:21 ` [PATCH 01/20] drm: improve drm_buddy_alloc function Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-26 18:42 ` [Intel-gfx] " Robert Beckett
2022-01-26 15:21 ` [PATCH 03/20] drm: implement a method to free unused pages Matthew Auld
` (17 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: dri-devel, Arunpravin
From: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
Implemented a function which walks through the order list,
compares the offsets and returns the maximum offset block;
this method is unpredictable in obtaining the high range
address blocks, which depends on allocation and deallocation.
For instance, if the driver requests an address in a low
specific range, the allocator traverses from the root block
and splits the larger blocks until it reaches the specific
block; in the process of splitting, lower orders in the
freelist are occupied with low range address blocks, and for
a subsequent TOPDOWN memory request we may return the low
range blocks. To overcome this issue, we may go with the
below approach.
The other approach: sort each order list's entries in
ascending order, compare the last entry of each order
list in the freelist and return the max block. This
creates sorting overhead on every drm_buddy_free()
request and splitting up of larger blocks for a single
page request.
v2:
- Fix alignment issues(Matthew Auld)
- Remove unnecessary list_empty check(Matthew Auld)
- merged the below patch to see the feature in action
- add top-down alloc support to i915 driver
Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 36 ++++++++++++++++---
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 3 ++
include/drm/drm_buddy.h | 1 +
3 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 954e31962c74..6aa5c1ce25bf 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -371,6 +371,26 @@ alloc_range_bias(struct drm_buddy *mm,
return ERR_PTR(err);
}
+static struct drm_buddy_block *
+get_maxblock(struct list_head *head)
+{
+ struct drm_buddy_block *max_block = NULL, *node;
+
+ max_block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!max_block)
+ return NULL;
+
+ list_for_each_entry(node, head, link) {
+ if (drm_buddy_block_offset(node) >
+ drm_buddy_block_offset(max_block))
+ max_block = node;
+ }
+
+ return max_block;
+}
+
static struct drm_buddy_block *
alloc_from_freelist(struct drm_buddy *mm,
unsigned int order,
@@ -381,11 +401,17 @@ alloc_from_freelist(struct drm_buddy *mm,
int err;
for (i = order; i <= mm->max_order; ++i) {
- block = list_first_entry_or_null(&mm->free_list[i],
- struct drm_buddy_block,
- link);
- if (block)
- break;
+ if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
+ block = get_maxblock(&mm->free_list[i]);
+ if (block)
+ break;
+ } else {
+ block = list_first_entry_or_null(&mm->free_list[i],
+ struct drm_buddy_block,
+ link);
+ if (block)
+ break;
+ }
}
if (!block)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 1411f4cf1f21..3662434b64bb 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -53,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
INIT_LIST_HEAD(&bman_res->blocks);
bman_res->mm = mm;
+ if (place->flags & TTM_PL_FLAG_TOPDOWN)
+ bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
if (place->fpfn || lpfn != man->size)
bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 865664b90a8a..424fc443115e 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -28,6 +28,7 @@
})
#define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
+#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
struct drm_buddy_block {
#define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [Intel-gfx] [PATCH 02/20] drm: implement top-down allocation method
2022-01-26 15:21 ` [PATCH 02/20] drm: implement top-down allocation method Matthew Auld
@ 2022-01-26 18:42 ` Robert Beckett
0 siblings, 0 replies; 50+ messages in thread
From: Robert Beckett @ 2022-01-26 18:42 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel, Arunpravin
On 26/01/2022 15:21, Matthew Auld wrote:
> From: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
>
> Implemented a function which walk through the order list,
> compares the offset and returns the maximum offset block,
> this method is unpredictable in obtaining the high range
> address blocks which depends on allocation and deallocation.
> for instance, if driver requests address at a low specific
> range, allocator traverses from the root block and splits
> the larger blocks until it reaches the specific block and
> in the process of splitting, lower orders in the freelist
> are occupied with low range address blocks and for the
> subsequent TOPDOWN memory request we may return the low
> range blocks.To overcome this issue, we may go with the
> below approach.
>
> The other approach, sorting each order list entries in
> ascending order and compares the last entry of each
> order list in the freelist and return the max block.
> This creates sorting overhead on every drm_buddy_free()
> request and split up of larger blocks for a single page
> request.
ooc, why did you choose to implement this as an alloc flag?
Seems to me like it would be a good candidate for a new memory region.
That way allocation algorithms wouldn't need exta logic and ttm can
already handle migrations.
>
> v2:
> - Fix alignment issues(Matthew Auld)
> - Remove unnecessary list_empty check(Matthew Auld)
> - merged the below patch to see the feature in action
> - add top-down alloc support to i915 driver
>
> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
> ---
> drivers/gpu/drm/drm_buddy.c | 36 ++++++++++++++++---
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 3 ++
> include/drm/drm_buddy.h | 1 +
> 3 files changed, 35 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
> index 954e31962c74..6aa5c1ce25bf 100644
> --- a/drivers/gpu/drm/drm_buddy.c
> +++ b/drivers/gpu/drm/drm_buddy.c
> @@ -371,6 +371,26 @@ alloc_range_bias(struct drm_buddy *mm,
> return ERR_PTR(err);
> }
>
> +static struct drm_buddy_block *
> +get_maxblock(struct list_head *head)
> +{
> + struct drm_buddy_block *max_block = NULL, *node;
> +
> + max_block = list_first_entry_or_null(head,
> + struct drm_buddy_block,
> + link);
> + if (!max_block)
> + return NULL;
> +
> + list_for_each_entry(node, head, link) {
> + if (drm_buddy_block_offset(node) >
> + drm_buddy_block_offset(max_block))
> + max_block = node;
> + }
> +
> + return max_block;
> +}
> +
> static struct drm_buddy_block *
> alloc_from_freelist(struct drm_buddy *mm,
> unsigned int order,
> @@ -381,11 +401,17 @@ alloc_from_freelist(struct drm_buddy *mm,
> int err;
>
> for (i = order; i <= mm->max_order; ++i) {
> - block = list_first_entry_or_null(&mm->free_list[i],
> - struct drm_buddy_block,
> - link);
> - if (block)
> - break;
> + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) {
> + block = get_maxblock(&mm->free_list[i]);
> + if (block)
> + break;
> + } else {
> + block = list_first_entry_or_null(&mm->free_list[i],
> + struct drm_buddy_block,
> + link);
> + if (block)
> + break;
> + }
> }
>
> if (!block)
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 1411f4cf1f21..3662434b64bb 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -53,6 +53,9 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
> INIT_LIST_HEAD(&bman_res->blocks);
> bman_res->mm = mm;
>
> + if (place->flags & TTM_PL_FLAG_TOPDOWN)
> + bman_res->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
> +
> if (place->fpfn || lpfn != man->size)
> bman_res->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
> index 865664b90a8a..424fc443115e 100644
> --- a/include/drm/drm_buddy.h
> +++ b/include/drm/drm_buddy.h
> @@ -28,6 +28,7 @@
> })
>
> #define DRM_BUDDY_RANGE_ALLOCATION (1 << 0)
> +#define DRM_BUDDY_TOPDOWN_ALLOCATION (1 << 1)
>
> struct drm_buddy_block {
> #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12)
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 03/20] drm: implement a method to free unused pages
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
2022-01-26 15:21 ` [PATCH 01/20] drm: improve drm_buddy_alloc function Matthew Auld
2022-01-26 15:21 ` [PATCH 02/20] drm: implement top-down allocation method Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-26 15:21 ` [PATCH 04/20] drm/i915: add io_size plumbing Matthew Auld
` (16 subsequent siblings)
19 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: dri-devel, Arunpravin
From: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
On contiguous allocation, we round up the size
to the *next* power of 2; implement a function
to free the unused pages after the newly allocated block.
v2(Matthew Auld):
- replace function name 'drm_buddy_free_unused_pages' with
drm_buddy_block_trim
- replace input argument name 'actual_size' with 'new_size'
- add more validation checks for input arguments
- add overlaps check to avoid needless searching and splitting
- merged the below patch to see the feature in action
- add free unused pages support to i915 driver
- lock drm_buddy_block_trim() function as it calls mark_free/mark_split
are all globally visible
v3(Matthew Auld):
- remove trim method error handling as we address the failure case
at drm_buddy_block_trim() function
v4:
- in case of trim, the __alloc_range() split_block failure path
marks the block as free and removes it from the original list,
potentially also freeing it. To overcome this problem, we turn
the drm_buddy_block_trim() input node into a temporary node to
prevent recursively freeing itself, but still retain the
un-splitting/freeing of the other nodes (Matthew Auld)
- modify the drm_buddy_block_trim() function return type
v5(Matthew Auld):
- revert drm_buddy_block_trim() function return type changes in v4
- modify the drm_buddy_block_trim() passed argument from n_pages to original_size,
as n_pages has already been rounded up to the next power-of-two and
passing n_pages results in a noop
v6:
- fix warnings reported by kernel test robot <lkp@intel.com>
Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@amd.com>
---
drivers/gpu/drm/drm_buddy.c | 65 +++++++++++++++++++
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 10 +++
include/drm/drm_buddy.h | 4 ++
3 files changed, 79 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 6aa5c1ce25bf..c5902a81b8c5 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -546,6 +546,71 @@ static int __drm_buddy_alloc_range(struct drm_buddy *mm,
return __alloc_range(mm, &dfs, start, size, blocks);
}
+/**
+ * drm_buddy_block_trim - free unused pages
+ *
+ * @mm: DRM buddy manager
+ * @new_size: original size requested
+ * @blocks: output list head to add allocated blocks
+ *
+ * For contiguous allocation, we round up the size to the nearest
+ * power of two value, drivers consume *actual* size, so remaining
+ * portions are unused and it can be freed.
+ *
+ * Returns:
+ * 0 on success, error code on failure.
+ */
+int drm_buddy_block_trim(struct drm_buddy *mm,
+ u64 new_size,
+ struct list_head *blocks)
+{
+ struct drm_buddy_block *parent;
+ struct drm_buddy_block *block;
+ LIST_HEAD(dfs);
+ u64 new_start;
+ int err;
+
+ if (!list_is_singular(blocks))
+ return -EINVAL;
+
+ block = list_first_entry(blocks,
+ struct drm_buddy_block,
+ link);
+
+ if (!drm_buddy_block_is_allocated(block))
+ return -EINVAL;
+
+ if (new_size > drm_buddy_block_size(mm, block))
+ return -EINVAL;
+
+ if (!new_size && !IS_ALIGNED(new_size, mm->chunk_size))
+ return -EINVAL;
+
+ if (new_size == drm_buddy_block_size(mm, block))
+ return 0;
+
+ list_del(&block->link);
+ mark_free(mm, block);
+ mm->avail += drm_buddy_block_size(mm, block);
+
+ /* Prevent recursively freeing this node */
+ parent = block->parent;
+ block->parent = NULL;
+
+ new_start = drm_buddy_block_offset(block);
+ list_add(&block->tmp_link, &dfs);
+ err = __alloc_range(mm, &dfs, new_start, new_size, blocks);
+ if (err) {
+ mark_allocated(block);
+ mm->avail -= drm_buddy_block_size(mm, block);
+ list_add(&block->link, blocks);
+ }
+
+ block->parent = parent;
+ return err;
+}
+EXPORT_SYMBOL(drm_buddy_block_trim);
+
/**
* drm_buddy_alloc_blocks - allocate power-of-two blocks
*
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 3662434b64bb..53eb100688a6 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -97,6 +97,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
if (unlikely(err))
goto err_free_blocks;
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ u64 original_size = (u64)bman_res->base.num_pages << PAGE_SHIFT;
+
+ mutex_lock(&bman->lock);
+ drm_buddy_block_trim(mm,
+ original_size,
+ &bman_res->blocks);
+ mutex_unlock(&bman->lock);
+ }
+
*res = &bman_res->base;
return 0;
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 424fc443115e..17ca928fce8e 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -145,6 +145,10 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
struct list_head *blocks,
unsigned long flags);
+int drm_buddy_block_trim(struct drm_buddy *mm,
+ u64 new_size,
+ struct list_head *blocks);
+
void drm_buddy_free_block(struct drm_buddy *mm, struct drm_buddy_block *block);
void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* [PATCH 04/20] drm/i915: add io_size plumbing
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (2 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 03/20] drm: implement a method to free unused pages Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-31 15:14 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 05/20] drm/i915/ttm: require mappable by default Matthew Auld
` (15 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
With small LMEM-BAR we need to be able to differentiate between the
total size of LMEM, and how much of it is CPU mappable. The end goal is
to be able to utilize the entire range, even if part of it is not CPU
accessible.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 8 +++++---
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +-
drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 +++++-
drivers/gpu/drm/i915/intel_memory_region.c | 6 +++++-
drivers/gpu/drm/i915/intel_memory_region.h | 2 ++
drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 ++++----
drivers/gpu/drm/i915/selftests/mock_region.c | 6 ++++--
drivers/gpu/drm/i915/selftests/mock_region.h | 3 ++-
10 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 6c57b0a79c8a..a9aca11cedbb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -696,7 +696,7 @@ struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915,
{
return intel_memory_region_create(i915, 0,
totalram_pages() << PAGE_SHIFT,
- PAGE_SIZE, 0,
+ PAGE_SIZE, 0, 0,
type, instance,
&shmem_region_ops);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 26975d857776..387b48686851 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -490,6 +490,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
/* Exclude the reserved region from driver use */
mem->region.end = reserved_base - 1;
+ mem->io_size = resource_size(&mem->region);
/* It is possible for the reserved area to end before the end of stolen
* memory, so just consider the start. */
@@ -746,7 +747,7 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
if (!io_mapping_init_wc(&mem->iomap,
mem->io_start,
- resource_size(&mem->region)))
+ mem->io_size))
return -EIO;
/*
@@ -801,7 +802,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
I915_GTT_PAGE_SIZE_4K;
mem = intel_memory_region_create(i915, lmem_base, lmem_size,
- min_page_size, io_start,
+ min_page_size,
+ io_start, lmem_size,
type, instance,
&i915_region_stolen_lmem_ops);
if (IS_ERR(mem))
@@ -832,7 +834,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
mem = intel_memory_region_create(i915,
intel_graphics_stolen_res.start,
resource_size(&intel_graphics_stolen_res),
- PAGE_SIZE, 0, type, instance,
+ PAGE_SIZE, 0, 0, type, instance,
&i915_region_stolen_smem_ops);
if (IS_ERR(mem))
return mem;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 84cae740b4a5..e1140ca3d9a0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1103,7 +1103,7 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,
mr = intel_memory_region_create(i915, 0,
totalram_pages() << PAGE_SHIFT,
- PAGE_SIZE, 0,
+ PAGE_SIZE, 0, 0,
type, instance,
&ttm_system_region_ops);
if (IS_ERR(mr))
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index f36191ebf964..42db9cd30978 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -499,7 +499,7 @@ static int igt_mock_memory_region_huge_pages(void *arg)
int bit;
int err = 0;
- mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
if (IS_ERR(mem)) {
pr_err("%s failed to create memory region\n", __func__);
return PTR_ERR(mem);
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 21215a080088..2c7ec7ff79fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -90,7 +90,7 @@ region_lmem_init(struct intel_memory_region *mem)
if (!io_mapping_init_wc(&mem->iomap,
mem->io_start,
- resource_size(&mem->region))) {
+ mem->io_size)) {
ret = -EIO;
goto out_no_io;
}
@@ -143,6 +143,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
mappable_end,
PAGE_SIZE,
io_start,
+ mappable_end,
INTEL_MEMORY_LOCAL,
0,
&intel_region_lmem_ops);
@@ -219,6 +220,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size,
min_page_size,
io_start,
+ lmem_size,
INTEL_MEMORY_LOCAL,
0,
&intel_region_lmem_ops);
@@ -232,6 +234,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region);
drm_dbg(&i915->drm, "Local memory IO start: %pa\n",
&mem->io_start);
+ drm_info(&i915->drm, "Local memory IO size: %pa\n",
+ &mem->io_size);
drm_info(&i915->drm, "Local memory available: %pa\n",
&lmem_size);
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index c70d7e286a51..595e2489c23e 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -97,7 +97,7 @@ static int iomemtest(struct intel_memory_region *mem,
bool test_all,
const void *caller)
{
- resource_size_t last = resource_size(&mem->region) - PAGE_SIZE;
+ resource_size_t last = mem->io_size - PAGE_SIZE;
resource_size_t page;
int err;
@@ -205,6 +205,8 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem,
if (!mem->io_start)
return 0;
+ WARN_ON_ONCE(!mem->io_size);
+
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) || i915->params.memtest)
err = iomemtest(mem, i915->params.memtest, caller);
@@ -217,6 +219,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
resource_size_t size,
resource_size_t min_page_size,
resource_size_t io_start,
+ resource_size_t io_size,
u16 type,
u16 instance,
const struct intel_memory_region_ops *ops)
@@ -231,6 +234,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
mem->i915 = i915;
mem->region = (struct resource)DEFINE_RES_MEM(start, size);
mem->io_start = io_start;
+ mem->io_size = io_size;
mem->min_page_size = min_page_size;
mem->ops = ops;
mem->total = size;
diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
index 5625c9c38993..459051ce0c91 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.h
+++ b/drivers/gpu/drm/i915/intel_memory_region.h
@@ -71,6 +71,7 @@ struct intel_memory_region {
struct drm_mm_node fake_mappable;
resource_size_t io_start;
+ resource_size_t io_size;
resource_size_t min_page_size;
resource_size_t total;
resource_size_t avail;
@@ -103,6 +104,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
resource_size_t size,
resource_size_t min_page_size,
resource_size_t io_start,
+ resource_size_t io_size,
u16 type,
u16 instance,
const struct intel_memory_region_ops *ops);
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 7acba1d2135e..247f65f02bbf 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -170,7 +170,7 @@ static int igt_mock_reserve(void *arg)
if (!order)
return 0;
- mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
if (IS_ERR(mem)) {
pr_err("failed to create memory region\n");
err = PTR_ERR(mem);
@@ -383,7 +383,7 @@ static int igt_mock_splintered_region(void *arg)
*/
size = (SZ_4G - 1) & PAGE_MASK;
- mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
+ mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
@@ -471,7 +471,7 @@ static int igt_mock_max_segment(void *arg)
*/
size = SZ_8G;
- mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
+ mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
@@ -1188,7 +1188,7 @@ int intel_memory_region_mock_selftests(void)
if (!i915)
return -ENOMEM;
- mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
if (IS_ERR(mem)) {
pr_err("failed to create memory region\n");
err = PTR_ERR(mem);
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index 19bff8afcaaa..467eeae6d5f0 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -107,7 +107,8 @@ mock_region_create(struct drm_i915_private *i915,
resource_size_t start,
resource_size_t size,
resource_size_t min_page_size,
- resource_size_t io_start)
+ resource_size_t io_start,
+ resource_size_t io_size)
{
int instance = ida_alloc_max(&i915->selftest.mock_region_instances,
TTM_NUM_MEM_TYPES - TTM_PL_PRIV - 1,
@@ -117,6 +118,7 @@ mock_region_create(struct drm_i915_private *i915,
return ERR_PTR(instance);
return intel_memory_region_create(i915, start, size, min_page_size,
- io_start, INTEL_MEMORY_MOCK, instance,
+ io_start, io_size,
+ INTEL_MEMORY_MOCK, instance,
&mock_region_ops);
}
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h
index 329bf74dfaca..e36c3a433551 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.h
+++ b/drivers/gpu/drm/i915/selftests/mock_region.h
@@ -16,6 +16,7 @@ mock_region_create(struct drm_i915_private *i915,
resource_size_t start,
resource_size_t size,
resource_size_t min_page_size,
- resource_size_t io_start);
+ resource_size_t io_start,
+ resource_size_t io_size);
#endif /* !__MOCK_REGION_H */
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 04/20] drm/i915: add io_size plumbing
2022-01-26 15:21 ` [PATCH 04/20] drm/i915: add io_size plumbing Matthew Auld
@ 2022-01-31 15:14 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-01-31 15:14 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> With small LMEM-BAR we need to be able to differentiate between the
> total size of LMEM, and how much of it is CPU mappable. The end goal is
> to be able to utilize the entire range, even if part of it is not CPU
> accessible.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +-
> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 8 +++++---
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +-
> drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 +++++-
> drivers/gpu/drm/i915/intel_memory_region.c | 6 +++++-
> drivers/gpu/drm/i915/intel_memory_region.h | 2 ++
> drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 ++++----
> drivers/gpu/drm/i915/selftests/mock_region.c | 6 ++++--
> drivers/gpu/drm/i915/selftests/mock_region.h | 3 ++-
> 10 files changed, 30 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index 6c57b0a79c8a..a9aca11cedbb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -696,7 +696,7 @@ struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915,
> {
> return intel_memory_region_create(i915, 0,
> totalram_pages() << PAGE_SHIFT,
> - PAGE_SIZE, 0,
> + PAGE_SIZE, 0, 0,
> type, instance,
> &shmem_region_ops);
> }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index 26975d857776..387b48686851 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -490,6 +490,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
>
> /* Exclude the reserved region from driver use */
> mem->region.end = reserved_base - 1;
> + mem->io_size = resource_size(&mem->region);
>
> /* It is possible for the reserved area to end before the end of stolen
> * memory, so just consider the start. */
> @@ -746,7 +747,7 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
>
> if (!io_mapping_init_wc(&mem->iomap,
> mem->io_start,
> - resource_size(&mem->region)))
> + mem->io_size))
> return -EIO;
>
> /*
> @@ -801,7 +802,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
> I915_GTT_PAGE_SIZE_4K;
>
> mem = intel_memory_region_create(i915, lmem_base, lmem_size,
> - min_page_size, io_start,
> + min_page_size,
> + io_start, lmem_size,
> type, instance,
> &i915_region_stolen_lmem_ops);
> if (IS_ERR(mem))
> @@ -832,7 +834,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
> mem = intel_memory_region_create(i915,
> intel_graphics_stolen_res.start,
> resource_size(&intel_graphics_stolen_res),
> - PAGE_SIZE, 0, type, instance,
> + PAGE_SIZE, 0, 0, type, instance,
> &i915_region_stolen_smem_ops);
> if (IS_ERR(mem))
> return mem;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 84cae740b4a5..e1140ca3d9a0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -1103,7 +1103,7 @@ i915_gem_ttm_system_setup(struct drm_i915_private *i915,
>
> mr = intel_memory_region_create(i915, 0,
> totalram_pages() << PAGE_SHIFT,
> - PAGE_SIZE, 0,
> + PAGE_SIZE, 0, 0,
> type, instance,
> &ttm_system_region_ops);
> if (IS_ERR(mr))
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index f36191ebf964..42db9cd30978 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -499,7 +499,7 @@ static int igt_mock_memory_region_huge_pages(void *arg)
> int bit;
> int err = 0;
>
> - mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
> + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
> if (IS_ERR(mem)) {
> pr_err("%s failed to create memory region\n", __func__);
> return PTR_ERR(mem);
> diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
> index 21215a080088..2c7ec7ff79fd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
> +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
> @@ -90,7 +90,7 @@ region_lmem_init(struct intel_memory_region *mem)
>
> if (!io_mapping_init_wc(&mem->iomap,
> mem->io_start,
> - resource_size(&mem->region))) {
> + mem->io_size)) {
> ret = -EIO;
> goto out_no_io;
> }
> @@ -143,6 +143,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
> mappable_end,
> PAGE_SIZE,
> io_start,
> + mappable_end,
> INTEL_MEMORY_LOCAL,
> 0,
> &intel_region_lmem_ops);
> @@ -219,6 +220,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
> lmem_size,
> min_page_size,
> io_start,
> + lmem_size,
> INTEL_MEMORY_LOCAL,
> 0,
> &intel_region_lmem_ops);
> @@ -232,6 +234,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
> drm_dbg(&i915->drm, "Local memory: %pR\n", &mem->region);
> drm_dbg(&i915->drm, "Local memory IO start: %pa\n",
> &mem->io_start);
> + drm_info(&i915->drm, "Local memory IO size: %pa\n",
> + &mem->io_size);
> drm_info(&i915->drm, "Local memory available: %pa\n",
> &lmem_size);
>
> diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
> index c70d7e286a51..595e2489c23e 100644
> --- a/drivers/gpu/drm/i915/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/intel_memory_region.c
> @@ -97,7 +97,7 @@ static int iomemtest(struct intel_memory_region *mem,
> bool test_all,
> const void *caller)
> {
> - resource_size_t last = resource_size(&mem->region) - PAGE_SIZE;
> + resource_size_t last = mem->io_size - PAGE_SIZE;
> resource_size_t page;
> int err;
>
> @@ -205,6 +205,8 @@ static int intel_memory_region_memtest(struct intel_memory_region *mem,
> if (!mem->io_start)
> return 0;
>
> + WARN_ON_ONCE(!mem->io_size);
> +
> if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) || i915->params.memtest)
> err = iomemtest(mem, i915->params.memtest, caller);
>
> @@ -217,6 +219,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
> resource_size_t size,
> resource_size_t min_page_size,
> resource_size_t io_start,
> + resource_size_t io_size,
> u16 type,
> u16 instance,
> const struct intel_memory_region_ops *ops)
> @@ -231,6 +234,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
> mem->i915 = i915;
> mem->region = (struct resource)DEFINE_RES_MEM(start, size);
> mem->io_start = io_start;
> + mem->io_size = io_size;
> mem->min_page_size = min_page_size;
> mem->ops = ops;
> mem->total = size;
> diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h
> index 5625c9c38993..459051ce0c91 100644
> --- a/drivers/gpu/drm/i915/intel_memory_region.h
> +++ b/drivers/gpu/drm/i915/intel_memory_region.h
> @@ -71,6 +71,7 @@ struct intel_memory_region {
> struct drm_mm_node fake_mappable;
>
> resource_size_t io_start;
> + resource_size_t io_size;
> resource_size_t min_page_size;
> resource_size_t total;
> resource_size_t avail;
> @@ -103,6 +104,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
> resource_size_t size,
> resource_size_t min_page_size,
> resource_size_t io_start,
> + resource_size_t io_size,
> u16 type,
> u16 instance,
> const struct intel_memory_region_ops *ops);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index 7acba1d2135e..247f65f02bbf 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -170,7 +170,7 @@ static int igt_mock_reserve(void *arg)
> if (!order)
> return 0;
>
> - mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
> + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
> if (IS_ERR(mem)) {
> pr_err("failed to create memory region\n");
> err = PTR_ERR(mem);
> @@ -383,7 +383,7 @@ static int igt_mock_splintered_region(void *arg)
> */
>
> size = (SZ_4G - 1) & PAGE_MASK;
> - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
> + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
> if (IS_ERR(mem))
> return PTR_ERR(mem);
>
> @@ -471,7 +471,7 @@ static int igt_mock_max_segment(void *arg)
> */
>
> size = SZ_8G;
> - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
> + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
> if (IS_ERR(mem))
> return PTR_ERR(mem);
>
> @@ -1188,7 +1188,7 @@ int intel_memory_region_mock_selftests(void)
> if (!i915)
> return -ENOMEM;
>
> - mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
> + mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
> if (IS_ERR(mem)) {
> pr_err("failed to create memory region\n");
> err = PTR_ERR(mem);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
> index 19bff8afcaaa..467eeae6d5f0 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_region.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_region.c
> @@ -107,7 +107,8 @@ mock_region_create(struct drm_i915_private *i915,
> resource_size_t start,
> resource_size_t size,
> resource_size_t min_page_size,
> - resource_size_t io_start)
> + resource_size_t io_start,
> + resource_size_t io_size)
> {
> int instance = ida_alloc_max(&i915->selftest.mock_region_instances,
> TTM_NUM_MEM_TYPES - TTM_PL_PRIV - 1,
> @@ -117,6 +118,7 @@ mock_region_create(struct drm_i915_private *i915,
> return ERR_PTR(instance);
>
> return intel_memory_region_create(i915, start, size, min_page_size,
> - io_start, INTEL_MEMORY_MOCK, instance,
> + io_start, io_size,
> + INTEL_MEMORY_MOCK, instance,
> &mock_region_ops);
> }
> diff --git a/drivers/gpu/drm/i915/selftests/mock_region.h b/drivers/gpu/drm/i915/selftests/mock_region.h
> index 329bf74dfaca..e36c3a433551 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_region.h
> +++ b/drivers/gpu/drm/i915/selftests/mock_region.h
> @@ -16,6 +16,7 @@ mock_region_create(struct drm_i915_private *i915,
> resource_size_t start,
> resource_size_t size,
> resource_size_t min_page_size,
> - resource_size_t io_start);
> + resource_size_t io_start,
> + resource_size_t io_size);
>
> #endif /* !__MOCK_REGION_H */
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 05/20] drm/i915/ttm: require mappable by default
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (3 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 04/20] drm/i915: add io_size plumbing Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-26 15:21 ` [PATCH 06/20] drm/i915: add I915_BO_ALLOC_TOPDOWN Matthew Auld
` (14 subsequent siblings)
19 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
On devices with non-mappable LMEM ensure we always allocate the pages
within the mappable portion. For now we assume that all LMEM buffers
will require CPU access, which is also in line with pretty much all
current kernel internal users. In the next patch we will introduce a new
flag to override this behaviour.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++++
drivers/gpu/drm/i915/intel_region_ttm.c | 5 +++++
2 files changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index e1140ca3d9a0..d9a04c7d41b1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -128,6 +128,10 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr,
if (flags & I915_BO_ALLOC_CONTIGUOUS)
place->flags = TTM_PL_FLAG_CONTIGUOUS;
+ if (mr->io_size && mr->io_size < mr->total) {
+ place->fpfn = 0;
+ place->lpfn = mr->io_size >> PAGE_SHIFT;
+ }
}
static void
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index f2b888c16958..4689192d5e8d 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -199,6 +199,11 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
struct ttm_resource *res;
int ret;
+ if (mem->io_size && mem->io_size < mem->total) {
+ place.fpfn = 0;
+ place.lpfn = mem->io_size >> PAGE_SHIFT;
+ }
+
mock_bo.base.size = size;
place.flags = flags;
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* [PATCH 06/20] drm/i915: add I915_BO_ALLOC_TOPDOWN
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (4 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 05/20] drm/i915/ttm: require mappable by default Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-31 15:28 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 07/20] drm/i915/buddy: track available visible size Matthew Auld
` (13 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
If the user doesn't require CPU access for the buffer, then
ALLOC_TOPDOWN should be used, in order to prioritise allocating in the
non-mappable portion of LMEM.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 15 +++++++++++----
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +++
drivers/gpu/drm/i915/gem/i915_gem_region.c | 5 +++++
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 ++++++++++---
drivers/gpu/drm/i915/gt/intel_gt.c | 4 +++-
drivers/gpu/drm/i915/i915_vma.c | 3 +++
drivers/gpu/drm/i915/intel_region_ttm.c | 11 ++++++++---
drivers/gpu/drm/i915/selftests/mock_region.c | 7 +------
8 files changed, 44 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 71e778ecaeb8..29285aaf0477 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -319,15 +319,22 @@ struct drm_i915_gem_object {
#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY BIT(5)
+/*
+ * Object is likely never accessed by the CPU. This will prioritise the BO to be
+ * allocated in the non-mappable portion of lmem. This is merely a hint, and if
+ * dealing with userspace objects the CPU fault handler is free to ignore this.
+ */
+#define I915_BO_ALLOC_TOPDOWN BIT(6)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
I915_BO_ALLOC_VOLATILE | \
I915_BO_ALLOC_CPU_CLEAR | \
I915_BO_ALLOC_USER | \
I915_BO_ALLOC_PM_VOLATILE | \
- I915_BO_ALLOC_PM_EARLY)
-#define I915_BO_READONLY BIT(6)
-#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
-#define I915_BO_PROTECTED BIT(8)
+ I915_BO_ALLOC_PM_EARLY | \
+ I915_BO_ALLOC_TOPDOWN)
+#define I915_BO_READONLY BIT(7)
+#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(9)
/**
* @mem_flags - Mutable placement-related flags
*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 7d2211fbe548..a95b4d72619f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -346,6 +346,9 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
!i915_gem_object_has_iomem(obj))
return ERR_PTR(-ENXIO);
+ if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_TOPDOWN))
+ return ERR_PTR(-EINVAL);
+
assert_object_held(obj);
pinned = !(type & I915_MAP_OVERRIDE);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index a4350227e9ae..f91e5a9c759d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -45,6 +45,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
+ if (WARN_ON_ONCE(flags & I915_BO_ALLOC_TOPDOWN &&
+ (flags & I915_BO_ALLOC_CPU_CLEAR ||
+ flags & I915_BO_ALLOC_PM_EARLY)))
+ return ERR_PTR(-EINVAL);
+
if (!mem)
return ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index d9a04c7d41b1..e60b677ecd54 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -127,10 +127,14 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr,
place->mem_type = intel_region_to_ttm_type(mr);
if (flags & I915_BO_ALLOC_CONTIGUOUS)
- place->flags = TTM_PL_FLAG_CONTIGUOUS;
+ place->flags |= TTM_PL_FLAG_CONTIGUOUS;
if (mr->io_size && mr->io_size < mr->total) {
- place->fpfn = 0;
- place->lpfn = mr->io_size >> PAGE_SHIFT;
+ if (flags & I915_BO_ALLOC_TOPDOWN) {
+ place->flags |= TTM_PL_FLAG_TOPDOWN;
+ } else {
+ place->fpfn = 0;
+ place->lpfn = mr->io_size >> PAGE_SHIFT;
+ }
}
}
@@ -890,6 +894,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
if (!obj)
return VM_FAULT_SIGBUS;
+ if (obj->flags & I915_BO_ALLOC_TOPDOWN)
+ return -EINVAL;
+
/* Sanity check that we allow writing into this object */
if (unlikely(i915_gem_object_is_readonly(obj) &&
area->vm_flags & VM_WRITE))
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 622cdfed8a8b..8b83a771a2f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -454,7 +454,9 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
struct i915_vma *vma;
int ret;
- obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
+ obj = i915_gem_object_create_lmem(i915, size,
+ I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_TOPDOWN);
if (IS_ERR(obj))
obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(obj))
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index b1816a835abf..b2fdaa74e4b6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -528,6 +528,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
void __iomem *ptr;
int err;
+ if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_TOPDOWN))
+ return IO_ERR_PTR(-EINVAL);
+
if (!i915_gem_object_is_lmem(vma->obj)) {
if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
err = -ENODEV;
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 4689192d5e8d..282802aed174 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -199,13 +199,18 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
struct ttm_resource *res;
int ret;
+ if (flags & I915_BO_ALLOC_CONTIGUOUS)
+ place.flags |= TTM_PL_FLAG_CONTIGUOUS;
if (mem->io_size && mem->io_size < mem->total) {
- place.fpfn = 0;
- place.lpfn = mem->io_size >> PAGE_SHIFT;
+ if (flags & I915_BO_ALLOC_TOPDOWN) {
+ place.flags |= TTM_PL_FLAG_TOPDOWN;
+ } else {
+ place.fpfn = 0;
+ place.lpfn = mem->io_size >> PAGE_SHIFT;
+ }
}
mock_bo.base.size = size;
- place.flags = flags;
ret = man->func->alloc(man, &mock_bo, &place, &res);
if (ret == -ENOSPC)
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index 467eeae6d5f0..f64325491f35 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -22,17 +22,12 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj,
static int mock_region_get_pages(struct drm_i915_gem_object *obj)
{
- unsigned int flags;
struct sg_table *pages;
int err;
- flags = 0;
- if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
- flags |= TTM_PL_FLAG_CONTIGUOUS;
-
obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region,
obj->base.size,
- flags);
+ obj->flags);
if (IS_ERR(obj->mm.res))
return PTR_ERR(obj->mm.res);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 06/20] drm/i915: add I915_BO_ALLOC_TOPDOWN
2022-01-26 15:21 ` [PATCH 06/20] drm/i915: add I915_BO_ALLOC_TOPDOWN Matthew Auld
@ 2022-01-31 15:28 ` Thomas Hellström
2022-01-31 15:49 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-01-31 15:28 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> If the user doesn't require CPU access for the buffer, then
> ALLOC_TOPDOWN should be used, in order to prioritise allocating in the
> non-mappable portion of LMEM.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
I was wondering how this would work best with user-space not supplying
any hints. Thinking that mappable LMEM buffers would be a minority,
wouldn't it be better to have TOPDOWN behaviour set by default. It would
then be migrated to mappable only if needed. And if the first usage is a
cpu-map it would either be mapped in system or immediately migrated from
pageless to mappable LMEM?
> ---
> drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 15 +++++++++++----
> drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +++
> drivers/gpu/drm/i915/gem/i915_gem_region.c | 5 +++++
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 ++++++++++---
> drivers/gpu/drm/i915/gt/intel_gt.c | 4 +++-
> drivers/gpu/drm/i915/i915_vma.c | 3 +++
> drivers/gpu/drm/i915/intel_region_ttm.c | 11 ++++++++---
> drivers/gpu/drm/i915/selftests/mock_region.c | 7 +------
> 8 files changed, 44 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 71e778ecaeb8..29285aaf0477 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -319,15 +319,22 @@ struct drm_i915_gem_object {
> #define I915_BO_ALLOC_PM_VOLATILE BIT(4)
> /* Object needs to be restored early using memcpy during resume */
> #define I915_BO_ALLOC_PM_EARLY BIT(5)
> +/*
> + * Object is likely never accessed by the CPU. This will prioritise the BO to be
> + * allocated in the non-mappable portion of lmem. This is merely a hint, and if
> + * dealing with userspace objects the CPU fault handler is free to ignore this.
> + */
> +#define I915_BO_ALLOC_TOPDOWN BIT(6)
> #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
> I915_BO_ALLOC_VOLATILE | \
> I915_BO_ALLOC_CPU_CLEAR | \
> I915_BO_ALLOC_USER | \
> I915_BO_ALLOC_PM_VOLATILE | \
> - I915_BO_ALLOC_PM_EARLY)
> -#define I915_BO_READONLY BIT(6)
> -#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
> -#define I915_BO_PROTECTED BIT(8)
> + I915_BO_ALLOC_PM_EARLY | \
> + I915_BO_ALLOC_TOPDOWN)
> +#define I915_BO_READONLY BIT(7)
> +#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
> +#define I915_BO_PROTECTED BIT(9)
> /**
> * @mem_flags - Mutable placement-related flags
> *
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index 7d2211fbe548..a95b4d72619f 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -346,6 +346,9 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
> !i915_gem_object_has_iomem(obj))
> return ERR_PTR(-ENXIO);
>
> + if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_TOPDOWN))
> + return ERR_PTR(-EINVAL);
> +
> assert_object_held(obj);
>
> pinned = !(type & I915_MAP_OVERRIDE);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
> index a4350227e9ae..f91e5a9c759d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
> @@ -45,6 +45,11 @@ i915_gem_object_create_region(struct intel_memory_region *mem,
>
> GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
>
> + if (WARN_ON_ONCE(flags & I915_BO_ALLOC_TOPDOWN &&
> + (flags & I915_BO_ALLOC_CPU_CLEAR ||
> + flags & I915_BO_ALLOC_PM_EARLY)))
> + return ERR_PTR(-EINVAL);
> +
> if (!mem)
> return ERR_PTR(-ENODEV);
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index d9a04c7d41b1..e60b677ecd54 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -127,10 +127,14 @@ i915_ttm_place_from_region(const struct intel_memory_region *mr,
> place->mem_type = intel_region_to_ttm_type(mr);
>
> if (flags & I915_BO_ALLOC_CONTIGUOUS)
> - place->flags = TTM_PL_FLAG_CONTIGUOUS;
> + place->flags |= TTM_PL_FLAG_CONTIGUOUS;
> if (mr->io_size && mr->io_size < mr->total) {
> - place->fpfn = 0;
> - place->lpfn = mr->io_size >> PAGE_SHIFT;
> + if (flags & I915_BO_ALLOC_TOPDOWN) {
> + place->flags |= TTM_PL_FLAG_TOPDOWN;
> + } else {
> + place->fpfn = 0;
> + place->lpfn = mr->io_size >> PAGE_SHIFT;
> + }
> }
> }
>
> @@ -890,6 +894,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
> if (!obj)
> return VM_FAULT_SIGBUS;
>
> + if (obj->flags & I915_BO_ALLOC_TOPDOWN)
> + return -EINVAL;
> +
> /* Sanity check that we allow writing into this object */
> if (unlikely(i915_gem_object_is_readonly(obj) &&
> area->vm_flags & VM_WRITE))
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 622cdfed8a8b..8b83a771a2f7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -454,7 +454,9 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
> struct i915_vma *vma;
> int ret;
>
> - obj = i915_gem_object_create_lmem(i915, size, I915_BO_ALLOC_VOLATILE);
> + obj = i915_gem_object_create_lmem(i915, size,
> + I915_BO_ALLOC_VOLATILE |
> + I915_BO_ALLOC_TOPDOWN);
> if (IS_ERR(obj))
> obj = i915_gem_object_create_stolen(i915, size);
> if (IS_ERR(obj))
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index b1816a835abf..b2fdaa74e4b6 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -528,6 +528,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
> void __iomem *ptr;
> int err;
>
> + if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_TOPDOWN))
> + return IO_ERR_PTR(-EINVAL);
> +
> if (!i915_gem_object_is_lmem(vma->obj)) {
> if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
> err = -ENODEV;
> diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
> index 4689192d5e8d..282802aed174 100644
> --- a/drivers/gpu/drm/i915/intel_region_ttm.c
> +++ b/drivers/gpu/drm/i915/intel_region_ttm.c
> @@ -199,13 +199,18 @@ intel_region_ttm_resource_alloc(struct intel_memory_region *mem,
> struct ttm_resource *res;
> int ret;
>
> + if (flags & I915_BO_ALLOC_CONTIGUOUS)
> + place.flags |= TTM_PL_FLAG_CONTIGUOUS;
> if (mem->io_size && mem->io_size < mem->total) {
> - place.fpfn = 0;
> - place.lpfn = mem->io_size >> PAGE_SHIFT;
> + if (flags & I915_BO_ALLOC_TOPDOWN) {
> + place.flags |= TTM_PL_FLAG_TOPDOWN;
> + } else {
> + place.fpfn = 0;
> + place.lpfn = mem->io_size >> PAGE_SHIFT;
> + }
> }
>
> mock_bo.base.size = size;
> - place.flags = flags;
>
> ret = man->func->alloc(man, &mock_bo, &place, &res);
> if (ret == -ENOSPC)
> diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
> index 467eeae6d5f0..f64325491f35 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_region.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_region.c
> @@ -22,17 +22,12 @@ static void mock_region_put_pages(struct drm_i915_gem_object *obj,
>
> static int mock_region_get_pages(struct drm_i915_gem_object *obj)
> {
> - unsigned int flags;
> struct sg_table *pages;
> int err;
>
> - flags = 0;
> - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
> - flags |= TTM_PL_FLAG_CONTIGUOUS;
> -
> obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region,
> obj->base.size,
> - flags);
> + obj->flags);
> if (IS_ERR(obj->mm.res))
> return PTR_ERR(obj->mm.res);
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 06/20] drm/i915: add I915_BO_ALLOC_TOPDOWN
2022-01-31 15:28 ` Thomas Hellström
@ 2022-01-31 15:49 ` Matthew Auld
0 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-01-31 15:49 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 31/01/2022 15:28, Thomas Hellström wrote:
> On 1/26/22 16:21, Matthew Auld wrote:
>> If the user doesn't require CPU access for the buffer, then
>> ALLOC_TOPDOWN should be used, in order to prioritise allocating in the
>> non-mappable portion of LMEM.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>
> I was wondering how this would work best with user-space not supplying
> any hints. Thinking that mappable LMEM buffers would be a minority,
> wouldn't it be better to have TOPDOWN behaviour set by default. It would
> then be migrated to mappable only if needed. And if the first usage is a
> cpu-map it would either be mapped in system or immediately migrated from
> pageless to mappable LMEM?
At this stage of the series I was mostly concerned with kernel internal
users (including all of the selftests), for which pretty much all
existing users want CPU access, so having that as the default seemed
reasonable, and avoids needing to annotate lots of places with
NEEDS_CPU_ACCESS. The TOPDOWN behaviour becomes the default for normal
userspace objects later in the series, which only requires annotating
one place.
>
>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 15 +++++++++++----
>> drivers/gpu/drm/i915/gem/i915_gem_pages.c | 3 +++
>> drivers/gpu/drm/i915/gem/i915_gem_region.c | 5 +++++
>> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 13 ++++++++++---
>> drivers/gpu/drm/i915/gt/intel_gt.c | 4 +++-
>> drivers/gpu/drm/i915/i915_vma.c | 3 +++
>> drivers/gpu/drm/i915/intel_region_ttm.c | 11 ++++++++---
>> drivers/gpu/drm/i915/selftests/mock_region.c | 7 +------
>> 8 files changed, 44 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> index 71e778ecaeb8..29285aaf0477 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> @@ -319,15 +319,22 @@ struct drm_i915_gem_object {
>> #define I915_BO_ALLOC_PM_VOLATILE BIT(4)
>> /* Object needs to be restored early using memcpy during resume */
>> #define I915_BO_ALLOC_PM_EARLY BIT(5)
>> +/*
>> + * Object is likely never accessed by the CPU. This will prioritise
>> the BO to be
>> + * allocated in the non-mappable portion of lmem. This is merely a
>> hint, and if
>> + * dealing with userspace objects the CPU fault handler is free to
>> ignore this.
>> + */
>> +#define I915_BO_ALLOC_TOPDOWN BIT(6)
>> #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
>> I915_BO_ALLOC_VOLATILE | \
>> I915_BO_ALLOC_CPU_CLEAR | \
>> I915_BO_ALLOC_USER | \
>> I915_BO_ALLOC_PM_VOLATILE | \
>> - I915_BO_ALLOC_PM_EARLY)
>> -#define I915_BO_READONLY BIT(6)
>> -#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not
>> release! */
>> -#define I915_BO_PROTECTED BIT(8)
>> + I915_BO_ALLOC_PM_EARLY | \
>> + I915_BO_ALLOC_TOPDOWN)
>> +#define I915_BO_READONLY BIT(7)
>> +#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not
>> release! */
>> +#define I915_BO_PROTECTED BIT(9)
>> /**
>> * @mem_flags - Mutable placement-related flags
>> *
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> index 7d2211fbe548..a95b4d72619f 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> @@ -346,6 +346,9 @@ void *i915_gem_object_pin_map(struct
>> drm_i915_gem_object *obj,
>> !i915_gem_object_has_iomem(obj))
>> return ERR_PTR(-ENXIO);
>> + if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_TOPDOWN))
>> + return ERR_PTR(-EINVAL);
>> +
>> assert_object_held(obj);
>> pinned = !(type & I915_MAP_OVERRIDE);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> index a4350227e9ae..f91e5a9c759d 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> @@ -45,6 +45,11 @@ i915_gem_object_create_region(struct
>> intel_memory_region *mem,
>> GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
>> + if (WARN_ON_ONCE(flags & I915_BO_ALLOC_TOPDOWN &&
>> + (flags & I915_BO_ALLOC_CPU_CLEAR ||
>> + flags & I915_BO_ALLOC_PM_EARLY)))
>> + return ERR_PTR(-EINVAL);
>> +
>> if (!mem)
>> return ERR_PTR(-ENODEV);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>> index d9a04c7d41b1..e60b677ecd54 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>> @@ -127,10 +127,14 @@ i915_ttm_place_from_region(const struct
>> intel_memory_region *mr,
>> place->mem_type = intel_region_to_ttm_type(mr);
>> if (flags & I915_BO_ALLOC_CONTIGUOUS)
>> - place->flags = TTM_PL_FLAG_CONTIGUOUS;
>> + place->flags |= TTM_PL_FLAG_CONTIGUOUS;
>> if (mr->io_size && mr->io_size < mr->total) {
>> - place->fpfn = 0;
>> - place->lpfn = mr->io_size >> PAGE_SHIFT;
>> + if (flags & I915_BO_ALLOC_TOPDOWN) {
>> + place->flags |= TTM_PL_FLAG_TOPDOWN;
>> + } else {
>> + place->fpfn = 0;
>> + place->lpfn = mr->io_size >> PAGE_SHIFT;
>> + }
>> }
>> }
>> @@ -890,6 +894,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
>> if (!obj)
>> return VM_FAULT_SIGBUS;
>> + if (obj->flags & I915_BO_ALLOC_TOPDOWN)
>> + return -EINVAL;
>> +
>> /* Sanity check that we allow writing into this object */
>> if (unlikely(i915_gem_object_is_readonly(obj) &&
>> area->vm_flags & VM_WRITE))
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c
>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>> index 622cdfed8a8b..8b83a771a2f7 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>> @@ -454,7 +454,9 @@ static int intel_gt_init_scratch(struct intel_gt
>> *gt, unsigned int size)
>> struct i915_vma *vma;
>> int ret;
>> - obj = i915_gem_object_create_lmem(i915, size,
>> I915_BO_ALLOC_VOLATILE);
>> + obj = i915_gem_object_create_lmem(i915, size,
>> + I915_BO_ALLOC_VOLATILE |
>> + I915_BO_ALLOC_TOPDOWN);
>> if (IS_ERR(obj))
>> obj = i915_gem_object_create_stolen(i915, size);
>> if (IS_ERR(obj))
>> diff --git a/drivers/gpu/drm/i915/i915_vma.c
>> b/drivers/gpu/drm/i915/i915_vma.c
>> index b1816a835abf..b2fdaa74e4b6 100644
>> --- a/drivers/gpu/drm/i915/i915_vma.c
>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> @@ -528,6 +528,9 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma
>> *vma)
>> void __iomem *ptr;
>> int err;
>> + if (WARN_ON_ONCE(vma->obj->flags & I915_BO_ALLOC_TOPDOWN))
>> + return IO_ERR_PTR(-EINVAL);
>> +
>> if (!i915_gem_object_is_lmem(vma->obj)) {
>> if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
>> err = -ENODEV;
>> diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c
>> b/drivers/gpu/drm/i915/intel_region_ttm.c
>> index 4689192d5e8d..282802aed174 100644
>> --- a/drivers/gpu/drm/i915/intel_region_ttm.c
>> +++ b/drivers/gpu/drm/i915/intel_region_ttm.c
>> @@ -199,13 +199,18 @@ intel_region_ttm_resource_alloc(struct
>> intel_memory_region *mem,
>> struct ttm_resource *res;
>> int ret;
>> + if (flags & I915_BO_ALLOC_CONTIGUOUS)
>> + place.flags |= TTM_PL_FLAG_CONTIGUOUS;
>> if (mem->io_size && mem->io_size < mem->total) {
>> - place.fpfn = 0;
>> - place.lpfn = mem->io_size >> PAGE_SHIFT;
>> + if (flags & I915_BO_ALLOC_TOPDOWN) {
>> + place.flags |= TTM_PL_FLAG_TOPDOWN;
>> + } else {
>> + place.fpfn = 0;
>> + place.lpfn = mem->io_size >> PAGE_SHIFT;
>> + }
>> }
>> mock_bo.base.size = size;
>> - place.flags = flags;
>> ret = man->func->alloc(man, &mock_bo, &place, &res);
>> if (ret == -ENOSPC)
>> diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c
>> b/drivers/gpu/drm/i915/selftests/mock_region.c
>> index 467eeae6d5f0..f64325491f35 100644
>> --- a/drivers/gpu/drm/i915/selftests/mock_region.c
>> +++ b/drivers/gpu/drm/i915/selftests/mock_region.c
>> @@ -22,17 +22,12 @@ static void mock_region_put_pages(struct
>> drm_i915_gem_object *obj,
>> static int mock_region_get_pages(struct drm_i915_gem_object *obj)
>> {
>> - unsigned int flags;
>> struct sg_table *pages;
>> int err;
>> - flags = 0;
>> - if (obj->flags & I915_BO_ALLOC_CONTIGUOUS)
>> - flags |= TTM_PL_FLAG_CONTIGUOUS;
>> -
>> obj->mm.res = intel_region_ttm_resource_alloc(obj->mm.region,
>> obj->base.size,
>> - flags);
>> + obj->flags);
>> if (IS_ERR(obj->mm.res))
>> return PTR_ERR(obj->mm.res);
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 07/20] drm/i915/buddy: track available visible size
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (5 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 06/20] drm/i915: add I915_BO_ALLOC_TOPDOWN Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-31 16:12 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 08/20] drm/i915/buddy: adjust res->start Matthew Auld
` (12 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Track the total amount of available visible memory, and also track
per-resource the amount of used visible memory. For now this is useful
for our debug output, and deciding if it is even worth calling into the
buddy allocator. In the future tracking the per-resource visible usage
will be useful for when deciding if we should attempt to evict certain
buffers.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 55 ++++++++++++++++++-
drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 8 ++-
drivers/gpu/drm/i915/intel_region_ttm.c | 1 +
3 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 53eb100688a6..6e5842155898 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -19,6 +19,8 @@ struct i915_ttm_buddy_manager {
struct drm_buddy mm;
struct list_head reserved;
struct mutex lock;
+ unsigned long visible_size;
+ unsigned long visible_avail;
u64 default_page_size;
};
@@ -87,6 +89,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
n_pages = size >> ilog2(mm->chunk_size);
mutex_lock(&bman->lock);
+ if (place->lpfn && place->lpfn <= bman->visible_size &&
+ n_pages > bman->visible_avail) {
+ mutex_unlock(&bman->lock);
+ err = -ENOSPC;
+ goto err_free_res;
+ }
+
err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
(u64)lpfn << PAGE_SHIFT,
(u64)n_pages << PAGE_SHIFT,
@@ -107,6 +116,30 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
mutex_unlock(&bman->lock);
}
+ if (place->lpfn && place->lpfn <= bman->visible_size) {
+ bman_res->used_visible_size = bman_res->base.num_pages;
+ } else {
+ struct drm_buddy_block *block;
+
+ list_for_each_entry(block, &bman_res->blocks, link) {
+ unsigned long start =
+ drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ unsigned long end = start +
+ (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+ if (start < bman->visible_size) {
+ bman_res->used_visible_size +=
+ min(end, bman->visible_size) - start;
+ }
+ }
+ }
+
+ if (bman_res->used_visible_size) {
+ mutex_lock(&bman->lock);
+ bman->visible_avail -= bman_res->used_visible_size;
+ mutex_unlock(&bman->lock);
+ }
+
*res = &bman_res->base;
return 0;
@@ -127,6 +160,7 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man,
mutex_lock(&bman->lock);
drm_buddy_free_list(&bman->mm, &bman_res->blocks);
+ bman->visible_avail += bman_res->used_visible_size;
mutex_unlock(&bman->lock);
kfree(bman_res);
@@ -141,6 +175,10 @@ static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man,
mutex_lock(&bman->lock);
drm_printf(printer, "default_page_size: %lluKiB\n",
bman->default_page_size >> 10);
+ drm_printf(printer, "visible_avail: %luMiB\n",
+ bman->visible_avail << PAGE_SHIFT >> 20);
+ drm_printf(printer, "visible_size: %luMiB\n",
+ bman->visible_size << PAGE_SHIFT >> 20);
drm_buddy_print(&bman->mm, printer);
@@ -162,6 +200,7 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
* @type: Memory type we want to manage
* @use_tt: Set use_tt for the manager
* @size: The size in bytes to manage
+ * @visible_size: The CPU visible size in bytes to manage
* @default_page_size: The default minimum page size in bytes for allocations,
* this must be at least as large as @chunk_size, and can be overridden by
* setting the BO page_alignment, to be larger or smaller as needed.
@@ -185,7 +224,7 @@ static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = {
*/
int i915_ttm_buddy_man_init(struct ttm_device *bdev,
unsigned int type, bool use_tt,
- u64 size, u64 default_page_size,
+ u64 size, u64 visible_size, u64 default_page_size,
u64 chunk_size)
{
struct ttm_resource_manager *man;
@@ -204,6 +243,8 @@ int i915_ttm_buddy_man_init(struct ttm_device *bdev,
INIT_LIST_HEAD(&bman->reserved);
GEM_BUG_ON(default_page_size < chunk_size);
bman->default_page_size = default_page_size;
+ bman->visible_size = visible_size >> PAGE_SHIFT;
+ bman->visible_avail = bman->visible_size;
man = &bman->manager;
man->use_tt = use_tt;
@@ -248,6 +289,7 @@ int i915_ttm_buddy_man_fini(struct ttm_device *bdev, unsigned int type)
mutex_lock(&bman->lock);
drm_buddy_free_list(mm, &bman->reserved);
drm_buddy_fini(mm);
+ WARN_ON_ONCE(bman->visible_avail != bman->visible_size);
mutex_unlock(&bman->lock);
ttm_resource_manager_cleanup(man);
@@ -287,3 +329,14 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
return ret;
}
+/**
+ * i915_ttm_buddy_man_visible_size - Return the size of the CPU visible portion
+ * in pages.
+ * @man: The buddy allocator ttm manager
+ */
+u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man)
+{
+ struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
+
+ return bman->visible_size;
+}
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
index 72c90b432e87..35fe03a6a78c 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
@@ -21,6 +21,8 @@ struct drm_buddy;
* @base: struct ttm_resource base class we extend
* @blocks: the list of struct i915_buddy_block for this resource/allocation
* @flags: DRM_BUDDY_*_ALLOCATION flags
+ * @used_visible_size: How much of this resource, if any, uses the CPU visible
+ * portion, in pages.
* @mm: the struct i915_buddy_mm for this resource
*
* Extends the struct ttm_resource to manage an address space allocation with
@@ -30,6 +32,7 @@ struct i915_ttm_buddy_resource {
struct ttm_resource base;
struct list_head blocks;
unsigned long flags;
+ unsigned long used_visible_size;
struct drm_buddy *mm;
};
@@ -48,11 +51,14 @@ to_ttm_buddy_resource(struct ttm_resource *res)
int i915_ttm_buddy_man_init(struct ttm_device *bdev,
unsigned type, bool use_tt,
- u64 size, u64 default_page_size, u64 chunk_size);
+ u64 size, u64 visible_size,
+ u64 default_page_size, u64 chunk_size);
int i915_ttm_buddy_man_fini(struct ttm_device *bdev,
unsigned int type);
int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
u64 start, u64 size);
+u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man);
+
#endif
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 282802aed174..353ef195c3be 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -87,6 +87,7 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
ret = i915_ttm_buddy_man_init(bdev, mem_type, false,
resource_size(&mem->region),
+ mem->io_size,
mem->min_page_size, PAGE_SIZE);
if (ret)
return ret;
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 07/20] drm/i915/buddy: track available visible size
2022-01-26 15:21 ` [PATCH 07/20] drm/i915/buddy: track available visible size Matthew Auld
@ 2022-01-31 16:12 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-01-31 16:12 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> Track the total amount of available visible memory, and also track
> per-resource the amount of used visible memory. For now this is useful
> for our debug output, and deciding if it is even worth calling into the
> buddy allocator. In the future tracking the per-resource visible usage
> will be useful for when deciding if we should attempt to evict certain
> buffers.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 55 ++++++++++++++++++-
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 8 ++-
> drivers/gpu/drm/i915/intel_region_ttm.c | 1 +
> 3 files changed, 62 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 53eb100688a6..6e5842155898 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -19,6 +19,8 @@ struct i915_ttm_buddy_manager {
> struct drm_buddy mm;
> struct list_head reserved;
> struct mutex lock;
> + unsigned long visible_size;
> + unsigned long visible_avail;
> u64 default_page_size;
> };
>
> @@ -87,6 +89,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
> n_pages = size >> ilog2(mm->chunk_size);
>
> mutex_lock(&bman->lock);
> + if (place->lpfn && place->lpfn <= bman->visible_size &&
> + n_pages > bman->visible_avail) {
> + mutex_unlock(&bman->lock);
> + err = -ENOSPC;
> + goto err_free_res;
> + }
> +
> err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> (u64)lpfn << PAGE_SHIFT,
> (u64)n_pages << PAGE_SHIFT,
> @@ -107,6 +116,30 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
> mutex_unlock(&bman->lock);
> }
>
> + if (place->lpfn && place->lpfn <= bman->visible_size) {
> + bman_res->used_visible_size = bman_res->base.num_pages;
> + } else {
> + struct drm_buddy_block *block;
> +
> + list_for_each_entry(block, &bman_res->blocks, link) {
> + unsigned long start =
> + drm_buddy_block_offset(block) >> PAGE_SHIFT;
> + unsigned long end = start +
> + (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
> +
Move this inside the if statement below? Or perhaps the compiler is
smart enough to figure that out.
> + if (start < bman->visible_size) {
> + bman_res->used_visible_size +=
> + min(end, bman->visible_size) - start;
> + }
> + }
> + }
Reviewed-by: Thomas Hellstrom <thomas.hellstrom@linux.intel.com>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 08/20] drm/i915/buddy: adjust res->start
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (6 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 07/20] drm/i915/buddy: track available visible size Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-01 10:38 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 09/20] drm/i915/buddy: tweak 2big check Matthew Auld
` (11 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Differentiate between mappable vs non-mappable resources; also, if this
is an actual range allocation, ensure we set res->start as the starting
pfn. Later when we need to do non-mappable -> mappable moves then we
want TTM to see that the current placement is not compatible, which
should result in an actual move, instead of being turned into a noop.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index 6e5842155898..bc725a92fc5c 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -140,6 +140,13 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
mutex_unlock(&bman->lock);
}
+ if (place->lpfn - place->fpfn == n_pages)
+ bman_res->base.start = place->fpfn;
+ else if (lpfn <= bman->visible_size)
+ bman_res->base.start = 0;
+ else
+ bman_res->base.start = bman->visible_size;
+
*res = &bman_res->base;
return 0;
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 08/20] drm/i915/buddy: adjust res->start
2022-01-26 15:21 ` [PATCH 08/20] drm/i915/buddy: adjust res->start Matthew Auld
@ 2022-02-01 10:38 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-01 10:38 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On Wed, 2022-01-26 at 15:21 +0000, Matthew Auld wrote:
> Differentiate between mappable vs non-mappable resources, also if
> this
> is an actual range allocation ensure we set res->start as the
> starting
> pfn. Later when we need to do non-mappable -> mappable moves then we
> want TTM to see that the current placement is not compatible, which
> should result in an actual move, instead of being turned into a noop.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index 6e5842155898..bc725a92fc5c 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -140,6 +140,13 @@ static int i915_ttm_buddy_man_alloc(struct
> ttm_resource_manager *man,
> mutex_unlock(&bman->lock);
> }
>
> + if (place->lpfn - place->fpfn == n_pages)
> + bman_res->base.start = place->fpfn;
> + else if (lpfn <= bman->visible_size)
> + bman_res->base.start = 0;
> + else
> + bman_res->base.start = bman->visible_size;
> +
> *res = &bman_res->base;
> return 0;
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 09/20] drm/i915/buddy: tweak 2big check
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (7 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 08/20] drm/i915/buddy: adjust res->start Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-01 10:39 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 10/20] drm/i915/selftests: mock test io_size Matthew Auld
` (10 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Otherwise we get -EINVAL, instead of the more useful -E2BIG if the
allocation doesn't fit within the pfn range, like with mappable lmem.
The hugepages selftest, for example, needs this to know if a smaller
size is needed.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
index bc725a92fc5c..7c24cc6608e3 100644
--- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
+++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
@@ -81,7 +81,7 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
lpfn = pages;
}
- if (size > mm->size) {
+ if (size > lpfn << PAGE_SHIFT) {
err = -E2BIG;
goto err_free_res;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 09/20] drm/i915/buddy: tweak 2big check
2022-01-26 15:21 ` [PATCH 09/20] drm/i915/buddy: tweak 2big check Matthew Auld
@ 2022-02-01 10:39 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-01 10:39 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On Wed, 2022-01-26 at 15:21 +0000, Matthew Auld wrote:
> Otherwise we get -EINVAL, instead of the more useful -E2BIG if the
> allocation doesn't fit within the pfn range, like with mappable lmem.
> The hugepages selftest, for example, needs this to know if a smaller
> size is needed.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> index bc725a92fc5c..7c24cc6608e3 100644
> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
> @@ -81,7 +81,7 @@ static int i915_ttm_buddy_man_alloc(struct
> ttm_resource_manager *man,
> lpfn = pages;
> }
>
> - if (size > mm->size) {
> + if (size > lpfn << PAGE_SHIFT) {
> err = -E2BIG;
> goto err_free_res;
> }
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 10/20] drm/i915/selftests: mock test io_size
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (8 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 09/20] drm/i915/buddy: tweak 2big check Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-02 10:24 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 11/20] drm/i915/ttm: tweak priority hint selection Matthew Auld
` (9 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Check that mappable vs non-mappable matches our expectations.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
.../drm/i915/selftests/intel_memory_region.c | 143 ++++++++++++++++++
1 file changed, 143 insertions(+)
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 247f65f02bbf..04ae29779206 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -17,6 +17,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_pm.h"
@@ -512,6 +513,147 @@ static int igt_mock_max_segment(void *arg)
return err;
}
+static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = obj->mm.region;
+ struct i915_ttm_buddy_resource *bman_res =
+ to_ttm_buddy_resource(obj->mm.res);
+ struct drm_buddy *mm = bman_res->mm;
+ struct drm_buddy_block *block;
+ u64 total;
+
+ total = 0;
+ list_for_each_entry(block, &bman_res->blocks, link) {
+ u64 start = drm_buddy_block_offset(block);
+ u64 end = start + drm_buddy_block_size(mm, block);
+
+ if (start < mr->io_size)
+ total += min_t(u64, end, mr->io_size) - start;
+ }
+
+ return total;
+}
+
+static int igt_mock_io_size(void *arg)
+{
+ struct intel_memory_region *mr = arg;
+ struct drm_i915_private *i915 = mr->i915;
+ struct drm_i915_gem_object *obj;
+ u64 mappable_theft_total;
+ u64 io_size;
+ u64 total;
+ u64 ps;
+ u64 rem;
+ u64 size;
+ I915_RND_STATE(prng);
+ LIST_HEAD(objects);
+ int err = 0;
+
+ ps = SZ_4K;
+ if (i915_prandom_u64_state(&prng) & 1)
+ ps = SZ_64K; /* For something like DG2 */
+
+ div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
+ total = round_down(total, ps);
+ total = max_t(u64, total, SZ_1G);
+
+ div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
+ io_size = round_down(io_size, ps);
+ io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */
+
+ pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
+ __func__, ps, io_size, total);
+
+ mr = mock_region_create(i915, 0, total, ps, 0, io_size);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto out_err;
+ }
+
+ mappable_theft_total = 0;
+ rem = total - io_size;
+ do {
+ div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
+ size = round_down(size, ps);
+ size = max(size, ps);
+
+ obj = igt_object_create(mr, &objects, size,
+ I915_BO_ALLOC_TOPDOWN);
+ if (IS_ERR(obj)) {
+ pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
+ __func__, rem, size);
+ err = PTR_ERR(obj);
+ goto out_close;
+ }
+
+ mappable_theft_total += igt_object_mappable_total(obj);
+ rem -= size;
+ } while (rem);
+
+ pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
+ __func__,
+ (u64)mappable_theft_total >> 20,
+ (u64)io_size >> 20,
+ (u64)total >> 20);
+
+ /*
+ * Even if we allocate all of the non-mappable portion, we should still
+ * be able to dip into the mappable portion.
+ */
+ obj = igt_object_create(mr, &objects, io_size,
+ I915_BO_ALLOC_TOPDOWN);
+ if (IS_ERR(obj)) {
+ pr_err("%s allocation unexpectedly failed\n", __func__);
+ err = PTR_ERR(obj);
+ goto out_close;
+ }
+
+ close_objects(mr, &objects);
+
+ rem = io_size;
+ do {
+ div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
+ size = round_down(size, ps);
+ size = max(size, ps);
+
+ obj = igt_object_create(mr, &objects, size, 0);
+ if (IS_ERR(obj)) {
+ pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
+ __func__, rem, size);
+ err = PTR_ERR(obj);
+ goto out_close;
+ }
+
+ if (igt_object_mappable_total(obj) != size) {
+ pr_err("%s allocation is not mappable(size=%llx)\n",
+ __func__, size);
+ err = -EINVAL;
+ goto out_close;
+ }
+ rem -= size;
+ } while (rem);
+
+ /*
+ * We assume CPU access is required by default, which should result in a
+ * failure here, even though the non-mappable portion is free.
+ */
+ obj = igt_object_create(mr, &objects, ps, 0);
+ if (!IS_ERR(obj)) {
+ pr_err("%s allocation unexpectedly succeeded\n", __func__);
+ err = -EINVAL;
+ goto out_close;
+ }
+
+out_close:
+ close_objects(mr, &objects);
+ intel_memory_region_destroy(mr);
+out_err:
+ if (err == -ENOMEM)
+ err = 0;
+
+ return err;
+}
+
static int igt_gpu_write_dw(struct intel_context *ce,
struct i915_vma *vma,
u32 dword,
@@ -1179,6 +1321,7 @@ int intel_memory_region_mock_selftests(void)
SUBTEST(igt_mock_contiguous),
SUBTEST(igt_mock_splintered_region),
SUBTEST(igt_mock_max_segment),
+ SUBTEST(igt_mock_io_size),
};
struct intel_memory_region *mem;
struct drm_i915_private *i915;
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 10/20] drm/i915/selftests: mock test io_size
2022-01-26 15:21 ` [PATCH 10/20] drm/i915/selftests: mock test io_size Matthew Auld
@ 2022-02-02 10:24 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-02 10:24 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> Check that mappable vs non-mappable matches our expectations.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> .../drm/i915/selftests/intel_memory_region.c | 143 ++++++++++++++++++
> 1 file changed, 143 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index 247f65f02bbf..04ae29779206 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -17,6 +17,7 @@
> #include "gem/i915_gem_context.h"
> #include "gem/i915_gem_lmem.h"
> #include "gem/i915_gem_region.h"
> +#include "gem/i915_gem_ttm.h"
> #include "gem/selftests/igt_gem_utils.h"
> #include "gem/selftests/mock_context.h"
> #include "gt/intel_engine_pm.h"
> @@ -512,6 +513,147 @@ static int igt_mock_max_segment(void *arg)
> return err;
> }
>
> +static u64 igt_object_mappable_total(struct drm_i915_gem_object *obj)
> +{
> + struct intel_memory_region *mr = obj->mm.region;
> + struct i915_ttm_buddy_resource *bman_res =
> + to_ttm_buddy_resource(obj->mm.res);
> + struct drm_buddy *mm = bman_res->mm;
> + struct drm_buddy_block *block;
> + u64 total;
> +
> + total = 0;
> + list_for_each_entry(block, &bman_res->blocks, link) {
> + u64 start = drm_buddy_block_offset(block);
> + u64 end = start + drm_buddy_block_size(mm, block);
> +
> + if (start < mr->io_size)
> + total += min_t(u64, end, mr->io_size) - start;
> + }
> +
> + return total;
> +}
> +
> +static int igt_mock_io_size(void *arg)
> +{
> + struct intel_memory_region *mr = arg;
> + struct drm_i915_private *i915 = mr->i915;
> + struct drm_i915_gem_object *obj;
> + u64 mappable_theft_total;
> + u64 io_size;
> + u64 total;
> + u64 ps;
> + u64 rem;
> + u64 size;
> + I915_RND_STATE(prng);
> + LIST_HEAD(objects);
> + int err = 0;
> +
> + ps = SZ_4K;
> + if (i915_prandom_u64_state(&prng) & 1)
> + ps = SZ_64K; /* For something like DG2 */
> +
> + div64_u64_rem(i915_prandom_u64_state(&prng), SZ_8G, &total);
> + total = round_down(total, ps);
> + total = max_t(u64, total, SZ_1G);
> +
> + div64_u64_rem(i915_prandom_u64_state(&prng), total - ps, &io_size);
> + io_size = round_down(io_size, ps);
> + io_size = max_t(u64, io_size, SZ_256M); /* 256M seems to be the common lower limit */
> +
> + pr_info("%s with ps=%llx, io_size=%llx, total=%llx\n",
> + __func__, ps, io_size, total);
> +
> + mr = mock_region_create(i915, 0, total, ps, 0, io_size);
> + if (IS_ERR(mr)) {
> + err = PTR_ERR(mr);
> + goto out_err;
> + }
> +
> + mappable_theft_total = 0;
> + rem = total - io_size;
> + do {
> + div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
> + size = round_down(size, ps);
> + size = max(size, ps);
> +
> + obj = igt_object_create(mr, &objects, size,
> + I915_BO_ALLOC_TOPDOWN);
> + if (IS_ERR(obj)) {
> + pr_err("%s TOPDOWN failed with rem=%llx, size=%llx\n",
> + __func__, rem, size);
> + err = PTR_ERR(obj);
> + goto out_close;
> + }
> +
> + mappable_theft_total += igt_object_mappable_total(obj);
> + rem -= size;
> + } while (rem);
> +
> + pr_info("%s mappable theft=(%lluMiB/%lluMiB), total=%lluMiB\n",
> + __func__,
> + (u64)mappable_theft_total >> 20,
> + (u64)io_size >> 20,
> + (u64)total >> 20);
> +
> + /*
> + * Even if we allocate all of the non-mappable portion, we should still
> + * be able to dip into the mappable portion.
> + */
> + obj = igt_object_create(mr, &objects, io_size,
> + I915_BO_ALLOC_TOPDOWN);
> + if (IS_ERR(obj)) {
> + pr_err("%s allocation unexpectedly failed\n", __func__);
> + err = PTR_ERR(obj);
> + goto out_close;
> + }
> +
> + close_objects(mr, &objects);
> +
> + rem = io_size;
> + do {
> + div64_u64_rem(i915_prandom_u64_state(&prng), rem, &size);
> + size = round_down(size, ps);
> + size = max(size, ps);
> +
> + obj = igt_object_create(mr, &objects, size, 0);
> + if (IS_ERR(obj)) {
> + pr_err("%s MAPPABLE failed with rem=%llx, size=%llx\n",
> + __func__, rem, size);
> + err = PTR_ERR(obj);
> + goto out_close;
> + }
> +
> + if (igt_object_mappable_total(obj) != size) {
> + pr_err("%s allocation is not mappable(size=%llx)\n",
> + __func__, size);
> + err = -EINVAL;
> + goto out_close;
> + }
> + rem -= size;
> + } while (rem);
> +
> + /*
> + * We assume CPU access is required by default, which should result in a
> + * failure here, even though the non-mappable portion is free.
> + */
> + obj = igt_object_create(mr, &objects, ps, 0);
> + if (!IS_ERR(obj)) {
> + pr_err("%s allocation unexpectedly succeeded\n", __func__);
> + err = -EINVAL;
> + goto out_close;
> + }
> +
> +out_close:
> + close_objects(mr, &objects);
> + intel_memory_region_destroy(mr);
> +out_err:
> + if (err == -ENOMEM)
> + err = 0;
> +
> + return err;
> +}
> +
> static int igt_gpu_write_dw(struct intel_context *ce,
> struct i915_vma *vma,
> u32 dword,
> @@ -1179,6 +1321,7 @@ int intel_memory_region_mock_selftests(void)
> SUBTEST(igt_mock_contiguous),
> SUBTEST(igt_mock_splintered_region),
> SUBTEST(igt_mock_max_segment),
> + SUBTEST(igt_mock_io_size),
> };
> struct intel_memory_region *mem;
> struct drm_i915_private *i915;
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 11/20] drm/i915/ttm: tweak priority hint selection
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (9 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 10/20] drm/i915/selftests: mock test io_size Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-02 13:34 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 12/20] drm/i915/ttm: make eviction mappable aware Matthew Auld
` (8 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
For some reason we are selecting PRIO_HAS_PAGES when we don't have
mm.pages, and vice versa. Perhaps something else is going on here.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index e60b677ecd54..e4cd6ccf5ab1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -848,11 +848,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
} else if (obj->mm.madv != I915_MADV_WILLNEED) {
bo->priority = I915_TTM_PRIO_PURGE;
} else if (!i915_gem_object_has_pages(obj)) {
- if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
- bo->priority = I915_TTM_PRIO_HAS_PAGES;
+ bo->priority = I915_TTM_PRIO_NO_PAGES;
} else {
- if (bo->priority > I915_TTM_PRIO_NO_PAGES)
- bo->priority = I915_TTM_PRIO_NO_PAGES;
+ bo->priority = I915_TTM_PRIO_HAS_PAGES;
}
ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 11/20] drm/i915/ttm: tweak priority hint selection
2022-01-26 15:21 ` [PATCH 11/20] drm/i915/ttm: tweak priority hint selection Matthew Auld
@ 2022-02-02 13:34 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-02 13:34 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> For some reason we are selecting PRIO_HAS_PAGES when we don't have
> mm.pages, and vice versa. Perhaps something else is going on here.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
LGTM. Should we add a Fixes: tag here?
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 6 ++----
> 1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index e60b677ecd54..e4cd6ccf5ab1 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -848,11 +848,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
> } else if (obj->mm.madv != I915_MADV_WILLNEED) {
> bo->priority = I915_TTM_PRIO_PURGE;
> } else if (!i915_gem_object_has_pages(obj)) {
> - if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
> - bo->priority = I915_TTM_PRIO_HAS_PAGES;
> + bo->priority = I915_TTM_PRIO_NO_PAGES;
> } else {
> - if (bo->priority > I915_TTM_PRIO_NO_PAGES)
> - bo->priority = I915_TTM_PRIO_NO_PAGES;
> + bo->priority = I915_TTM_PRIO_HAS_PAGES;
> }
>
> ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 12/20] drm/i915/ttm: make eviction mappable aware
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (10 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 11/20] drm/i915/ttm: tweak priority hint selection Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-02 13:41 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 13/20] drm/i915/ttm: mappable migration on fault Matthew Auld
` (7 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
If we need to make room for some mappable object, then we should
only victimize objects that have one or more pages that occupy the visible
portion of LMEM. Let's also create a new priority hint for objects that
are placed in mappable memory, where we know that CPU access was
requested, that way we hopefully victimize these last.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 65 ++++++++++++++++++++++++-
1 file changed, 63 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index e4cd6ccf5ab1..8376e4c3d290 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -5,8 +5,10 @@
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
+#include <drm/drm_buddy.h>
#include "i915_drv.h"
+#include "i915_ttm_buddy_manager.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"
@@ -20,6 +22,7 @@
#define I915_TTM_PRIO_PURGE 0
#define I915_TTM_PRIO_NO_PAGES 1
#define I915_TTM_PRIO_HAS_PAGES 2
+#define I915_TTM_PRIO_NEEDS_CPU_ACCESS 3
/*
* Size of struct ttm_place vector in on-stack struct ttm_placement allocs
@@ -337,6 +340,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
const struct ttm_place *place)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct ttm_resource *res = bo->resource;
if (!obj)
return false;
@@ -350,7 +354,48 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
return false;
/* Will do for now. Our pinned objects are still on TTM's LRU lists */
- return i915_gem_object_evictable(obj);
+ if (!i915_gem_object_evictable(obj))
+ return false;
+
+ switch (res->mem_type) {
+ case TTM_PL_PRIV: {
+ struct ttm_resource_manager *man =
+ ttm_manager_type(bo->bdev, res->mem_type);
+ struct i915_ttm_buddy_resource *bman_res =
+ to_ttm_buddy_resource(res);
+ struct drm_buddy *mm = bman_res->mm;
+ struct drm_buddy_block *block;
+
+ if (!place->fpfn && !place->lpfn)
+ return true;
+
+ GEM_BUG_ON(!place->lpfn);
+
+ /*
+ * If we just want something mappable then we can quickly check
+ * if the current victim resource is using any of the CPU
+ * visible portion.
+ */
+ if (!place->fpfn &&
+ place->lpfn == i915_ttm_buddy_man_visible_size(man))
+ return bman_res->used_visible_size > 0;
+
+ /* Real range allocation */
+ list_for_each_entry(block, &bman_res->blocks, link) {
+ unsigned long fpfn =
+ drm_buddy_block_offset(block) >> PAGE_SHIFT;
+ unsigned long lpfn = fpfn +
+ (drm_buddy_block_size(mm, block) >> PAGE_SHIFT);
+
+ if (place->fpfn < lpfn && place->lpfn > fpfn)
+ return true;
+ }
+ return false;
+ } default:
+ break;
+ }
+
+ return true;
}
static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
@@ -850,7 +895,23 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
} else if (!i915_gem_object_has_pages(obj)) {
bo->priority = I915_TTM_PRIO_NO_PAGES;
} else {
- bo->priority = I915_TTM_PRIO_HAS_PAGES;
+ struct ttm_resource_manager *man =
+ ttm_manager_type(bo->bdev, bo->resource->mem_type);
+
+ /*
+ * If we need to place an LMEM resource which doesn't need CPU
+ * access then we should try not to victimize mappable objects
+ * first, since we likely end up stealing more of the mappable
+ * portion. And likewise when we try to find space for a mappable
+ * object, we know not to ever victimize objects that don't
+ * occupy any mappable pages.
+ */
+ if (i915_ttm_cpu_maps_iomem(bo->resource) &&
+ i915_ttm_buddy_man_visible_size(man) < man->size &&
+ !(obj->flags & I915_BO_ALLOC_TOPDOWN))
+ bo->priority = I915_TTM_PRIO_NEEDS_CPU_ACCESS;
+ else
+ bo->priority = I915_TTM_PRIO_HAS_PAGES;
}
ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 12/20] drm/i915/ttm: make eviction mappable aware
2022-01-26 15:21 ` [PATCH 12/20] drm/i915/ttm: make eviction mappable aware Matthew Auld
@ 2022-02-02 13:41 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-02 13:41 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> If we need to make room for some mappable object, then we should
> only victimize objects that have one or more pages that occupy the visible
> portion of LMEM. Let's also create a new priority hint for objects that
> are placed in mappable memory, where we know that CPU access was
> requested, that way we hopefully victimize these last.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 65 ++++++++++++++++++++++++-
> 1 file changed, 63 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index e4cd6ccf5ab1..8376e4c3d290 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -5,8 +5,10 @@
>
> #include <drm/ttm/ttm_bo_driver.h>
> #include <drm/ttm/ttm_placement.h>
> +#include <drm/drm_buddy.h>
>
> #include "i915_drv.h"
> +#include "i915_ttm_buddy_manager.h"
> #include "intel_memory_region.h"
> #include "intel_region_ttm.h"
>
> @@ -20,6 +22,7 @@
> #define I915_TTM_PRIO_PURGE 0
> #define I915_TTM_PRIO_NO_PAGES 1
> #define I915_TTM_PRIO_HAS_PAGES 2
> +#define I915_TTM_PRIO_NEEDS_CPU_ACCESS 3
>
> /*
> * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
> @@ -337,6 +340,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
> const struct ttm_place *place)
> {
> struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> + struct ttm_resource *res = bo->resource;
>
> if (!obj)
> return false;
> @@ -350,7 +354,48 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
> return false;
>
> /* Will do for now. Our pinned objects are still on TTM's LRU lists */
> - return i915_gem_object_evictable(obj);
> + if (!i915_gem_object_evictable(obj))
> + return false;
> +
> + switch (res->mem_type) {
> + case TTM_PL_PRIV: {
We should use the I915_ placements for better readability.
Otherwise Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 13/20] drm/i915/ttm: mappable migration on fault
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (11 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 12/20] drm/i915/ttm: make eviction mappable aware Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-03 7:59 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 14/20] drm/i915/selftests: exercise mmap migration Matthew Auld
` (6 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
The end goal is to have userspace tell the kernel what buffers will
require CPU access, however if we ever reach the CPU fault handler, and
the current resource is not mappable, then we should attempt to migrate
the buffer to the mappable portion of LMEM, or even system memory, if the
allowable placements permit it.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 58 ++++++++++++++++++++++---
1 file changed, 52 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 8376e4c3d290..7299053fb1ec 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -636,11 +636,25 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
i915_ttm_purge(obj);
}
+static bool i915_ttm_resource_mappable(struct ttm_resource *res)
+{
+ struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
+
+ if (!i915_ttm_cpu_maps_iomem(res))
+ return true;
+
+ return bman_res->used_visible_size == bman_res->base.num_pages;
+}
+
static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
{
+
if (!i915_ttm_cpu_maps_iomem(mem))
return 0;
+ if (!i915_ttm_resource_mappable(mem))
+ return -EINVAL;
+
mem->bus.caching = ttm_write_combined;
mem->bus.is_iomem = true;
@@ -779,14 +793,15 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
* Gem forced migration using the i915_ttm_migrate() op, is allowed even
* to regions that are not in the object's list of allowable placements.
*/
-static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
- struct intel_memory_region *mr)
+static int __i915_ttm_migrate(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr,
+ unsigned int flags)
{
struct ttm_place requested;
struct ttm_placement placement;
int ret;
- i915_ttm_place_from_region(mr, &requested, obj->flags);
+ i915_ttm_place_from_region(mr, &requested, flags);
placement.num_placement = 1;
placement.num_busy_placement = 1;
placement.placement = &requested;
@@ -809,6 +824,12 @@ static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
return 0;
}
+static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr)
+{
+ return __i915_ttm_migrate(obj, mr, obj->flags);
+}
+
static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
struct sg_table *st)
{
@@ -940,6 +961,10 @@ static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
ttm_bo_put(i915_gem_to_ttm(obj));
}
+static int __i915_ttm_migrate(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr,
+ unsigned int flags);
+
static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
{
struct vm_area_struct *area = vmf->vma;
@@ -953,9 +978,6 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
if (!obj)
return VM_FAULT_SIGBUS;
- if (obj->flags & I915_BO_ALLOC_TOPDOWN)
- return -EINVAL;
-
/* Sanity check that we allow writing into this object */
if (unlikely(i915_gem_object_is_readonly(obj) &&
area->vm_flags & VM_WRITE))
@@ -970,6 +992,30 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
+ if (!i915_ttm_resource_mappable(bo->resource)) {
+ int err = -ENODEV;
+ int i;
+
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ struct intel_memory_region *mr = obj->mm.placements[i];
+ unsigned int flags;
+
+ if (!mr->io_size && mr->type != INTEL_MEMORY_SYSTEM)
+ continue;
+
+ flags = obj->flags;
+ flags &= ~I915_BO_ALLOC_TOPDOWN;
+ err = __i915_ttm_migrate(obj, mr, flags);
+ if (!err)
+ break;
+ }
+
+ if (err) {
+ dma_resv_unlock(bo->base.resv);
+ return VM_FAULT_SIGBUS;
+ }
+ }
+
if (drm_dev_enter(dev, &idx)) {
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
TTM_BO_VM_NUM_PREFAULT);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 13/20] drm/i915/ttm: mappable migration on fault
2022-01-26 15:21 ` [PATCH 13/20] drm/i915/ttm: mappable migration on fault Matthew Auld
@ 2022-02-03 7:59 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 7:59 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> The end goal is to have userspace tell the kernel what buffers will
> require CPU access, however if we ever reach the CPU fault handler, and
> the current resource is not mappable, then we should attempt to migrate
> the buffer to the mappable portion of LMEM, or even system memory, if the
> allowable placements permit it.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 58 ++++++++++++++++++++++---
> 1 file changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 8376e4c3d290..7299053fb1ec 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -636,11 +636,25 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
> i915_ttm_purge(obj);
> }
>
> +static bool i915_ttm_resource_mappable(struct ttm_resource *res)
> +{
> + struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
> +
> + if (!i915_ttm_cpu_maps_iomem(res))
> + return true;
> +
> + return bman_res->used_visible_size == bman_res->base.num_pages;
> +}
> +
> static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
> {
> +
Stray line.
Otherwise LGTM.
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 14/20] drm/i915/selftests: exercise mmap migration
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (12 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 13/20] drm/i915/ttm: mappable migration on fault Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-03 9:01 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 15/20] drm/i915/selftests: handle allocation failures Matthew Auld
` (5 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Exercise each of the migration scenarios, verifying that the final
placement and buffer contents match our expectations.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
.../drm/i915/gem/selftests/i915_gem_mman.c | 306 ++++++++++++++++++
1 file changed, 306 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index ba29767348be..d2c1071df98a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -10,6 +10,7 @@
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_migrate.h"
#include "gem/i915_gem_region.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
@@ -999,6 +1000,310 @@ static int igt_mmap(void *arg)
return 0;
}
+static void igt_close_objects(struct drm_i915_private *i915,
+ struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj, *on;
+
+ list_for_each_entry_safe(obj, on, objects, st_link) {
+ i915_gem_object_lock(obj, NULL);
+ if (i915_gem_object_has_pinned_pages(obj))
+ i915_gem_object_unpin_pages(obj);
+ /* No polluting the memory region between tests */
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ list_del(&obj->st_link);
+ i915_gem_object_put(obj);
+ }
+
+ cond_resched();
+
+ i915_gem_drain_freed_objects(i915);
+}
+
+static void igt_make_evictable(struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj;
+
+ list_for_each_entry(obj, objects, st_link) {
+ i915_gem_object_lock(obj, NULL);
+ if (i915_gem_object_has_pinned_pages(obj))
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_unlock(obj);
+ }
+
+ cond_resched();
+}
+
+static int igt_fill_mappable(struct intel_memory_region *mr,
+ struct list_head *objects)
+{
+ u64 size, total;
+ int err;
+
+ total = 0;
+ size = mr->io_size;
+ do {
+ struct drm_i915_gem_object *obj;
+
+ obj = i915_gem_object_create_region(mr, size, 0, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_close;
+ }
+
+ list_add(&obj->st_link, objects);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ if (err != -ENXIO && err != -ENOMEM)
+ goto err_close;
+
+ if (size == mr->min_page_size) {
+ err = 0;
+ break;
+ }
+
+ size >>= 1;
+ continue;
+ }
+
+ total += obj->base.size;
+ } while (1);
+
+ pr_info("%s filled=%lluMiB\n", __func__, total >> 20);
+ return 0;
+
+err_close:
+ igt_close_objects(mr->i915, objects);
+ return err;
+}
+
+static int ___igt_mmap_migrate(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ unsigned long addr,
+ bool unfaultable)
+{
+ struct vm_area_struct *area;
+ int err = 0, i;
+
+ pr_info("igt_mmap(%s, %d) @ %lx\n",
+ obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr);
+
+ mmap_read_lock(current->mm);
+ area = vma_lookup(current->mm, addr);
+ mmap_read_unlock(current->mm);
+ if (!area) {
+ pr_err("%s: Did not create a vm_area_struct for the mmap\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); i++) {
+ u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
+ u32 x;
+
+ if (get_user(x, ux)) {
+ err = -EFAULT;
+ if (!unfaultable) {
+ pr_err("%s: Unable to read from mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ goto out_unmap;
+ }
+
+ continue;
+ }
+
+ if (unfaultable) {
+ pr_err("%s: Faulted unmappable memory\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ if (x != expand32(POISON_INUSE)) {
+ pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+ obj->mm.region->name,
+ i * sizeof(x), x, expand32(POISON_INUSE));
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ x = expand32(POISON_FREE);
+ if (put_user(x, ux)) {
+ pr_err("%s: Unable to write to mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ err = -EFAULT;
+ goto out_unmap;
+ }
+ }
+
+ if (unfaultable) {
+ if (err == -EFAULT)
+ err = 0;
+ } else {
+ obj->flags &= ~I915_BO_ALLOC_TOPDOWN;
+ err = wc_check(obj);
+ }
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+#define IGT_MMAP_MIGRATE_TOPDOWN (1<<0)
+#define IGT_MMAP_MIGRATE_FILL (1<<1)
+#define IGT_MMAP_MIGRATE_EVICTABLE (1<<2)
+#define IGT_MMAP_MIGRATE_UNFAULTABLE (1<<3)
+static int __igt_mmap_migrate(struct intel_memory_region **placements,
+ int n_placements,
+ struct intel_memory_region *expected_mr,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = placements[0]->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_ww_ctx ww;
+ struct i915_request *rq = NULL;
+ unsigned long addr;
+ LIST_HEAD(objects);
+ u64 offset;
+ int err;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
+ placements,
+ n_placements);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (flags & IGT_MMAP_MIGRATE_TOPDOWN)
+ obj->flags |= I915_BO_ALLOC_TOPDOWN;
+
+ err = __assign_mmap_offset(obj, I915_MMAP_TYPE_FIXED, &offset, NULL);
+ if (err)
+ goto out_put;
+
+ /*
+ * This will eventually create a GEM context, due to opening dummy drm
+ * file, which needs a tiny amount of mappable device memory for the top
+ * level paging structures (and perhaps scratch), so make sure we
+ * allocate early, to avoid tears.
+ */
+ addr = igt_mmap_offset(i915, offset, obj->base.size,
+ PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr)) {
+ err = addr;
+ goto out_put;
+ }
+
+ if (flags & IGT_MMAP_MIGRATE_FILL) {
+ err = igt_fill_mappable(placements[0], &objects);
+ if (err)
+ goto out_put;
+ }
+
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ continue;
+
+ err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
+ obj->mm.pages->sgl, obj->cache_level,
+ i915_gem_object_is_lmem(obj),
+ expand32(POISON_INUSE), &rq);
+ i915_gem_object_unpin_pages(obj);
+ if (rq) {
+ dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
+ i915_gem_object_set_moving_fence(obj, &rq->fence);
+ i915_request_put(rq);
+ }
+ if (err)
+ continue;
+ }
+ if (err)
+ goto out_put;
+
+ if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
+ igt_make_evictable(&objects);
+
+ err = ___igt_mmap_migrate(i915, obj, addr,
+ flags & IGT_MMAP_MIGRATE_UNFAULTABLE);
+ if (!err && obj->mm.region != expected_mr) {
+ pr_err("%s region mismatch %s\n", __func__, expected_mr->name);
+ err = -EINVAL;
+ }
+
+out_put:
+ i915_gem_object_put(obj);
+ igt_close_objects(i915, &objects);
+ return err;
+}
+
+static int igt_mmap_migrate(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *system = i915->mm.regions[INTEL_REGION_SMEM];
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct intel_memory_region *mixed[] = { mr, system };
+ struct intel_memory_region *single[] = { mr };
+ int err;
+
+ if (mr->private)
+ continue;
+
+ if (!mr->io_size || mr->io_size == mr->total)
+ continue;
+
+ /*
+ * Allocate in the mappable portion, should be no surprises here.
+ */
+ err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), mr, 0);
+ if (err)
+ return err;
+
+ /*
+ * Allocate in the non-mappable portion, but force migrating to
+ * the mappable portion on fault (LMEM -> LMEM)
+ */
+ err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL |
+ IGT_MMAP_MIGRATE_EVICTABLE);
+ if (err)
+ return err;
+
+ /*
+ * Allocate in the non-mappable portion, but force spilling into
+ * system memory on fault (LMEM -> SMEM)
+ */
+ err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), system,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL);
+ if (err)
+ return err;
+
+ /*
+ * Allocate in the non-mappable portion, but since the mappable
+ * portion is already full, and we can't spill to system memory,
+ * then we should expect the fault to fail.
+ */
+ err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL |
+ IGT_MMAP_MIGRATE_UNFAULTABLE);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static const char *repr_mmap_type(enum i915_mmap_type type)
{
switch (type) {
@@ -1424,6 +1729,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_smoke_tiling),
SUBTEST(igt_mmap_offset_exhaustion),
SUBTEST(igt_mmap),
+ SUBTEST(igt_mmap_migrate),
SUBTEST(igt_mmap_access),
SUBTEST(igt_mmap_revoke),
SUBTEST(igt_mmap_gpu),
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 14/20] drm/i915/selftests: exercise mmap migration
2022-01-26 15:21 ` [PATCH 14/20] drm/i915/selftests: exercise mmap migration Matthew Auld
@ 2022-02-03 9:01 ` Thomas Hellström
2022-02-03 9:12 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 9:01 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> Exercise each of the migration scenarios, verifying that the final
> placement and buffer contents match our expectations.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> .../drm/i915/gem/selftests/i915_gem_mman.c | 306 ++++++++++++++++++
> 1 file changed, 306 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> index ba29767348be..d2c1071df98a 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
> @@ -10,6 +10,7 @@
> #include "gt/intel_gpu_commands.h"
> #include "gt/intel_gt.h"
> #include "gt/intel_gt_pm.h"
> +#include "gt/intel_migrate.h"
> #include "gem/i915_gem_region.h"
> #include "huge_gem_object.h"
> #include "i915_selftest.h"
> @@ -999,6 +1000,310 @@ static int igt_mmap(void *arg)
> return 0;
> }
>
> +static void igt_close_objects(struct drm_i915_private *i915,
> + struct list_head *objects)
> +{
> + struct drm_i915_gem_object *obj, *on;
> +
> + list_for_each_entry_safe(obj, on, objects, st_link) {
> + i915_gem_object_lock(obj, NULL);
> + if (i915_gem_object_has_pinned_pages(obj))
> + i915_gem_object_unpin_pages(obj);
> + /* No polluting the memory region between tests */
> + __i915_gem_object_put_pages(obj);
> + i915_gem_object_unlock(obj);
> + list_del(&obj->st_link);
> + i915_gem_object_put(obj);
> + }
> +
> + cond_resched();
> +
> + i915_gem_drain_freed_objects(i915);
> +}
> +
> +static void igt_make_evictable(struct list_head *objects)
> +{
> + struct drm_i915_gem_object *obj;
> +
> + list_for_each_entry(obj, objects, st_link) {
> + i915_gem_object_lock(obj, NULL);
> + if (i915_gem_object_has_pinned_pages(obj))
> + i915_gem_object_unpin_pages(obj);
> + i915_gem_object_unlock(obj);
> + }
> +
> + cond_resched();
> +}
> +
> +static int igt_fill_mappable(struct intel_memory_region *mr,
> + struct list_head *objects)
> +{
> + u64 size, total;
> + int err;
> +
> + total = 0;
> + size = mr->io_size;
> + do {
> + struct drm_i915_gem_object *obj;
> +
> + obj = i915_gem_object_create_region(mr, size, 0, 0);
> + if (IS_ERR(obj)) {
> + err = PTR_ERR(obj);
> + goto err_close;
> + }
> +
> + list_add(&obj->st_link, objects);
> +
> + err = i915_gem_object_pin_pages_unlocked(obj);
> + if (err) {
> + if (err != -ENXIO && err != -ENOMEM)
> + goto err_close;
> +
> + if (size == mr->min_page_size) {
> + err = 0;
> + break;
> + }
> +
> + size >>= 1;
> + continue;
> + }
> +
> + total += obj->base.size;
> + } while (1);
> +
> + pr_info("%s filled=%lluMiB\n", __func__, total >> 20);
> + return 0;
> +
> +err_close:
> + igt_close_objects(mr->i915, objects);
> + return err;
> +}
> +
> +static int ___igt_mmap_migrate(struct drm_i915_private *i915,
> + struct drm_i915_gem_object *obj,
> + unsigned long addr,
> + bool unfaultable)
> +{
> + struct vm_area_struct *area;
> + int err = 0, i;
> +
> + pr_info("igt_mmap(%s, %d) @ %lx\n",
> + obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr);
> +
> + mmap_read_lock(current->mm);
> + area = vma_lookup(current->mm, addr);
> + mmap_read_unlock(current->mm);
> + if (!area) {
> + pr_err("%s: Did not create a vm_area_struct for the mmap\n",
> + obj->mm.region->name);
> + err = -EINVAL;
> + goto out_unmap;
> + }
> +
> + for (i = 0; i < obj->base.size / sizeof(u32); i++) {
> + u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
> + u32 x;
> +
> + if (get_user(x, ux)) {
> + err = -EFAULT;
> + if (!unfaultable) {
> + pr_err("%s: Unable to read from mmap, offset:%zd\n",
> + obj->mm.region->name, i * sizeof(x));
> + goto out_unmap;
> + }
> +
> + continue;
> + }
> +
> + if (unfaultable) {
> + pr_err("%s: Faulted unmappable memory\n",
> + obj->mm.region->name);
> + err = -EINVAL;
> + goto out_unmap;
> + }
> +
> + if (x != expand32(POISON_INUSE)) {
> + pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
> + obj->mm.region->name,
> + i * sizeof(x), x, expand32(POISON_INUSE));
> + err = -EINVAL;
> + goto out_unmap;
> + }
> +
> + x = expand32(POISON_FREE);
> + if (put_user(x, ux)) {
> + pr_err("%s: Unable to write to mmap, offset:%zd\n",
> + obj->mm.region->name, i * sizeof(x));
> + err = -EFAULT;
> + goto out_unmap;
> + }
> + }
> +
> + if (unfaultable) {
> + if (err == -EFAULT)
> + err = 0;
> + } else {
> + obj->flags &= ~I915_BO_ALLOC_TOPDOWN;
> + err = wc_check(obj);
> + }
> +out_unmap:
> + vm_munmap(addr, obj->base.size);
> + return err;
> +}
> +
> +#define IGT_MMAP_MIGRATE_TOPDOWN (1<<0)
> +#define IGT_MMAP_MIGRATE_FILL (1<<1)
> +#define IGT_MMAP_MIGRATE_EVICTABLE (1<<2)
> +#define IGT_MMAP_MIGRATE_UNFAULTABLE (1<<3)
> +static int __igt_mmap_migrate(struct intel_memory_region **placements,
> + int n_placements,
> + struct intel_memory_region *expected_mr,
> + unsigned int flags)
> +{
> + struct drm_i915_private *i915 = placements[0]->i915;
> + struct drm_i915_gem_object *obj;
> + struct i915_gem_ww_ctx ww;
> + struct i915_request *rq = NULL;
> + unsigned long addr;
> + LIST_HEAD(objects);
> + u64 offset;
> + int err;
> +
> + obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
> + placements,
> + n_placements);
> + if (IS_ERR(obj))
> + return PTR_ERR(obj);
> +
> + if (flags & IGT_MMAP_MIGRATE_TOPDOWN)
> + obj->flags |= I915_BO_ALLOC_TOPDOWN;
> +
> + err = __assign_mmap_offset(obj, I915_MMAP_TYPE_FIXED, &offset, NULL);
> + if (err)
> + goto out_put;
> +
> + /*
> + * This will eventually create a GEM context, due to opening dummy drm
> + * file, which needs a tiny amount of mappable device memory for the top
> + * level paging structures(and perhaps scratch), so make sure we
> + * allocate early, to avoid tears.
> + */
> + addr = igt_mmap_offset(i915, offset, obj->base.size,
> + PROT_WRITE, MAP_SHARED);
> + if (IS_ERR_VALUE(addr)) {
> + err = addr;
> + goto out_put;
> + }
> +
> + if (flags & IGT_MMAP_MIGRATE_FILL) {
> + err = igt_fill_mappable(placements[0], &objects);
> + if (err)
> + goto out_put;
> + }
> +
> + for_i915_gem_ww(&ww, err, true) {
Do we need a full ww transaction here? Sufficient to only lock the
object with NULL?
> + err = i915_gem_object_lock(obj, &ww);
> + if (err)
> + continue;
> +
> + err = i915_gem_object_pin_pages(obj);
> + if (err)
> + continue;
> +
> + err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
> + obj->mm.pages->sgl, obj->cache_level,
> + i915_gem_object_is_lmem(obj),
> + expand32(POISON_INUSE), &rq);
> + i915_gem_object_unpin_pages(obj);
> + if (rq) {
> + dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
> + i915_gem_object_set_moving_fence(obj, &rq->fence);
> + i915_request_put(rq);
> + }
> + if (err)
> + continue;
Not needed?
> + }
> + if (err)
> + goto out_put;
> +
> + if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
> + igt_make_evictable(&objects);
> +
> + err = ___igt_mmap_migrate(i915, obj, addr,
> + flags & IGT_MMAP_MIGRATE_UNFAULTABLE);
> + if (!err && obj->mm.region != expected_mr) {
> + pr_err("%s region mismatch %s\n", __func__, expected_mr->name);
> + err = -EINVAL;
> + }
> +
> +out_put:
> + i915_gem_object_put(obj);
> + igt_close_objects(i915, &objects);
> + return err;
> +}
> +
> +static int igt_mmap_migrate(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_memory_region *system = i915->mm.regions[INTEL_REGION_SMEM];
> + struct intel_memory_region *mr;
> + enum intel_region_id id;
> +
> + for_each_memory_region(mr, i915, id) {
> + struct intel_memory_region *mixed[] = { mr, system };
> + struct intel_memory_region *single[] = { mr };
> + int err;
> +
> + if (mr->private)
> + continue;
> +
> + if (!mr->io_size || mr->io_size == mr->total)
> + continue;
> +
> + /*
> + * Allocate in the mappable portion, should be no surprises here.
> + */
> + err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), mr, 0);
> + if (err)
> + return err;
> +
> + /*
> + * Allocate in the non-mappable portion, but force migrating to
> + * the mappable portion on fault (LMEM -> LMEM)
> + */
> + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
> + IGT_MMAP_MIGRATE_TOPDOWN |
> + IGT_MMAP_MIGRATE_FILL |
> + IGT_MMAP_MIGRATE_EVICTABLE);
> + if (err)
> + return err;
> +
> + /*
> + * Allocate in the non-mappable portion, but force spilling into
> + * system memory on fault (LMEM -> SMEM)
> + */
> + err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), system,
> + IGT_MMAP_MIGRATE_TOPDOWN |
> + IGT_MMAP_MIGRATE_FILL);
> + if (err)
> + return err;
> +
> + /*
> + * Allocate in the non-mappable portion, but since the mappable
> + * portion is already full, and we can't spill to system memory,
> + * then we should expect the fault to fail.
> + */
> + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
> + IGT_MMAP_MIGRATE_TOPDOWN |
> + IGT_MMAP_MIGRATE_FILL |
> + IGT_MMAP_MIGRATE_UNFAULTABLE);
> + if (err)
> + return err;
> + }
> +
> + return 0;
> +}
> +
> static const char *repr_mmap_type(enum i915_mmap_type type)
> {
> switch (type) {
> @@ -1424,6 +1729,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
> SUBTEST(igt_smoke_tiling),
> SUBTEST(igt_mmap_offset_exhaustion),
> SUBTEST(igt_mmap),
> + SUBTEST(igt_mmap_migrate),
> SUBTEST(igt_mmap_access),
> SUBTEST(igt_mmap_revoke),
> SUBTEST(igt_mmap_gpu),
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 14/20] drm/i915/selftests: exercise mmap migration
2022-02-03 9:01 ` Thomas Hellström
@ 2022-02-03 9:12 ` Matthew Auld
0 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 9:12 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 03/02/2022 09:01, Thomas Hellström wrote:
>
> On 1/26/22 16:21, Matthew Auld wrote:
>> Exercise each of the migration scenarios, verifying that the final
>> placement and buffer contents match our expectations.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> .../drm/i915/gem/selftests/i915_gem_mman.c | 306 ++++++++++++++++++
>> 1 file changed, 306 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> index ba29767348be..d2c1071df98a 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
>> @@ -10,6 +10,7 @@
>> #include "gt/intel_gpu_commands.h"
>> #include "gt/intel_gt.h"
>> #include "gt/intel_gt_pm.h"
>> +#include "gt/intel_migrate.h"
>> #include "gem/i915_gem_region.h"
>> #include "huge_gem_object.h"
>> #include "i915_selftest.h"
>> @@ -999,6 +1000,310 @@ static int igt_mmap(void *arg)
>> return 0;
>> }
>> +static void igt_close_objects(struct drm_i915_private *i915,
>> + struct list_head *objects)
>> +{
>> + struct drm_i915_gem_object *obj, *on;
>> +
>> + list_for_each_entry_safe(obj, on, objects, st_link) {
>> + i915_gem_object_lock(obj, NULL);
>> + if (i915_gem_object_has_pinned_pages(obj))
>> + i915_gem_object_unpin_pages(obj);
>> + /* No polluting the memory region between tests */
>> + __i915_gem_object_put_pages(obj);
>> + i915_gem_object_unlock(obj);
>> + list_del(&obj->st_link);
>> + i915_gem_object_put(obj);
>> + }
>> +
>> + cond_resched();
>> +
>> + i915_gem_drain_freed_objects(i915);
>> +}
>> +
>> +static void igt_make_evictable(struct list_head *objects)
>> +{
>> + struct drm_i915_gem_object *obj;
>> +
>> + list_for_each_entry(obj, objects, st_link) {
>> + i915_gem_object_lock(obj, NULL);
>> + if (i915_gem_object_has_pinned_pages(obj))
>> + i915_gem_object_unpin_pages(obj);
>> + i915_gem_object_unlock(obj);
>> + }
>> +
>> + cond_resched();
>> +}
>> +
>> +static int igt_fill_mappable(struct intel_memory_region *mr,
>> + struct list_head *objects)
>> +{
>> + u64 size, total;
>> + int err;
>> +
>> + total = 0;
>> + size = mr->io_size;
>> + do {
>> + struct drm_i915_gem_object *obj;
>> +
>> + obj = i915_gem_object_create_region(mr, size, 0, 0);
>> + if (IS_ERR(obj)) {
>> + err = PTR_ERR(obj);
>> + goto err_close;
>> + }
>> +
>> + list_add(&obj->st_link, objects);
>> +
>> + err = i915_gem_object_pin_pages_unlocked(obj);
>> + if (err) {
>> + if (err != -ENXIO && err != -ENOMEM)
>> + goto err_close;
>> +
>> + if (size == mr->min_page_size) {
>> + err = 0;
>> + break;
>> + }
>> +
>> + size >>= 1;
>> + continue;
>> + }
>> +
>> + total += obj->base.size;
>> + } while (1);
>> +
>> + pr_info("%s filled=%lluMiB\n", __func__, total >> 20);
>> + return 0;
>> +
>> +err_close:
>> + igt_close_objects(mr->i915, objects);
>> + return err;
>> +}
>> +
>> +static int ___igt_mmap_migrate(struct drm_i915_private *i915,
>> + struct drm_i915_gem_object *obj,
>> + unsigned long addr,
>> + bool unfaultable)
>> +{
>> + struct vm_area_struct *area;
>> + int err = 0, i;
>> +
>> + pr_info("igt_mmap(%s, %d) @ %lx\n",
>> + obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr);
>> +
>> + mmap_read_lock(current->mm);
>> + area = vma_lookup(current->mm, addr);
>> + mmap_read_unlock(current->mm);
>> + if (!area) {
>> + pr_err("%s: Did not create a vm_area_struct for the mmap\n",
>> + obj->mm.region->name);
>> + err = -EINVAL;
>> + goto out_unmap;
>> + }
>> +
>> + for (i = 0; i < obj->base.size / sizeof(u32); i++) {
>> + u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
>> + u32 x;
>> +
>> + if (get_user(x, ux)) {
>> + err = -EFAULT;
>> + if (!unfaultable) {
>> + pr_err("%s: Unable to read from mmap, offset:%zd\n",
>> + obj->mm.region->name, i * sizeof(x));
>> + goto out_unmap;
>> + }
>> +
>> + continue;
>> + }
>> +
>> + if (unfaultable) {
>> + pr_err("%s: Faulted unmappable memory\n",
>> + obj->mm.region->name);
>> + err = -EINVAL;
>> + goto out_unmap;
>> + }
>> +
>> + if (x != expand32(POISON_INUSE)) {
>> + pr_err("%s: Read incorrect value from mmap, offset:%zd,
>> found:%x, expected:%x\n",
>> + obj->mm.region->name,
>> + i * sizeof(x), x, expand32(POISON_INUSE));
>> + err = -EINVAL;
>> + goto out_unmap;
>> + }
>> +
>> + x = expand32(POISON_FREE);
>> + if (put_user(x, ux)) {
>> + pr_err("%s: Unable to write to mmap, offset:%zd\n",
>> + obj->mm.region->name, i * sizeof(x));
>> + err = -EFAULT;
>> + goto out_unmap;
>> + }
>> + }
>> +
>> + if (unfaultable) {
>> + if (err == -EFAULT)
>> + err = 0;
>> + } else {
>> + obj->flags &= ~I915_BO_ALLOC_TOPDOWN;
>> + err = wc_check(obj);
>> + }
>> +out_unmap:
>> + vm_munmap(addr, obj->base.size);
>> + return err;
>> +}
>> +
>> +#define IGT_MMAP_MIGRATE_TOPDOWN (1<<0)
>> +#define IGT_MMAP_MIGRATE_FILL (1<<1)
>> +#define IGT_MMAP_MIGRATE_EVICTABLE (1<<2)
>> +#define IGT_MMAP_MIGRATE_UNFAULTABLE (1<<3)
>> +static int __igt_mmap_migrate(struct intel_memory_region **placements,
>> + int n_placements,
>> + struct intel_memory_region *expected_mr,
>> + unsigned int flags)
>> +{
>> + struct drm_i915_private *i915 = placements[0]->i915;
>> + struct drm_i915_gem_object *obj;
>> + struct i915_gem_ww_ctx ww;
>> + struct i915_request *rq = NULL;
>> + unsigned long addr;
>> + LIST_HEAD(objects);
>> + u64 offset;
>> + int err;
>> +
>> + obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
>> + placements,
>> + n_placements);
>> + if (IS_ERR(obj))
>> + return PTR_ERR(obj);
>> +
>> + if (flags & IGT_MMAP_MIGRATE_TOPDOWN)
>> + obj->flags |= I915_BO_ALLOC_TOPDOWN;
>> +
>> + err = __assign_mmap_offset(obj, I915_MMAP_TYPE_FIXED, &offset,
>> NULL);
>> + if (err)
>> + goto out_put;
>> +
>> + /*
>> + * This will eventually create a GEM context, due to opening
>> dummy drm
>> + * file, which needs a tiny amount of mappable device memory for
>> the top
>> + * level paging structures(and perhaps scratch), so make sure we
>> + * allocate early, to avoid tears.
>> + */
>> + addr = igt_mmap_offset(i915, offset, obj->base.size,
>> + PROT_WRITE, MAP_SHARED);
>> + if (IS_ERR_VALUE(addr)) {
>> + err = addr;
>> + goto out_put;
>> + }
>> +
>> + if (flags & IGT_MMAP_MIGRATE_FILL) {
>> + err = igt_fill_mappable(placements[0], &objects);
>> + if (err)
>> + goto out_put;
>> + }
>> +
>> + for_i915_gem_ww(&ww, err, true) {
>
> Do we need a full ww transaction here? Sufficient to only lock the
> object with NULL?
I think so, will change.
>
>
>> + err = i915_gem_object_lock(obj, &ww);
>> + if (err)
>> + continue;
>> +
>> + err = i915_gem_object_pin_pages(obj);
>> + if (err)
>> + continue;
>> +
>> + err =
>> intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
>> + obj->mm.pages->sgl, obj->cache_level,
>> + i915_gem_object_is_lmem(obj),
>> + expand32(POISON_INUSE), &rq);
>> + i915_gem_object_unpin_pages(obj);
>> + if (rq) {
>> + dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
>> + i915_gem_object_set_moving_fence(obj, &rq->fence);
>> + i915_request_put(rq);
>> + }
>> + if (err)
>> + continue;
> Not needed?
>> + }
>> + if (err)
>> + goto out_put;
>> +
>> + if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
>> + igt_make_evictable(&objects);
>> +
>> + err = ___igt_mmap_migrate(i915, obj, addr,
>> + flags & IGT_MMAP_MIGRATE_UNFAULTABLE);
>> + if (!err && obj->mm.region != expected_mr) {
>> + pr_err("%s region mismatch %s\n", __func__, expected_mr->name);
>> + err = -EINVAL;
>> + }
>> +
>> +out_put:
>> + i915_gem_object_put(obj);
>> + igt_close_objects(i915, &objects);
>> + return err;
>> +}
>> +
>> +static int igt_mmap_migrate(void *arg)
>> +{
>> + struct drm_i915_private *i915 = arg;
>> + struct intel_memory_region *system =
>> i915->mm.regions[INTEL_REGION_SMEM];
>> + struct intel_memory_region *mr;
>> + enum intel_region_id id;
>> +
>> + for_each_memory_region(mr, i915, id) {
>> + struct intel_memory_region *mixed[] = { mr, system };
>> + struct intel_memory_region *single[] = { mr };
>> + int err;
>> +
>> + if (mr->private)
>> + continue;
>> +
>> + if (!mr->io_size || mr->io_size == mr->total)
>> + continue;
>> +
>> + /*
>> + * Allocate in the mappable portion, should be no surprises here.
>> + */
>> + err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), mr, 0);
>> + if (err)
>> + return err;
>> +
>> + /*
>> + * Allocate in the non-mappable portion, but force migrating to
>> + * the mappable portion on fault (LMEM -> LMEM)
>> + */
>> + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
>> + IGT_MMAP_MIGRATE_TOPDOWN |
>> + IGT_MMAP_MIGRATE_FILL |
>> + IGT_MMAP_MIGRATE_EVICTABLE);
>> + if (err)
>> + return err;
>> +
>> + /*
>> + * Allocate in the non-mappable portion, but force spilling into
>> + * system memory on fault (LMEM -> SMEM)
>> + */
>> + err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), system,
>> + IGT_MMAP_MIGRATE_TOPDOWN |
>> + IGT_MMAP_MIGRATE_FILL);
>> + if (err)
>> + return err;
>> +
>> + /*
>> + * Allocate in the non-mappable portion, but since the mappable
>> + * portion is already full, and we can't spill to system memory,
>> + * then we should expect the fault to fail.
>> + */
>> + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
>> + IGT_MMAP_MIGRATE_TOPDOWN |
>> + IGT_MMAP_MIGRATE_FILL |
>> + IGT_MMAP_MIGRATE_UNFAULTABLE);
>> + if (err)
>> + return err;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static const char *repr_mmap_type(enum i915_mmap_type type)
>> {
>> switch (type) {
>> @@ -1424,6 +1729,7 @@ int i915_gem_mman_live_selftests(struct
>> drm_i915_private *i915)
>> SUBTEST(igt_smoke_tiling),
>> SUBTEST(igt_mmap_offset_exhaustion),
>> SUBTEST(igt_mmap),
>> + SUBTEST(igt_mmap_migrate),
>> SUBTEST(igt_mmap_access),
>> SUBTEST(igt_mmap_revoke),
>> SUBTEST(igt_mmap_gpu),
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 15/20] drm/i915/selftests: handle allocation failures
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (13 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 14/20] drm/i915/selftests: exercise mmap migration Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-03 9:05 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 16/20] drm/i915/create: apply ALLOC_TOPDOWN by default Matthew Auld
` (4 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
If we have to contend with non-mappable LMEM, then we need to ensure the
object fits within the mappable portion, like in the selftests, where we
later try to CPU access the pages. However if it can't then we need to
gracefully handle this, without throwing an error.
Also it looks like TTM will return -ENOMEM if the object can't be
placed.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 +++++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 42db9cd30978..3caa178bbd07 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1344,7 +1344,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
err = i915_gem_object_pin_pages_unlocked(obj);
if (err) {
- if (err == -ENXIO || err == -E2BIG) {
+ if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
i915_gem_object_put(obj);
size >>= 1;
goto try_again;
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 04ae29779206..87bff7f83554 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -822,8 +822,14 @@ static int igt_lmem_create_with_ps(void *arg)
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_pin_pages(obj);
- if (err)
+ if (err) {
+ if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
+ pr_info("%s not enough lmem for ps(%u) err=%d\n",
+ __func__, ps, err);
+ err = 0;
+ }
goto out_put;
+ }
daddr = i915_gem_object_get_dma_address(obj, 0);
if (!IS_ALIGNED(daddr, ps)) {
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 15/20] drm/i915/selftests: handle allocation failures
2022-01-26 15:21 ` [PATCH 15/20] drm/i915/selftests: handle allocation failures Matthew Auld
@ 2022-02-03 9:05 ` Thomas Hellström
2022-02-03 9:11 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 9:05 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> If we have to contend with non-mappable LMEM, then we need to ensure the
> object fits within the mappable portion, like in the selftests, where we
> later try to CPU access the pages. However if it can't then we need to
> gracefully handle this, without throwing an error.
>
> Also it looks like TTM will return -ENOMEM if the object can't be
> placed.
We should probably have a look at why that happens. I thought ttm would
return -ENOSPC, which we then converted to -ENXIO in i915_ttm_err_to_gem().
/Thomas
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
> drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 +++++++-
> 2 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index 42db9cd30978..3caa178bbd07 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -1344,7 +1344,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
>
> err = i915_gem_object_pin_pages_unlocked(obj);
> if (err) {
> - if (err == -ENXIO || err == -E2BIG) {
> + if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
> i915_gem_object_put(obj);
> size >>= 1;
> goto try_again;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index 04ae29779206..87bff7f83554 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -822,8 +822,14 @@ static int igt_lmem_create_with_ps(void *arg)
>
> i915_gem_object_lock(obj, NULL);
> err = i915_gem_object_pin_pages(obj);
> - if (err)
> + if (err) {
> + if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
> + pr_info("%s not enough lmem for ps(%u) err=%d\n",
> + __func__, ps, err);
> + err = 0;
> + }
> goto out_put;
> + }
>
> daddr = i915_gem_object_get_dma_address(obj, 0);
> if (!IS_ALIGNED(daddr, ps)) {
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 15/20] drm/i915/selftests: handle allocation failures
2022-02-03 9:05 ` Thomas Hellström
@ 2022-02-03 9:11 ` Matthew Auld
0 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 9:11 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 03/02/2022 09:05, Thomas Hellström wrote:
>
> On 1/26/22 16:21, Matthew Auld wrote:
>> If we have to contend with non-mappable LMEM, then we need to ensure the
>> object fits within the mappable portion, like in the selftests, where we
>> later try to CPU access the pages. However if it can't then we need to
>> gracefully handle this, without throwing an error.
>>
>> Also it looks like TTM will return -ENOMEM if the object can't be
>> placed.
>
> We should probably have a look at why that happens. I thought ttm would
> return -ENOSPC, which we then converted to -ENXIO in i915_ttm_err_to_gem().
IIRC it was in ttm_bo_mem_space(), where right at the end it does ret =
-ENOMEM, after failing to evict buffers.
>
> /Thomas
>
>
>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
>> drivers/gpu/drm/i915/selftests/intel_memory_region.c | 8 +++++++-
>> 2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> index 42db9cd30978..3caa178bbd07 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> @@ -1344,7 +1344,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
>> err = i915_gem_object_pin_pages_unlocked(obj);
>> if (err) {
>> - if (err == -ENXIO || err == -E2BIG) {
>> + if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
>> i915_gem_object_put(obj);
>> size >>= 1;
>> goto try_again;
>> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> index 04ae29779206..87bff7f83554 100644
>> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
>> @@ -822,8 +822,14 @@ static int igt_lmem_create_with_ps(void *arg)
>> i915_gem_object_lock(obj, NULL);
>> err = i915_gem_object_pin_pages(obj);
>> - if (err)
>> + if (err) {
>> + if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
>> + pr_info("%s not enough lmem for ps(%u) err=%d\n",
>> + __func__, ps, err);
>> + err = 0;
>> + }
>> goto out_put;
>> + }
>> daddr = i915_gem_object_get_dma_address(obj, 0);
>> if (!IS_ALIGNED(daddr, ps)) {
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 16/20] drm/i915/create: apply ALLOC_TOPDOWN by default
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (14 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 15/20] drm/i915/selftests: handle allocation failures Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-03 9:17 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint Matthew Auld
` (3 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Starting from DG2+, when dealing with LMEM, we assume that by default
all userspace allocations should be placed in the non-mappable portion
of LMEM. Note that dumb buffers are not included here, since these are
not "GPU accelerated" and likely need CPU access.
In a later patch userspace will be able to provide a hint if CPU access
to the buffer is needed.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_create.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 9402d4bf4ffc..e7456443f163 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -424,6 +424,15 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
ext_data.n_placements = 1;
}
+ /*
+ * TODO: add a userspace hint to force CPU_ACCESS for the object, which
+ * can override this.
+ */
+ if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
+ ext_data.placements[0]->type !=
+ INTEL_MEMORY_SYSTEM))
+ ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
+
obj = __i915_gem_object_create_user_ext(i915, args->size,
ext_data.placements,
ext_data.n_placements,
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 16/20] drm/i915/create: apply ALLOC_TOPDOWN by default
2022-01-26 15:21 ` [PATCH 16/20] drm/i915/create: apply ALLOC_TOPDOWN by default Matthew Auld
@ 2022-02-03 9:17 ` Thomas Hellström
2022-02-03 9:32 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 9:17 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> Starting from DG2+, when dealing with LMEM, we assume that by default
> all userspace allocations should be placed in the non-mappable portion
> of LMEM. Note that dumb buffers are not included here, since these are
> not "GPU accelerated" and likely need CPU access.
>
> In a later patch userspace will be able to provide a hint if CPU access
> to the buffer is needed.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_create.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index 9402d4bf4ffc..e7456443f163 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -424,6 +424,15 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> ext_data.n_placements = 1;
> }
>
> + /*
> + * TODO: add a userspace hint to force CPU_ACCESS for the object, which
> + * can override this.
> + */
> + if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> + ext_data.placements[0]->type !=
> + INTEL_MEMORY_SYSTEM))
> + ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
Perhaps we should include DG1 here as well, so that the same paths are
taken regardless whether this is only a test on DG1?
> +
> obj = __i915_gem_object_create_user_ext(i915, args->size,
> ext_data.placements,
> ext_data.n_placements,
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 16/20] drm/i915/create: apply ALLOC_TOPDOWN by default
2022-02-03 9:17 ` Thomas Hellström
@ 2022-02-03 9:32 ` Matthew Auld
0 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 9:32 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 03/02/2022 09:17, Thomas Hellström wrote:
>
> On 1/26/22 16:21, Matthew Auld wrote:
>> Starting from DG2+, when dealing with LMEM, we assume that by default
>> all userspace allocations should be placed in the non-mappable portion
>> of LMEM. Note that dumb buffers are not included here, since these are
>> not "GPU accelerated" and likely need CPU access.
>>
>> In a later patch userspace will be able to provide a hint if CPU access
>> to the buffer is needed.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_create.c | 9 +++++++++
>> 1 file changed, 9 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> index 9402d4bf4ffc..e7456443f163 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> @@ -424,6 +424,15 @@ i915_gem_create_ext_ioctl(struct drm_device *dev,
>> void *data,
>> ext_data.n_placements = 1;
>> }
>> + /*
>> + * TODO: add a userspace hint to force CPU_ACCESS for the object,
>> which
>> + * can override this.
>> + */
>> + if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
>> + ext_data.placements[0]->type !=
>> + INTEL_MEMORY_SYSTEM))
>> + ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
>
> Perhaps we should include DG1 here as well, so that the same paths are
> taken regardless whether this is only a test on DG1?
I think the only reason was EXEC_CAPTURE, where atm we just reject
anything marked with I915_BO_ALLOC_TOPDOWN, but that must not break
existing DG1 uapi.
>
>
>
>
>> +
>> obj = __i915_gem_object_create_user_ext(i915, args->size,
>> ext_data.placements,
>> ext_data.n_placements,
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (15 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 16/20] drm/i915/create: apply ALLOC_TOPDOWN by default Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-03 9:28 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture Matthew Auld
` (2 subsequent siblings)
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
If set, force the allocation to be placed in the mappable portion of
LMEM. One big restriction here is that system memory must be given as a
potential placement for the object, that way we can always spill the
object into system memory if we can't make space.
XXX: Still very much WIP and needs IGTs. Including now just for the sake
of having a more complete picture.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_create.c | 28 ++++++++++++-------
include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++-
2 files changed, 49 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index e7456443f163..98d63cb21e94 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -238,6 +238,7 @@ struct create_ext {
struct drm_i915_private *i915;
struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
unsigned int n_placements;
+ unsigned int placement_mask;
unsigned long flags;
};
@@ -334,6 +335,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
for (i = 0; i < args->num_regions; i++)
ext_data->placements[i] = placements[i];
+ ext_data->placement_mask = mask;
return 0;
out_dump:
@@ -408,7 +410,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_object *obj;
int ret;
- if (args->flags)
+ if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
@@ -424,14 +426,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
ext_data.n_placements = 1;
}
- /*
- * TODO: add a userspace hint to force CPU_ACCESS for the object, which
- * can override this.
- */
- if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
- ext_data.placements[0]->type !=
- INTEL_MEMORY_SYSTEM))
- ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
+ if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
+ if (ext_data.n_placements == 1)
+ return -EINVAL;
+
+ /*
+ * We always need to be able to spill to system memory, if we
+ * can't place in the mappable part of LMEM.
+ */
+ if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
+ return -EINVAL;
+ } else {
+ if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
+ ext_data.placements[0]->type !=
+ INTEL_MEMORY_SYSTEM))
+ ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
+ }
obj = __i915_gem_object_create_user_ext(i915, args->size,
ext_data.placements,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 914ebd9290e5..ecfa805549a7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
* Object handles are nonzero.
*/
__u32 handle;
- /** @flags: MBZ */
+ /**
+ * @flags: Optional flags.
+ *
+ * Supported values:
+ *
+ * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+ * the object will need to be accessed via the CPU.
+ *
+ * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+ * only strictly required on platforms where only some of the device
+ * memory is directly visible or mappable through the CPU, like on DG2+.
+ *
+ * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
+ * ensure we can always spill the allocation to system memory, if we
+ * can't place the object in the mappable part of
+ * I915_MEMORY_CLASS_DEVICE.
+ *
+ * Note that buffers that need to be captured with EXEC_OBJECT_CAPTURE,
+ * will need to enable this hint, if the object can also be placed in
+ * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call will
+ * throw an error otherwise. This also means that such objects will need
+ * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
+ *
+ * Without this hint, the kernel will assume that non-mappable
+ * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
+ * kernel can still migrate the object to the mappable part, as a last
+ * resort, if userspace ever CPU faults this object, but this might be
+ * expensive, and so ideally should be avoided.
+ */
+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1<<0)
__u32 flags;
/**
* @extensions: The chain of extensions to apply to this object.
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint
2022-01-26 15:21 ` [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint Matthew Auld
@ 2022-02-03 9:28 ` Thomas Hellström
2022-02-03 11:38 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 9:28 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> If set, force the allocation to be placed in the mappable portion of
> LMEM. One big restriction here is that system memory must be given as a
> potential placement for the object, that way we can always spill the
> object into system memory if we can't make space.
>
> XXX: Still very much WIP and needs IGTs. Including now just for the sake
> of having more complete picture.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_create.c | 28 ++++++++++++-------
> include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++-
> 2 files changed, 49 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index e7456443f163..98d63cb21e94 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -238,6 +238,7 @@ struct create_ext {
> struct drm_i915_private *i915;
> struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
> unsigned int n_placements;
> + unsigned int placement_mask;
> unsigned long flags;
> };
>
> @@ -334,6 +335,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
> for (i = 0; i < args->num_regions; i++)
> ext_data->placements[i] = placements[i];
>
> + ext_data->placement_mask = mask;
> return 0;
>
> out_dump:
> @@ -408,7 +410,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> struct drm_i915_gem_object *obj;
> int ret;
>
> - if (args->flags)
> + if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> return -EINVAL;
>
> ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
> @@ -424,14 +426,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> ext_data.n_placements = 1;
> }
>
> - /*
> - * TODO: add a userspace hint to force CPU_ACCESS for the object, which
> - * can override this.
> - */
> - if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> - ext_data.placements[0]->type !=
> - INTEL_MEMORY_SYSTEM))
> - ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
> + if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
> + if (ext_data.n_placements == 1)
> + return -EINVAL;
> +
> + /*
> + * We always need to be able to spill to system memory, if we
> + * can't place in the mappable part of LMEM.
> + */
> + if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
> + return -EINVAL;
> + } else {
> + if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> + ext_data.placements[0]->type !=
> + INTEL_MEMORY_SYSTEM))
> + ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
> + }
>
> obj = __i915_gem_object_create_user_ext(i915, args->size,
> ext_data.placements,
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 914ebd9290e5..ecfa805549a7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
> * Object handles are nonzero.
> */
> __u32 handle;
> - /** @flags: MBZ */
> + /**
> + * @flags: Optional flags.
> + *
> + * Supported values:
> + *
> + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
> + * the object will need to be accessed via the CPU.
> + *
> + * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
> + * only strictly required on platforms where only some of the device
> + * memory is directly visible or mappable through the CPU, like on DG2+.
> + *
> + * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
> + * ensure we can always spill the allocation to system memory, if we
> + * can't place the object in the mappable part of
> + * I915_MEMORY_CLASS_DEVICE.
> + *
> + * Note that buffers that need to be captured with EXEC_OBJECT_CAPTURE,
> + * will need to enable this hint, if the object can also be placed in
> + * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call will
> + * throw an error otherwise. This also means that such objects will need
> + * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
> + *
I wonder, should we try to migrate capture objects at execbuf time
instead on an on-demand basis? If migration fails, then we just skip
capturing that object, similar to how the capture code handles errors?
> + * Without this hint, the kernel will assume that non-mappable
> + * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
> + * kernel can still migrate the object to the mappable part, as a last
> + * resort, if userspace ever CPU faults this object, but this might be
> + * expensive, and so ideally should be avoided.
> + */
> +#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1<<0)
> __u32 flags;
> /**
> * @extensions: The chain of extensions to apply to this object.
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint
2022-02-03 9:28 ` Thomas Hellström
@ 2022-02-03 11:38 ` Matthew Auld
2022-02-03 13:29 ` Thomas Hellström
0 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 11:38 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx, Bloomfield, Jon; +Cc: dri-devel
On 03/02/2022 09:28, Thomas Hellström wrote:
>
> On 1/26/22 16:21, Matthew Auld wrote:
>> If set, force the allocation to be placed in the mappable portion of
>> LMEM. One big restriction here is that system memory must be given as a
>> potential placement for the object, that way we can always spill the
>> object into system memory if we can't make space.
>>
>> XXX: Still very much WIP and needs IGTs. Including now just for the sake
>> of having more complete picture.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_create.c | 28 ++++++++++++-------
>> include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++-
>> 2 files changed, 49 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> index e7456443f163..98d63cb21e94 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
>> @@ -238,6 +238,7 @@ struct create_ext {
>> struct drm_i915_private *i915;
>> struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
>> unsigned int n_placements;
>> + unsigned int placement_mask;
>> unsigned long flags;
>> };
>> @@ -334,6 +335,7 @@ static int set_placements(struct
>> drm_i915_gem_create_ext_memory_regions *args,
>> for (i = 0; i < args->num_regions; i++)
>> ext_data->placements[i] = placements[i];
>> + ext_data->placement_mask = mask;
>> return 0;
>> out_dump:
>> @@ -408,7 +410,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev,
>> void *data,
>> struct drm_i915_gem_object *obj;
>> int ret;
>> - if (args->flags)
>> + if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
>> return -EINVAL;
>> ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
>> @@ -424,14 +426,22 @@ i915_gem_create_ext_ioctl(struct drm_device
>> *dev, void *data,
>> ext_data.n_placements = 1;
>> }
>> - /*
>> - * TODO: add a userspace hint to force CPU_ACCESS for the object,
>> which
>> - * can override this.
>> - */
>> - if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
>> - ext_data.placements[0]->type !=
>> - INTEL_MEMORY_SYSTEM))
>> - ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
>> + if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
>> + if (ext_data.n_placements == 1)
>> + return -EINVAL;
>> +
>> + /*
>> + * We always need to be able to spill to system memory, if we
>> + * can't place in the mappable part of LMEM.
>> + */
>> + if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
>> + return -EINVAL;
>> + } else {
>> + if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
>> + ext_data.placements[0]->type !=
>> + INTEL_MEMORY_SYSTEM))
>> + ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
>> + }
>> obj = __i915_gem_object_create_user_ext(i915, args->size,
>> ext_data.placements,
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 914ebd9290e5..ecfa805549a7 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
>> * Object handles are nonzero.
>> */
>> __u32 handle;
>> - /** @flags: MBZ */
>> + /**
>> + * @flags: Optional flags.
>> + *
>> + * Supported values:
>> + *
>> + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the
>> kernel that
>> + * the object will need to be accessed via the CPU.
>> + *
>> + * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
>> + * only strictly required on platforms where only some of the device
>> + * memory is directly visible or mappable through the CPU, like
>> on DG2+.
>> + *
>> + * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
>> + * ensure we can always spill the allocation to system memory, if we
>> + * can't place the object in the mappable part of
>> + * I915_MEMORY_CLASS_DEVICE.
>> + *
>> + * Note that buffers that need to be captured with
>> EXEC_OBJECT_CAPTURE,
>> + * will need to enable this hint, if the object can also be
>> placed in
>> + * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call
>> will
>> + * throw an error otherwise. This also means that such objects
>> will need
>> + * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
>> + *
>
> I wonder, should we try to migrate capture objects at execbuf time
> instead on an on-demand basis? If migration fails, then we just skip
> capturing that object, similar to how the capture code handles errors?
So IIUC if the object has been marked for capture, unmark the TOPDOWN
annotation, if it has been set, to force allocating in the mappable
portion, or spill to system memory (if the placements allow it)? I think
that should work. Jon, any thoughts?
>
>> + * Without this hint, the kernel will assume that non-mappable
>> + * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note
>> that the
>> + * kernel can still migrate the object to the mappable part, as a
>> last
>> + * resort, if userspace ever CPU faults this object, but this
>> might be
>> + * expensive, and so ideally should be avoided.
>> + */
>> +#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1<<0)
>> __u32 flags;
>> /**
>> * @extensions: The chain of extensions to apply to this object.
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint
2022-02-03 11:38 ` Matthew Auld
@ 2022-02-03 13:29 ` Thomas Hellström
0 siblings, 0 replies; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 13:29 UTC (permalink / raw)
To: Matthew Auld, intel-gfx, Bloomfield, Jon; +Cc: dri-devel
On Thu, 2022-02-03 at 11:38 +0000, Matthew Auld wrote:
> On 03/02/2022 09:28, Thomas Hellström wrote:
> >
> > On 1/26/22 16:21, Matthew Auld wrote:
> > > If set, force the allocation to be placed in the mappable portion
> > > of
> > > LMEM. One big restriction here is that system memory must be
> > > given as a
> > > potential placement for the object, that way we can always spill
> > > the
> > > object into system memory if we can't make space.
> > >
> > > XXX: Still very much WIP and needs IGTs. Including now just for
> > > the sake
> > > of having more complete picture.
> > >
> > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > ---
> > > drivers/gpu/drm/i915/gem/i915_gem_create.c | 28 ++++++++++++---
> > > ----
> > > include/uapi/drm/i915_drm.h | 31
> > > +++++++++++++++++++++-
> > > 2 files changed, 49 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > index e7456443f163..98d63cb21e94 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > > @@ -238,6 +238,7 @@ struct create_ext {
> > > struct drm_i915_private *i915;
> > > struct intel_memory_region
> > > *placements[INTEL_REGION_UNKNOWN];
> > > unsigned int n_placements;
> > > + unsigned int placement_mask;
> > > unsigned long flags;
> > > };
> > > @@ -334,6 +335,7 @@ static int set_placements(struct
> > > drm_i915_gem_create_ext_memory_regions *args,
> > > for (i = 0; i < args->num_regions; i++)
> > > ext_data->placements[i] = placements[i];
> > > + ext_data->placement_mask = mask;
> > > return 0;
> > > out_dump:
> > > @@ -408,7 +410,7 @@ i915_gem_create_ext_ioctl(struct drm_device
> > > *dev,
> > > void *data,
> > > struct drm_i915_gem_object *obj;
> > > int ret;
> > > - if (args->flags)
> > > + if (args->flags &
> > > ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> > > return -EINVAL;
> > > ret = i915_user_extensions(u64_to_user_ptr(args-
> > > >extensions),
> > > @@ -424,14 +426,22 @@ i915_gem_create_ext_ioctl(struct drm_device
> > > *dev, void *data,
> > > ext_data.n_placements = 1;
> > > }
> > > - /*
> > > - * TODO: add a userspace hint to force CPU_ACCESS for the
> > > object,
> > > which
> > > - * can override this.
> > > - */
> > > - if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> > > - ext_data.placements[0]->type !=
> > > - INTEL_MEMORY_SYSTEM))
> > > - ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
> > > + if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> > > {
> > > + if (ext_data.n_placements == 1)
> > > + return -EINVAL;
> > > +
> > > + /*
> > > + * We always need to be able to spill to system memory,
> > > if we
> > > + * can't place in the mappable part of LMEM.
> > > + */
> > > + if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
> > > + return -EINVAL;
> > > + } else {
> > > + if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> > > + ext_data.placements[0]->type !=
> > > + INTEL_MEMORY_SYSTEM))
> > > + ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
> > > + }
> > > obj = __i915_gem_object_create_user_ext(i915, args->size,
> > > ext_data.placements,
> > > diff --git a/include/uapi/drm/i915_drm.h
> > > b/include/uapi/drm/i915_drm.h
> > > index 914ebd9290e5..ecfa805549a7 100644
> > > --- a/include/uapi/drm/i915_drm.h
> > > +++ b/include/uapi/drm/i915_drm.h
> > > @@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
> > > * Object handles are nonzero.
> > > */
> > > __u32 handle;
> > > - /** @flags: MBZ */
> > > + /**
> > > + * @flags: Optional flags.
> > > + *
> > > + * Supported values:
> > > + *
> > > + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the
> > > kernel that
> > > + * the object will need to be accessed via the CPU.
> > > + *
> > > + * Only valid when placing objects in
> > > I915_MEMORY_CLASS_DEVICE, and
> > > + * only strictly required on platforms where only some of
> > > the device
> > > + * memory is directly visible or mappable through the CPU,
> > > like
> > > on DG2+.
> > > + *
> > > + * One of the placements MUST also be
> > > I915_MEMORY_CLASS_SYSTEM, to
> > > + * ensure we can always spill the allocation to system
> > > memory, if we
> > > + * can't place the object in the mappable part of
> > > + * I915_MEMORY_CLASS_DEVICE.
> > > + *
> > > + * Note that buffers that need to be captured with
> > > EXEC_OBJECT_CAPTURE,
> > > + * will need to enable this hint, if the object can also be
> > > placed in
> > > + * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf
> > > call
> > > will
> > > + * throw an error otherwise. This also means that such
> > > objects
> > > will need
> > > + * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
> > > + *
> >
> > I wonder, should we try to migrate capture objects at execbuf time
> > instead on an on-demand basis? If migration fails, then we just
> > skip
> > capturing that object, similar to how the capture code handles
> > errors?
>
> So IIUC if the object has been marked for capture, unmark the TOPDOWN
> annotation, if it has been set, to force allocating in the mappable
> portion, or spill to system memory(if the placements allow it)? I
> think
> that should work.
Yes that would temporarily mean drop the TOPDOWN flag and migrate the
object if needed (we can do that async from inside execbuf AFAICT).
We'd need to make the TOPDOWN flag mutable and part of object->mem_flags
> Jon any thoughts
And on that subject, the TOPDOWN flag name is IMHO a bit misleading. It
really has a "GPU_ONLY" meaning, which translates to TOPDOWN
allocations in some memory regions only. For others (no small bar,
small bar + multi-tile), it might translate to nothing or to tile
selection?
/Thomas
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (16 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-01-26 19:42 ` [Intel-gfx] " kernel test robot
` (2 more replies)
2022-01-26 15:21 ` [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error Matthew Auld
2022-01-26 15:21 ` [PATCH 20/20] HAX: DG1 small BAR Matthew Auld
19 siblings, 3 replies; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
On platforms where there might be non-mappable LMEM, force userspace to
mark the buffers with the correct hint. When dumping the BO contents
during capture we need CPU access. Note this only applies to buffers
that can be placed in LMEM, and also doesn't impact DG1.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 498b458fd784..3c8083852620 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1965,7 +1965,7 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
-static void eb_capture_stage(struct i915_execbuffer *eb)
+static int eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count, j;
@@ -1978,6 +1978,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
if (!(flags & EXEC_OBJECT_CAPTURE))
continue;
+ if (vma->obj->flags & I915_BO_ALLOC_TOPDOWN)
+ return -EINVAL;
+
for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
@@ -1990,6 +1993,8 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
eb->capture_lists[j] = capture;
}
}
+
+ return 0;
}
/* Commit once we're in the critical path */
@@ -3418,7 +3423,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
ww_acquire_done(&eb.ww.ctx);
- eb_capture_stage(&eb);
+ err = eb_capture_stage(&eb);
+ if (err)
+ goto err_vma;
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
if (IS_ERR(out_fence)) {
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [Intel-gfx] [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture
2022-01-26 15:21 ` [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture Matthew Auld
@ 2022-01-26 19:42 ` kernel test robot
2022-01-26 20:03 ` kernel test robot
2022-02-03 9:43 ` Thomas Hellström
2 siblings, 0 replies; 50+ messages in thread
From: kernel test robot @ 2022-01-26 19:42 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: Thomas Hellström, kbuild-all, dri-devel
Hi Matthew,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm/drm-next v5.17-rc1 next-20220125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Matthew-Auld/Initial-support-for-small-BAR-recovery/20220126-232640
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-a002-20220124 (https://download.01.org/0day-ci/archive/20220127/202201270314.tWKIUNdM-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/33b0a9f1f9810bd16cef89ce1e5787751583661e
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Matthew-Auld/Initial-support-for-small-BAR-recovery/20220126-232640
git checkout 33b0a9f1f9810bd16cef89ce1e5787751583661e
# save the config file to linux build tree
mkdir build_dir
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/i915/
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c: In function 'i915_gem_do_execbuffer':
>> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:3426:6: error: void value not ignored as it ought to be
3426 | err = eb_capture_stage(&eb);
| ^
vim +3426 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
3381
3382 if (args->flags & I915_EXEC_FENCE_OUT) {
3383 out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
3384 if (out_fence_fd < 0) {
3385 err = out_fence_fd;
3386 goto err_in_fence;
3387 }
3388 }
3389
3390 err = eb_create(&eb);
3391 if (err)
3392 goto err_out_fence;
3393
3394 GEM_BUG_ON(!eb.lut_size);
3395
3396 err = eb_select_context(&eb);
3397 if (unlikely(err))
3398 goto err_destroy;
3399
3400 err = eb_select_engine(&eb);
3401 if (unlikely(err))
3402 goto err_context;
3403
3404 err = eb_lookup_vmas(&eb);
3405 if (err) {
3406 eb_release_vmas(&eb, true);
3407 goto err_engine;
3408 }
3409
3410 i915_gem_ww_ctx_init(&eb.ww, true);
3411
3412 err = eb_relocate_parse(&eb);
3413 if (err) {
3414 /*
3415 * If the user expects the execobject.offset and
3416 * reloc.presumed_offset to be an exact match,
3417 * as for using NO_RELOC, then we cannot update
3418 * the execobject.offset until we have completed
3419 * relocation.
3420 */
3421 args->flags &= ~__EXEC_HAS_RELOC;
3422 goto err_vma;
3423 }
3424
3425 ww_acquire_done(&eb.ww.ctx);
> 3426 err = eb_capture_stage(&eb);
3427 if (err)
3428 goto err_vma;
3429
3430 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
3431 if (IS_ERR(out_fence)) {
3432 err = PTR_ERR(out_fence);
3433 out_fence = NULL;
3434 if (eb.requests[0])
3435 goto err_request;
3436 else
3437 goto err_vma;
3438 }
3439
3440 err = eb_submit(&eb);
3441
3442 err_request:
3443 eb_requests_get(&eb);
3444 err = eb_requests_add(&eb, err);
3445
3446 if (eb.fences)
3447 signal_fence_array(&eb, eb.composite_fence ?
3448 eb.composite_fence :
3449 &eb.requests[0]->fence);
3450
3451 if (out_fence) {
3452 if (err == 0) {
3453 fd_install(out_fence_fd, out_fence->file);
3454 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
3455 args->rsvd2 |= (u64)out_fence_fd << 32;
3456 out_fence_fd = -1;
3457 } else {
3458 fput(out_fence->file);
3459 }
3460 }
3461
3462 if (unlikely(eb.gem_context->syncobj)) {
3463 drm_syncobj_replace_fence(eb.gem_context->syncobj,
3464 eb.composite_fence ?
3465 eb.composite_fence :
3466 &eb.requests[0]->fence);
3467 }
3468
3469 if (!out_fence && eb.composite_fence)
3470 dma_fence_put(eb.composite_fence);
3471
3472 eb_requests_put(&eb);
3473
3474 err_vma:
3475 eb_release_vmas(&eb, true);
3476 WARN_ON(err == -EDEADLK);
3477 i915_gem_ww_ctx_fini(&eb.ww);
3478
3479 if (eb.batch_pool)
3480 intel_gt_buffer_pool_put(eb.batch_pool);
3481 err_engine:
3482 eb_put_engine(&eb);
3483 err_context:
3484 i915_gem_context_put(eb.gem_context);
3485 err_destroy:
3486 eb_destroy(&eb);
3487 err_out_fence:
3488 if (out_fence_fd != -1)
3489 put_unused_fd(out_fence_fd);
3490 err_in_fence:
3491 dma_fence_put(in_fence);
3492 err_ext:
3493 put_fence_array(eb.fences, eb.num_fences);
3494 return err;
3495 }
3496
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [Intel-gfx] [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture
2022-01-26 15:21 ` [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture Matthew Auld
2022-01-26 19:42 ` [Intel-gfx] " kernel test robot
@ 2022-01-26 20:03 ` kernel test robot
2022-02-03 9:43 ` Thomas Hellström
2 siblings, 0 replies; 50+ messages in thread
From: kernel test robot @ 2022-01-26 20:03 UTC (permalink / raw)
To: Matthew Auld, intel-gfx
Cc: Thomas Hellström, llvm, kbuild-all, dri-devel
Hi Matthew,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm/drm-next v5.17-rc1 next-20220125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Matthew-Auld/Initial-support-for-small-BAR-recovery/20220126-232640
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a013-20220124 (https://download.01.org/0day-ci/archive/20220127/202201270346.FZrPMvZl-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 2a1b7aa016c0f4b5598806205bdfbab1ea2d92c4)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/33b0a9f1f9810bd16cef89ce1e5787751583661e
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Matthew-Auld/Initial-support-for-small-BAR-recovery/20220126-232640
git checkout 33b0a9f1f9810bd16cef89ce1e5787751583661e
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/gpu/drm/i915/
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:3426:6: error: assigning to 'int' from incompatible type 'void'
err = eb_capture_stage(&eb);
^ ~~~~~~~~~~~~~~~~~~~~~
1 error generated.
vim +3426 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
3381
3382 if (args->flags & I915_EXEC_FENCE_OUT) {
3383 out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
3384 if (out_fence_fd < 0) {
3385 err = out_fence_fd;
3386 goto err_in_fence;
3387 }
3388 }
3389
3390 err = eb_create(&eb);
3391 if (err)
3392 goto err_out_fence;
3393
3394 GEM_BUG_ON(!eb.lut_size);
3395
3396 err = eb_select_context(&eb);
3397 if (unlikely(err))
3398 goto err_destroy;
3399
3400 err = eb_select_engine(&eb);
3401 if (unlikely(err))
3402 goto err_context;
3403
3404 err = eb_lookup_vmas(&eb);
3405 if (err) {
3406 eb_release_vmas(&eb, true);
3407 goto err_engine;
3408 }
3409
3410 i915_gem_ww_ctx_init(&eb.ww, true);
3411
3412 err = eb_relocate_parse(&eb);
3413 if (err) {
3414 /*
3415 * If the user expects the execobject.offset and
3416 * reloc.presumed_offset to be an exact match,
3417 * as for using NO_RELOC, then we cannot update
3418 * the execobject.offset until we have completed
3419 * relocation.
3420 */
3421 args->flags &= ~__EXEC_HAS_RELOC;
3422 goto err_vma;
3423 }
3424
3425 ww_acquire_done(&eb.ww.ctx);
> 3426 err = eb_capture_stage(&eb);
3427 if (err)
3428 goto err_vma;
3429
3430 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
3431 if (IS_ERR(out_fence)) {
3432 err = PTR_ERR(out_fence);
3433 out_fence = NULL;
3434 if (eb.requests[0])
3435 goto err_request;
3436 else
3437 goto err_vma;
3438 }
3439
3440 err = eb_submit(&eb);
3441
3442 err_request:
3443 eb_requests_get(&eb);
3444 err = eb_requests_add(&eb, err);
3445
3446 if (eb.fences)
3447 signal_fence_array(&eb, eb.composite_fence ?
3448 eb.composite_fence :
3449 &eb.requests[0]->fence);
3450
3451 if (out_fence) {
3452 if (err == 0) {
3453 fd_install(out_fence_fd, out_fence->file);
3454 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
3455 args->rsvd2 |= (u64)out_fence_fd << 32;
3456 out_fence_fd = -1;
3457 } else {
3458 fput(out_fence->file);
3459 }
3460 }
3461
3462 if (unlikely(eb.gem_context->syncobj)) {
3463 drm_syncobj_replace_fence(eb.gem_context->syncobj,
3464 eb.composite_fence ?
3465 eb.composite_fence :
3466 &eb.requests[0]->fence);
3467 }
3468
3469 if (!out_fence && eb.composite_fence)
3470 dma_fence_put(eb.composite_fence);
3471
3472 eb_requests_put(&eb);
3473
3474 err_vma:
3475 eb_release_vmas(&eb, true);
3476 WARN_ON(err == -EDEADLK);
3477 i915_gem_ww_ctx_fini(&eb.ww);
3478
3479 if (eb.batch_pool)
3480 intel_gt_buffer_pool_put(eb.batch_pool);
3481 err_engine:
3482 eb_put_engine(&eb);
3483 err_context:
3484 i915_gem_context_put(eb.gem_context);
3485 err_destroy:
3486 eb_destroy(&eb);
3487 err_out_fence:
3488 if (out_fence_fd != -1)
3489 put_unused_fd(out_fence_fd);
3490 err_in_fence:
3491 dma_fence_put(in_fence);
3492 err_ext:
3493 put_fence_array(eb.fences, eb.num_fences);
3494 return err;
3495 }
3496
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture
2022-01-26 15:21 ` [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture Matthew Auld
2022-01-26 19:42 ` [Intel-gfx] " kernel test robot
2022-01-26 20:03 ` kernel test robot
@ 2022-02-03 9:43 ` Thomas Hellström
2022-02-03 9:44 ` Matthew Auld
2 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 9:43 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> On platforms where there might be non-mappable LMEM, force userspace to
> mark the buffers with the correct hint. When dumping the BO contents
> during capture we need CPU access. Note this only applies to buffers
> that can be placed in LMEM, and also doesn't impact DG1.
Oddly enough this seems to break DG1. We probably need to understand why.
/Thomas
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 498b458fd784..3c8083852620 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1965,7 +1965,7 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
> #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>
> /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
> -static void eb_capture_stage(struct i915_execbuffer *eb)
> +static int eb_capture_stage(struct i915_execbuffer *eb)
> {
> const unsigned int count = eb->buffer_count;
> unsigned int i = count, j;
> @@ -1978,6 +1978,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
> if (!(flags & EXEC_OBJECT_CAPTURE))
> continue;
>
> + if (vma->obj->flags & I915_BO_ALLOC_TOPDOWN)
> + return -EINVAL;
> +
> for_each_batch_create_order(eb, j) {
> struct i915_capture_list *capture;
>
> @@ -1990,6 +1993,8 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
> eb->capture_lists[j] = capture;
> }
> }
> +
> + return 0;
> }
>
> /* Commit once we're in the critical path */
> @@ -3418,7 +3423,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> }
>
> ww_acquire_done(&eb.ww.ctx);
> - eb_capture_stage(&eb);
> + err = eb_capture_stage(&eb);
> + if (err)
> + goto err_vma;
>
> out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
> if (IS_ERR(out_fence)) {
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture
2022-02-03 9:43 ` Thomas Hellström
@ 2022-02-03 9:44 ` Matthew Auld
0 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 9:44 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 03/02/2022 09:43, Thomas Hellström wrote:
> On 1/26/22 16:21, Matthew Auld wrote:
>> On platforms where there might be non-mappable LMEM, force userspace to
>> mark the buffers with the correct hint. When dumping the BO contents
>> during capture we need CPU access. Note this only applies to buffers
>> that can be placed in LMEM, and also doesn't impact DG1.
>
> Oddly enough this seems to break DG1. We probably need to understand why.
I think that's just because of the last HAX patch.
>
> /Thomas
>
>
>
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 11 +++++++++--
>> 1 file changed, 9 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 498b458fd784..3c8083852620 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -1965,7 +1965,7 @@ eb_find_first_request_added(struct
>> i915_execbuffer *eb)
>> #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>> /* Stage with GFP_KERNEL allocations before we enter the signaling
>> critical path */
>> -static void eb_capture_stage(struct i915_execbuffer *eb)
>> +static int eb_capture_stage(struct i915_execbuffer *eb)
>> {
>> const unsigned int count = eb->buffer_count;
>> unsigned int i = count, j;
>> @@ -1978,6 +1978,9 @@ static void eb_capture_stage(struct
>> i915_execbuffer *eb)
>> if (!(flags & EXEC_OBJECT_CAPTURE))
>> continue;
>> + if (vma->obj->flags & I915_BO_ALLOC_TOPDOWN)
>> + return -EINVAL;
>> +
>> for_each_batch_create_order(eb, j) {
>> struct i915_capture_list *capture;
>> @@ -1990,6 +1993,8 @@ static void eb_capture_stage(struct
>> i915_execbuffer *eb)
>> eb->capture_lists[j] = capture;
>> }
>> }
>> +
>> + return 0;
>> }
>> /* Commit once we're in the critical path */
>> @@ -3418,7 +3423,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>> }
>> ww_acquire_done(&eb.ww.ctx);
>> - eb_capture_stage(&eb);
>> + err = eb_capture_stage(&eb);
>> + if (err)
>> + goto err_vma;
>> out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
>> if (IS_ERR(out_fence)) {
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (17 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 18/20] drm/i915/uapi: forbid ALLOC_TOPDOWN for error capture Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
2022-02-03 9:48 ` Thomas Hellström
2022-01-26 15:21 ` [PATCH 20/20] HAX: DG1 small BAR Matthew Auld
19 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: Thomas Hellström, dri-devel
Just pass along the probed io_size. The backend should be able to
utilize the entire range here, even if some of it is non-mappable.
It does leave open the question of what to do with stolen local-memory.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 2c7ec7ff79fd..b788fc2b3df8 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -200,6 +200,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
struct intel_memory_region *mem;
resource_size_t min_page_size;
resource_size_t io_start;
+ resource_size_t io_size;
resource_size_t lmem_size;
int err;
@@ -210,7 +211,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
io_start = pci_resource_start(pdev, 2);
- if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
+ io_size = min(pci_resource_len(pdev, 2), lmem_size);
+ if (!io_size)
return ERR_PTR(-ENODEV);
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@@ -220,7 +222,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size,
min_page_size,
io_start,
- lmem_size,
+ io_size,
INTEL_MEMORY_LOCAL,
0,
&intel_region_lmem_ops);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread
* Re: [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error
2022-01-26 15:21 ` [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error Matthew Auld
@ 2022-02-03 9:48 ` Thomas Hellström
2022-02-03 11:18 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 9:48 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On 1/26/22 16:21, Matthew Auld wrote:
> Just pass along the probed io_size. The backend should be able to
> utilize the entire range here, even if some of it is non-mappable.
Changes here LGTM.
>
> It does leave open with what to do with stolen local-memory.
Are objects in stolen local required to be mappable?
/Thomas
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
> index 2c7ec7ff79fd..b788fc2b3df8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
> +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
> @@ -200,6 +200,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
> struct intel_memory_region *mem;
> resource_size_t min_page_size;
> resource_size_t io_start;
> + resource_size_t io_size;
> resource_size_t lmem_size;
> int err;
>
> @@ -210,7 +211,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
> lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
>
> io_start = pci_resource_start(pdev, 2);
> - if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
> + io_size = min(pci_resource_len(pdev, 2), lmem_size);
> + if (!io_size)
> return ERR_PTR(-ENODEV);
>
> min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
> @@ -220,7 +222,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
> lmem_size,
> min_page_size,
> io_start,
> - lmem_size,
> + io_size,
> INTEL_MEMORY_LOCAL,
> 0,
> &intel_region_lmem_ops);
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error
2022-02-03 9:48 ` Thomas Hellström
@ 2022-02-03 11:18 ` Matthew Auld
2022-02-03 13:56 ` Thomas Hellström
0 siblings, 1 reply; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 11:18 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 03/02/2022 09:48, Thomas Hellström wrote:
>
> On 1/26/22 16:21, Matthew Auld wrote:
>> Just pass along the probed io_size. The backend should be able to
>> utilize the entire range here, even if some of it is non-mappable.
> Changes here LGTM.
>>
>> It does leave open with what to do with stolen local-memory.
>
> Are objects in stolen local required to be mappable?
From a quick look I don't really see such users on discrete, outside of
maybe intelfb_create(), where I guess the initial fb might be located in
stolen on DG1. But from DG2+ it looks like it will just be located in
normal LMEM. For that I was thinking we add something like
i915_gem_object_create_region_at(), and somehow wire that up to the
{fpfn, lpfn}...
>
> /Thomas
>
>
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_region_lmem.c | 6 ++++--
>> 1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
>> b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
>> index 2c7ec7ff79fd..b788fc2b3df8 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
>> @@ -200,6 +200,7 @@ static struct intel_memory_region
>> *setup_lmem(struct intel_gt *gt)
>> struct intel_memory_region *mem;
>> resource_size_t min_page_size;
>> resource_size_t io_start;
>> + resource_size_t io_size;
>> resource_size_t lmem_size;
>> int err;
>> @@ -210,7 +211,8 @@ static struct intel_memory_region
>> *setup_lmem(struct intel_gt *gt)
>> lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
>> io_start = pci_resource_start(pdev, 2);
>> - if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
>> + io_size = min(pci_resource_len(pdev, 2), lmem_size);
>> + if (!io_size)
>> return ERR_PTR(-ENODEV);
>> min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
>> @@ -220,7 +222,7 @@ static struct intel_memory_region
>> *setup_lmem(struct intel_gt *gt)
>> lmem_size,
>> min_page_size,
>> io_start,
>> - lmem_size,
>> + io_size,
>> INTEL_MEMORY_LOCAL,
>> 0,
>> &intel_region_lmem_ops);
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error
2022-02-03 11:18 ` Matthew Auld
@ 2022-02-03 13:56 ` Thomas Hellström
2022-02-03 14:09 ` Matthew Auld
0 siblings, 1 reply; 50+ messages in thread
From: Thomas Hellström @ 2022-02-03 13:56 UTC (permalink / raw)
To: Matthew Auld, intel-gfx; +Cc: dri-devel
On Thu, 2022-02-03 at 11:18 +0000, Matthew Auld wrote:
> On 03/02/2022 09:48, Thomas Hellström wrote:
> >
> > On 1/26/22 16:21, Matthew Auld wrote:
> > > Just pass along the probed io_size. The backend should be able to
> > > utilize the entire range here, even if some of it is non-
> > > mappable.
> > Changes here LGTM.
> > >
> > > It does leave open with what to do with stolen local-memory.
> >
> > Are objects in stolen local required to be mappable?
>
> From a quick look I don't really see such users on discrete, outside
> of
> maybe intelfb_create(), where I guess the initial fb might be located
> in
> stolen on DG1. But from DG2+ it looks like it will just be located in
> normal LMEM. For that I was thinking we add something like
> i915_gem_object_create_region_at(), and somehow wire that up to the
> {fpfn, lpfn}...
So we could then skip STOLEN completely on DG2+? Could we then also do
the same on DG1, at least assuming that creating and pinning an object
for that initial fb would be done before any other pinning into LMEM?
/Thomas
^ permalink raw reply [flat|nested] 50+ messages in thread
* Re: [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error
2022-02-03 13:56 ` Thomas Hellström
@ 2022-02-03 14:09 ` Matthew Auld
0 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-02-03 14:09 UTC (permalink / raw)
To: Thomas Hellström, intel-gfx; +Cc: dri-devel
On 03/02/2022 13:56, Thomas Hellström wrote:
> On Thu, 2022-02-03 at 11:18 +0000, Matthew Auld wrote:
>> On 03/02/2022 09:48, Thomas Hellström wrote:
>>>
>>> On 1/26/22 16:21, Matthew Auld wrote:
>>>> Just pass along the probed io_size. The backend should be able to
>>>> utilize the entire range here, even if some of it is non-
>>>> mappable.
>>> Changes here LGTM.
>>>>
>>>> It does leave open with what to do with stolen local-memory.
>>>
>>> Are objects in stolen local required to be mappable?
>>
>> From a quick look I don't really see such users on discrete, outside
>> of
>> maybe intelfb_create(), where I guess the initial fb might be located
>> in
>> stolen on DG1. But from DG2+ it looks like it will just be located in
>> normal LMEM. For that I was thinking we add something like
>> i915_gem_object_create_region_at(), and somehow wire that up to the
>> {fpfn, lpfn}...
>
> So we could then skip STOLEN completely on DG2+? Could we then also do
> the same on DG1, at least assuming that creating and pinning an object
> for that initial fb would be done before any other pinning into LMEM?
It looks like fbc is the main user on discrete, AFAICT, but that doesn't
seem to use the gem object interface, and instead just plugs into the
underlying drm_mm directly. So AFAIK we still want stolen on DG2/DG1 for
that.
>
> /Thomas
>
>
^ permalink raw reply [flat|nested] 50+ messages in thread
* [PATCH 20/20] HAX: DG1 small BAR
2022-01-26 15:21 [PATCH 00/20] Initial support for small BAR recovery Matthew Auld
` (18 preceding siblings ...)
2022-01-26 15:21 ` [PATCH 19/20] drm/i915/lmem: don't treat small BAR as an error Matthew Auld
@ 2022-01-26 15:21 ` Matthew Auld
19 siblings, 0 replies; 50+ messages in thread
From: Matthew Auld @ 2022-01-26 15:21 UTC (permalink / raw)
To: intel-gfx; +Cc: dri-devel
Just for CI.
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_create.c | 5 ++---
drivers/gpu/drm/i915/gt/intel_region_lmem.c | 2 +-
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 98d63cb21e94..6e6a3f6685ab 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -437,9 +437,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
return -EINVAL;
} else {
- if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
- ext_data.placements[0]->type !=
- INTEL_MEMORY_SYSTEM))
+ if (ext_data.n_placements > 1 ||
+ ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)
ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index b788fc2b3df8..a99516d2b706 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -211,7 +211,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
io_start = pci_resource_start(pdev, 2);
- io_size = min(pci_resource_len(pdev, 2), lmem_size);
+ io_size = SZ_256M;
if (!io_size)
return ERR_PTR(-ENODEV);
--
2.34.1
^ permalink raw reply related [flat|nested] 50+ messages in thread