* [PATCH 1/3] drm/ttm: move swapout logic around @ 2021-03-15 16:04 Christian König 2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König ` (3 more replies) 0 siblings, 4 replies; 18+ messages in thread From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw) To: dri-devel; +Cc: ray.huang Move the iteration of the global lru into the new function ttm_global_swapout() and use that instead in drivers. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++--------------------- drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++ drivers/gpu/drm/ttm/ttm_tt.c | 2 +- drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 3 +- include/drm/ttm/ttm_device.h | 2 + 7 files changed, 53 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a08dec7281fc..56d2e38af273 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1186,56 +1186,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_wait); -/* - * A buffer object shrink method that tries to swap out the first - * buffer object on the bo_global::swap_lru list. 
- */ -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; - struct ttm_buffer_object *bo; - int ret = -EBUSY; bool locked; - unsigned i; - - spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, - NULL)) - continue; - - if (!ttm_bo_get_unless_zero(bo)) { - if (locked) - dma_resv_unlock(bo->base.resv); - continue; - } + int ret; - ret = 0; - break; - } - if (!ret) - break; - } + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) + return -EBUSY; - if (ret) { - spin_unlock(&glob->lru_lock); - return ret; + if (!ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; } if (bo->deleted) { - ret = ttm_bo_cleanup_refs(bo, false, false, locked); + ttm_bo_cleanup_refs(bo, false, false, locked); ttm_bo_put(bo); - return ret; + return 0; } ttm_bo_del_from_lru(bo); + /* TODO: Cleanup the locking */ spin_unlock(&glob->lru_lock); - /** + /* * Move to system cached */ - if (bo->mem.mem_type != TTM_PL_SYSTEM) { struct ttm_operation_ctx ctx = { false, false }; struct ttm_resource evict_mem; @@ -1255,29 +1234,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) } } - /** + /* * Make sure BO is idle. */ - ret = ttm_bo_wait(bo, false, false); if (unlikely(ret != 0)) goto out; ttm_bo_unmap_virtual(bo); - /** + /* * Swap out. Buffer will be swapped in again as soon as * anyone tries to access a ttm page. */ - if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo); ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); out: - /** - * + /* * Unreserve without putting on LRU to avoid swapping out an * already swapped buffer. 
*/ @@ -1286,7 +1262,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ttm_bo_put(bo); return ret; } -EXPORT_SYMBOL(ttm_bo_swapout); void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 95e1b7b1f2e6..dfc2a7e4e490 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -102,6 +102,35 @@ static int ttm_global_init(void) return ret; } +/** + * A buffer object shrink method that tries to swap out the first + * buffer object on the global::swap_lru list. + */ +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_buffer_object *bo; + unsigned i; + int ret; + + spin_lock(&glob->lru_lock); + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { + list_for_each_entry(bo, &glob->swap_lru[i], swap) { + uint32_t num_pages = bo->ttm->num_pages; + + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } + } + spin_unlock(&glob->lru_lock); + return 0; +} +EXPORT_SYMBOL(ttm_global_swapout); + static void ttm_init_sysman(struct ttm_device *bdev) { struct ttm_resource_manager *man = &bdev->sysman; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 2f0833c98d2c..95b5cff25f4c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, }; int ret; - ret = ttm_bo_swapout(&ctx, GFP_NOFS); + ret = ttm_global_swapout(&ctx, GFP_NOFS); return ret < 0 ? 
SHRINK_EMPTY : ret; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index e972af07d029..104b95a8c7a2 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -38,6 +38,7 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> +#include <drm/ttm/ttm_device.h> #include "ttm_memory.h" @@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, while (ttm_zones_above_swap_target(glob, from_wq, extra)) { spin_unlock(&glob->lock); - ret = ttm_bo_swapout(ctx, GFP_KERNEL); + ret = ttm_global_swapout(ctx, GFP_KERNEL); spin_lock(&glob->lock); if (unlikely(ret < 0)) break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6910111099c8..b991422e156c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4fb523dfab32..5044ac330858 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write); -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags); /** * ttm_bo_uses_embedded_gem_object - check if the given bo uses the diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 035bbc044a3b..6a0b267d4fe6 100644 --- 
a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -297,6 +297,8 @@ struct ttm_device { struct delayed_work wq; }; +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); + static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) { -- 2.25.1 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 2/3] drm/ttm: remove swap LRU v2 2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König @ 2021-03-15 16:04 ` Christian König 2021-03-15 18:54 ` kernel test robot 2021-03-15 18:54 ` Matthew Auld 2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König ` (2 subsequent siblings) 3 siblings, 2 replies; 18+ messages in thread From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw) To: dri-devel; +Cc: ray.huang Instead evict round robin from each devices SYSTEM and TT domain. v2: reorder num_pages access reported by Dan's script Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/ttm/ttm_bo.c | 33 ++-------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 - drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 1 - include/drm/ttm/ttm_bo_driver.h | 1 - include/drm/ttm/ttm_device.h | 7 +--- 7 files changed, 52 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 56d2e38af273..a1be88be357b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_device *bdev = bo->bdev; - list_del_init(&bo->swap); list_del_init(&bo->lru); if (bdev->funcs->del_from_lru_notify) @@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); - if (man->use_tt && bo->ttm && - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | - TTM_PAGE_FLAG_SWAPPED))) { - struct list_head *swap; - - swap = &ttm_glob.swap_lru[bo->priority]; - list_move_tail(&bo->swap, swap); - } else { - list_del_init(&bo->swap); - } if (bdev->funcs->del_from_lru_notify) bdev->funcs->del_from_lru_notify(bo); @@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object 
*bo, ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); break; } - if (bo->ttm && !(bo->ttm->page_flags & - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo); } } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) list_bulk_move_tail(&man->lru[i], &pos->first->lru, &pos->last->lru); } - - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i]; - struct list_head *lru; - - if (!pos->first) - continue; - - dma_resv_assert_held(pos->first->base.resv); - dma_resv_assert_held(pos->last->base.resv); - - lru = &ttm_glob.swap_lru[i]; - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap); - } } EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail); @@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, kref_init(&bo->kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); - INIT_LIST_HEAD(&bo->swap); bo->bdev = bdev; bo->type = type; bo->mem.mem_type = TTM_PL_SYSTEM; @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, bool locked; int ret; + if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | + TTM_PAGE_FLAG_SWAPPED)) + return false; + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) return -EBUSY; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 031e5819fec4..a2a17c84ceb3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); INIT_LIST_HEAD(&fbo->base.lru); - INIT_LIST_HEAD(&fbo->base.swap); fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index dfc2a7e4e490..2c280fb1e992 100644 --- 
a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -67,7 +67,6 @@ static int ttm_global_init(void) unsigned long num_pages; struct sysinfo si; int ret = 0; - unsigned i; mutex_lock(&ttm_global_mutex); if (++ttm_glob_use_count > 1) @@ -90,8 +89,6 @@ static int ttm_global_init(void) goto out; } - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) - INIT_LIST_HEAD(&glob->swap_lru[i]); INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0); @@ -109,27 +106,60 @@ static int ttm_global_init(void) long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; + struct ttm_device *bdev; + int ret = -EBUSY; + + mutex_lock(&ttm_global_mutex); + list_for_each_entry(bdev, &glob->device_list, device_list) { + ret = ttm_device_swapout(bdev, ctx, gfp_flags); + if (ret > 0) { + list_move_tail(&bdev->device_list, &glob->device_list); + break; + } + } + mutex_unlock(&ttm_global_mutex); + return ret; +} +EXPORT_SYMBOL(ttm_global_swapout); + +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_resource_manager *man; struct ttm_buffer_object *bo; - unsigned i; + unsigned i, j; int ret; spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - uint32_t num_pages = bo->ttm->num_pages; - - ret = ttm_bo_swapout(bo, ctx, gfp_flags); - /* ttm_bo_swapout has dropped the lru_lock */ - if (!ret) - return num_pages; - if (ret != -EBUSY) - return ret; + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { + man = ttm_manager_type(bdev, i); + if (!man || !man->use_tt) + continue; + + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { + list_for_each_entry(bo, &man->lru[j], lru) { + long num_pages; + + if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + continue; + + num_pages = bo->ttm->num_pages; + 
ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } } } spin_unlock(&glob->lru_lock); return 0; } -EXPORT_SYMBOL(ttm_global_swapout); +EXPORT_SYMBOL(ttm_device_swapout); static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index b991422e156c..0e82b0662d9e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 5044ac330858..3587f660e8f4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -144,7 +144,6 @@ struct ttm_buffer_object { struct list_head lru; struct list_head ddestroy; - struct list_head swap; /** * Members protected by a bo reservation. 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8959c0075cfd..d007feef7676 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY]; - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY]; }; /* diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 6a0b267d4fe6..cda6efb4c34b 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -63,11 +63,6 @@ extern struct ttm_global { */ struct list_head device_list; - /** - * Protected by the lru_lock. - */ - struct list_head swap_lru[TTM_MAX_BO_PRIORITY]; - /** * Internal protection. */ @@ -298,6 +293,8 @@ struct ttm_device { }; long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags); static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) -- 2.25.1 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2 2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König @ 2021-03-15 18:54 ` kernel test robot 2021-03-15 18:54 ` Matthew Auld 1 sibling, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-15 18:54 UTC (permalink / raw) To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all [-- Attachment #1: Type: text/plain, Size: 3216 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-s002-20210315 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.3-277-gc089cd2d-dirty # https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda122e7b git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b # save the attached .config to linux build tree make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> sparse warnings: (new ones prefixed by >>) drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static? drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. 
Should it be static? >> drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c 124 > 125 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, 126 gfp_t gfp_flags) 127 { 128 struct ttm_global *glob = &ttm_glob; 129 struct ttm_resource_manager *man; 130 struct ttm_buffer_object *bo; 131 unsigned i, j; 132 int ret; 133 134 spin_lock(&glob->lru_lock); 135 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { 136 man = ttm_manager_type(bdev, i); 137 if (!man || !man->use_tt) 138 continue; 139 140 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { 141 list_for_each_entry(bo, &man->lru[j], lru) { 142 long num_pages; 143 144 if (!bo->ttm || 145 bo->ttm->page_flags & TTM_PAGE_FLAG_SG || 146 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) 147 continue; 148 149 num_pages = bo->ttm->num_pages; 150 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 151 /* ttm_bo_swapout has dropped the lru_lock */ 152 if (!ret) 153 return num_pages; 154 if (ret != -EBUSY) 155 return ret; 156 } 157 } 158 } 159 spin_unlock(&glob->lru_lock); 160 return 0; 161 } 162 EXPORT_SYMBOL(ttm_device_swapout); 163 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 39354 bytes --] [-- Attachment #3: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2 @ 2021-03-15 18:54 ` kernel test robot 0 siblings, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-15 18:54 UTC (permalink / raw) To: kbuild-all [-- Attachment #1: Type: text/plain, Size: 3297 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-randconfig-s002-20210315 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.3-277-gc089cd2d-dirty # https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda122e7b git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b # save the attached .config to linux build tree make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> sparse warnings: (new ones prefixed by >>) drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static? drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. Should it be static? 
>> drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c 124 > 125 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, 126 gfp_t gfp_flags) 127 { 128 struct ttm_global *glob = &ttm_glob; 129 struct ttm_resource_manager *man; 130 struct ttm_buffer_object *bo; 131 unsigned i, j; 132 int ret; 133 134 spin_lock(&glob->lru_lock); 135 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { 136 man = ttm_manager_type(bdev, i); 137 if (!man || !man->use_tt) 138 continue; 139 140 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { 141 list_for_each_entry(bo, &man->lru[j], lru) { 142 long num_pages; 143 144 if (!bo->ttm || 145 bo->ttm->page_flags & TTM_PAGE_FLAG_SG || 146 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) 147 continue; 148 149 num_pages = bo->ttm->num_pages; 150 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 151 /* ttm_bo_swapout has dropped the lru_lock */ 152 if (!ret) 153 return num_pages; 154 if (ret != -EBUSY) 155 return ret; 156 } 157 } 158 } 159 spin_unlock(&glob->lru_lock); 160 return 0; 161 } 162 EXPORT_SYMBOL(ttm_device_swapout); 163 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org [-- Attachment #2: config.gz --] [-- Type: application/gzip, Size: 39354 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2 2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König 2021-03-15 18:54 ` kernel test robot @ 2021-03-15 18:54 ` Matthew Auld 2021-03-15 19:27 ` Christian König 1 sibling, 1 reply; 18+ messages in thread From: Matthew Auld @ 2021-03-15 18:54 UTC (permalink / raw) To: Christian König; +Cc: ray.huang, ML dri-devel On Mon, 15 Mar 2021 at 16:04, Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > > Instead evict round robin from each devices SYSTEM and TT domain. > > v2: reorder num_pages access reported by Dan's script > > Signed-off-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/ttm/ttm_bo.c | 33 ++-------------- > drivers/gpu/drm/ttm/ttm_bo_util.c | 1 - > drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++-------- > drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- > include/drm/ttm/ttm_bo_api.h | 1 - > include/drm/ttm/ttm_bo_driver.h | 1 - > include/drm/ttm/ttm_device.h | 7 +--- > 7 files changed, 52 insertions(+), 53 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index 56d2e38af273..a1be88be357b 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) > { > struct ttm_device *bdev = bo->bdev; > > - list_del_init(&bo->swap); > list_del_init(&bo->lru); > > if (bdev->funcs->del_from_lru_notify) > @@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, > > man = ttm_manager_type(bdev, mem->mem_type); > list_move_tail(&bo->lru, &man->lru[bo->priority]); > - if (man->use_tt && bo->ttm && > - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | > - TTM_PAGE_FLAG_SWAPPED))) { > - struct list_head *swap; > - > - swap = &ttm_glob.swap_lru[bo->priority]; > - list_move_tail(&bo->swap, swap); > - } else { > - list_del_init(&bo->swap); > - } > > if (bdev->funcs->del_from_lru_notify) > 
bdev->funcs->del_from_lru_notify(bo); > @@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, > ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); > break; > } > - if (bo->ttm && !(bo->ttm->page_flags & > - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) > - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo); > } > } > EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); > @@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) > list_bulk_move_tail(&man->lru[i], &pos->first->lru, > &pos->last->lru); > } > - > - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { > - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i]; > - struct list_head *lru; > - > - if (!pos->first) > - continue; > - > - dma_resv_assert_held(pos->first->base.resv); > - dma_resv_assert_held(pos->last->base.resv); > - > - lru = &ttm_glob.swap_lru[i]; > - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap); > - } > } > EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail); > > @@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, > kref_init(&bo->kref); > INIT_LIST_HEAD(&bo->lru); > INIT_LIST_HEAD(&bo->ddestroy); > - INIT_LIST_HEAD(&bo->swap); > bo->bdev = bdev; > bo->type = type; > bo->mem.mem_type = TTM_PL_SYSTEM; > @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, > bool locked; > int ret; > > + if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | > + TTM_PAGE_FLAG_SWAPPED)) > + return false; > + return 0; ? Seems inconsistent to return zero here and not drop the lru lock? Or maybe turn this into a programmer error, since the current caller already checks for the above? 
> if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) > return -EBUSY; > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c > index 031e5819fec4..a2a17c84ceb3 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_util.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c > @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, > atomic_inc(&ttm_glob.bo_count); > INIT_LIST_HEAD(&fbo->base.ddestroy); > INIT_LIST_HEAD(&fbo->base.lru); > - INIT_LIST_HEAD(&fbo->base.swap); > fbo->base.moving = NULL; > drm_vma_node_reset(&fbo->base.base.vma_node); > > diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c > index dfc2a7e4e490..2c280fb1e992 100644 > --- a/drivers/gpu/drm/ttm/ttm_device.c > +++ b/drivers/gpu/drm/ttm/ttm_device.c > @@ -67,7 +67,6 @@ static int ttm_global_init(void) > unsigned long num_pages; > struct sysinfo si; > int ret = 0; > - unsigned i; > > mutex_lock(&ttm_global_mutex); > if (++ttm_glob_use_count > 1) > @@ -90,8 +89,6 @@ static int ttm_global_init(void) > goto out; > } > > - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) > - INIT_LIST_HEAD(&glob->swap_lru[i]); > INIT_LIST_HEAD(&glob->device_list); > atomic_set(&glob->bo_count, 0); > > @@ -109,27 +106,60 @@ static int ttm_global_init(void) > long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) > { > struct ttm_global *glob = &ttm_glob; > + struct ttm_device *bdev; > + int ret = -EBUSY; > + > + mutex_lock(&ttm_global_mutex); > + list_for_each_entry(bdev, &glob->device_list, device_list) { > + ret = ttm_device_swapout(bdev, ctx, gfp_flags); Mixing int and long for num_pages. Does ttm enforce a maximum page count somewhere for object sizes? Something like INT_MAX, since it doesn't look like ttm is consistently using the same type(unsigned long?) when representing the number of pages for an object? 
> + if (ret > 0) { > + list_move_tail(&bdev->device_list, &glob->device_list); > + break; > + } > + } > + mutex_unlock(&ttm_global_mutex); > + return ret; > +} > +EXPORT_SYMBOL(ttm_global_swapout); > + > +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, > + gfp_t gfp_flags) > +{ > + struct ttm_global *glob = &ttm_glob; > + struct ttm_resource_manager *man; > struct ttm_buffer_object *bo; > - unsigned i; > + unsigned i, j; > int ret; > > spin_lock(&glob->lru_lock); > - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { > - list_for_each_entry(bo, &glob->swap_lru[i], swap) { > - uint32_t num_pages = bo->ttm->num_pages; > - > - ret = ttm_bo_swapout(bo, ctx, gfp_flags); > - /* ttm_bo_swapout has dropped the lru_lock */ > - if (!ret) > - return num_pages; > - if (ret != -EBUSY) > - return ret; > + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { > + man = ttm_manager_type(bdev, i); > + if (!man || !man->use_tt) > + continue; > + > + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { > + list_for_each_entry(bo, &man->lru[j], lru) { > + long num_pages; > + > + if (!bo->ttm || > + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || > + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) > + continue; > + > + num_pages = bo->ttm->num_pages; > + ret = ttm_bo_swapout(bo, ctx, gfp_flags); > + /* ttm_bo_swapout has dropped the lru_lock */ > + if (!ret) > + return num_pages; > + if (ret != -EBUSY) > + return ret; > + } > } > } > spin_unlock(&glob->lru_lock); > return 0; > } > -EXPORT_SYMBOL(ttm_global_swapout); > +EXPORT_SYMBOL(ttm_device_swapout); > > static void ttm_init_sysman(struct ttm_device *bdev) > { > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > index b991422e156c..0e82b0662d9e 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) > vmw_execbuf_release_pinned_bo(dev_priv); > vmw_resource_evict_all(dev_priv); > 
vmw_release_device_early(dev_priv); > - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); > + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0); Is this the intended behaviour? ttm_device_swapout() still just returns num_pages if it swapped something out. I assume this wants to keep swapping stuff out, until it can't anymore. Or am I missing something? > if (dev_priv->enable_fb) > vmw_fifo_resource_dec(dev_priv); > if (atomic_read(&dev_priv->num_fifo_resources) != 0) { > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 5044ac330858..3587f660e8f4 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -144,7 +144,6 @@ struct ttm_buffer_object { > > struct list_head lru; > struct list_head ddestroy; > - struct list_head swap; > > /** > * Members protected by a bo reservation. > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > index 8959c0075cfd..d007feef7676 100644 > --- a/include/drm/ttm/ttm_bo_driver.h > +++ b/include/drm/ttm/ttm_bo_driver.h > @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { > struct ttm_lru_bulk_move { > struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; > struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY]; > - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY]; > }; > > /* > diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h > index 6a0b267d4fe6..cda6efb4c34b 100644 > --- a/include/drm/ttm/ttm_device.h > +++ b/include/drm/ttm/ttm_device.h > @@ -63,11 +63,6 @@ extern struct ttm_global { > */ > struct list_head device_list; > > - /** > - * Protected by the lru_lock. > - */ > - struct list_head swap_lru[TTM_MAX_BO_PRIORITY]; > - > /** > * Internal protection. 
> */ > @@ -298,6 +293,8 @@ struct ttm_device { > }; > > long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); > +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, > + gfp_t gfp_flags); > > static inline struct ttm_resource_manager * > ttm_manager_type(struct ttm_device *bdev, int mem_type) > -- > 2.25.1 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2 2021-03-15 18:54 ` Matthew Auld @ 2021-03-15 19:27 ` Christian König 0 siblings, 0 replies; 18+ messages in thread From: Christian König @ 2021-03-15 19:27 UTC (permalink / raw) To: Matthew Auld; +Cc: ray.huang, ML dri-devel Am 15.03.21 um 19:54 schrieb Matthew Auld: > On Mon, 15 Mar 2021 at 16:04, Christian König > <ckoenig.leichtzumerken@gmail.com> wrote: >> [SNIP] >> @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, >> bool locked; >> int ret; >> >> + if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | >> + TTM_PAGE_FLAG_SWAPPED)) >> + return false; >> + > return 0; ? > > Seems inconsistent to return zero here and not drop the lru lock? Or > maybe turn this into a programmer error, since the current caller > already checks for the above? Thanks, that is just an artifact from rebasing and should be removed. >> [SNIP] >> >> @@ -109,27 +106,60 @@ static int ttm_global_init(void) >> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) >> { >> struct ttm_global *glob = &ttm_glob; >> + struct ttm_device *bdev; >> + int ret = -EBUSY; >> + >> + mutex_lock(&ttm_global_mutex); >> + list_for_each_entry(bdev, &glob->device_list, device_list) { >> + ret = ttm_device_swapout(bdev, ctx, gfp_flags); > Mixing int and long for num_pages. > > Does ttm enforce a maximum page count somewhere for object sizes? We should use 32 bit values for the number of pages in TTM, even signed values allow for 8TB large BOs. And I really hope that we can get rid of the BO approach in general before we ever come close to that limit. > Something like INT_MAX, since it doesn't look like ttm is consistently > using the same type(unsigned long?) when representing the number of > pages for an object? I should probably add a check for that in the tt code, yes. 
> [SNIP] > static void ttm_init_sysman(struct ttm_device *bdev) > { > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > index b991422e156c..0e82b0662d9e 100644 > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c > @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev) > vmw_execbuf_release_pinned_bo(dev_priv); > vmw_resource_evict_all(dev_priv); > vmw_release_device_early(dev_priv); > - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); > + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0); > Is this the intended behaviour? ttm_device_swapout() still just > returns num_pages if it swapped something out. I assume this wants to > keep swapping stuff out, until it can't anymore. Or am I missing > something? Indeed that's a mix up. Thanks for pointing that out. Christian. _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 3/3] drm/ttm: switch to per device LRU lock 2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König 2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König @ 2021-03-15 16:04 ` Christian König 2021-03-15 20:17 ` kernel test robot 2021-03-16 9:35 ` Daniel Vetter 2021-03-15 18:47 ` kernel test robot 2021-03-19 9:41 ` kernel test robot 3 siblings, 2 replies; 18+ messages in thread From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw) To: dri-devel; +Cc: ray.huang Instead of having a global lock. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- drivers/gpu/drm/qxl/qxl_release.c | 5 +-- drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- include/drm/ttm/ttm_bo_driver.h | 4 +-- include/drm/ttm/ttm_device.h | 4 +-- 8 files changed, 43 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9d19078246c8..ae18c0e32347 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm_bo_base *bo_base; if (vm->bulk_moveable) { - spin_lock(&ttm_glob.lru_lock); + spin_lock(&adev->mman.bdev.lru_lock); ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&adev->mman.bdev.lru_lock); return; } memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); - spin_lock(&ttm_glob.lru_lock); + spin_lock(&adev->mman.bdev.lru_lock); list_for_each_entry(bo_base, &vm->idle, vm_status) { struct amdgpu_bo *bo = bo_base->bo; @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, &bo->shadow->tbo.mem, &vm->lru_bulk_move); } - spin_unlock(&ttm_glob.lru_lock); + 
spin_unlock(&adev->mman.bdev.lru_lock); vm->bulk_moveable = true; } diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index f5845c96d414..b19f2f00b215 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base); - spin_lock(&ttm_glob.lru_lock); - list_for_each_entry(entry, &release->bos, head) { bo = entry->bo; dma_resv_add_shared_fence(bo->base.resv, &release->base); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); ww_acquire_fini(&release->ticket); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a1be88be357b..a8103c8718a3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) * reference it any more. The only tricky case is the trylock on * the resv object while holding the lru_lock. 
*/ - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); bo->base.resv = &bo->base._resv; - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); } return r; @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (unlock_resv) dma_resv_unlock(bo->base.resv); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, 30 * HZ); @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, else if (lret == 0) return -EBUSY; - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { /* * We raced, and lost, someone else holds the reservation now, @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, * delayed destruction would succeed, so just return success * here. */ - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); return 0; } ret = 0; @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, if (ret || unlikely(list_empty(&bo->ddestroy))) { if (unlock_resv) dma_resv_unlock(bo->base.resv); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); return ret; } ttm_bo_del_from_lru(bo); list_del_init(&bo->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo); if (unlock_resv) @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, */ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) { - struct ttm_global *glob = &ttm_glob; struct list_head removed; bool empty; INIT_LIST_HEAD(&removed); - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); while (!list_empty(&bdev->ddestroy)) { struct ttm_buffer_object *bo; @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) continue; if (remove_all || bo->base.resv != &bo->base._resv) { - 
spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); dma_resv_lock(bo->base.resv, NULL); - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else if (dma_resv_trylock(bo->base.resv)) { ttm_bo_cleanup_refs(bo, false, !remove_all, true); } else { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); } ttm_bo_put(bo); - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); } list_splice_tail(&removed, &bdev->ddestroy); empty = list_empty(&bdev->ddestroy); - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); return empty; } @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) ttm_bo_flush_all_fences(bo); bo->deleted = true; - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); /* * Make pinned bos immediately available to @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref) kref_init(&bo->kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); schedule_delayed_work(&bdev->wq, ((HZ / 100) < 1) ? 
1 : HZ / 100); return; } - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_del_from_lru(bo); list_del(&bo->ddestroy); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ttm_bo_cleanup_memtype_use(bo); dma_resv_unlock(bo->base.resv); @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, unsigned i; int ret; - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { list_for_each_entry(bo, &man->lru[i], lru) { bool busy; @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) busy_bo = NULL; - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); if (busy_bo) ttm_bo_put(busy_bo); @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, return ret; } - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); ret = ttm_bo_evict(bo, ctx); if (locked) @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, mem->mem_type = place->mem_type; mem->placement = place->flags; - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, mem, NULL); - spin_unlock(&ttm_glob.lru_lock); - + spin_unlock(&bo->bdev->lru_lock); return 0; } @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_global *glob = &ttm_glob; bool locked; int ret; @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, ttm_bo_del_from_lru(bo); /* TODO: Cleanup the locking */ - spin_unlock(&glob->lru_lock); + spin_unlock(&bo->bdev->lru_lock); /* * Move to system cached diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 2c280fb1e992..924d892109e8 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ 
b/drivers/gpu/drm/ttm/ttm_device.c @@ -81,7 +81,6 @@ static int ttm_global_init(void) ttm_pool_mgr_init(num_pages * 50 / 100); ttm_tt_mgr_init(); - spin_lock_init(&glob->lru_lock); glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); if (unlikely(glob->dummy_read_page == NULL)) { @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; struct ttm_buffer_object *bo; unsigned i, j; int ret; - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt) @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, } } } - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); return 0; } EXPORT_SYMBOL(ttm_device_swapout); @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, bdev->vma_manager = vma_manager; INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); + spin_lock_init(&bdev->lru_lock); INIT_LIST_HEAD(&bdev->ddestroy); bdev->dev_mapping = mapping; mutex_lock(&ttm_global_mutex); @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init); void ttm_device_fini(struct ttm_device *bdev) { - struct ttm_global *glob = &ttm_glob; struct ttm_resource_manager *man; unsigned i; @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) if (ttm_bo_delayed_delete(bdev, true)) pr_debug("Delayed destroy list was clean\n"); - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) pr_debug("Swap list %d was clean\n", i); - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); ttm_pool_fini(&bdev->pool); ttm_global_release(); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c 
index 690ab97d52b7..071c48d672c6 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, if (list_empty(list)) return; - spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket); @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, if (list_empty(list)) return; - spin_lock(&ttm_glob.lru_lock); list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, dma_resv_add_shared_fence(bo->base.resv, fence); else dma_resv_add_excl_fence(bo->base.resv, fence); - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); + ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } - spin_unlock(&ttm_glob.lru_lock); if (ticket) ww_acquire_fini(ticket); } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index ed1672a9f332..04f2eef653ab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, .no_wait_gpu = false, .force_alloc = true }; - struct ttm_global *glob = &ttm_glob; struct dma_fence *fence; int ret; unsigned i; @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, * Can't use standard list traversal since we're unlocking. 
*/ - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { while (!list_empty(&man->lru[i])) { - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, NULL); if (ret) return ret; - spin_lock(&glob->lru_lock); + spin_lock(&bdev->lru_lock); } } - spin_unlock(&glob->lru_lock); + spin_unlock(&bdev->lru_lock); spin_lock(&man->move_lock); fence = dma_fence_get(man->move); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index d007feef7676..dbccac957f8f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, static inline void ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) { - spin_lock(&ttm_glob.lru_lock); + spin_lock(&bo->bdev->lru_lock); ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); - spin_unlock(&ttm_glob.lru_lock); + spin_unlock(&bo->bdev->lru_lock); } static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index cda6efb4c34b..bae56d29e8ff 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -56,7 +56,6 @@ extern struct ttm_global { */ struct page *dummy_read_page; - spinlock_t lru_lock; /** * Protected by ttm_global_mutex. @@ -277,8 +276,9 @@ struct ttm_device { struct ttm_pool pool; /* - * Protected by the global:lru lock. + * Protection for the per manager LRU and ddestroy lists. */ + spinlock_t lru_lock; struct list_head ddestroy; /* -- 2.25.1 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock 2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König @ 2021-03-15 20:17 ` kernel test robot 2021-03-16 9:35 ` Daniel Vetter 1 sibling, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw) To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all [-- Attachment #1: Type: text/plain, Size: 4014 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-m001-20210315 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> smatch warnings: drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662) vim +158 drivers/gpu/drm/ttm/ttm_device.c 70ae63f3a85b97 Christian König 2021-03-15 123 70ae63f3a85b97 Christian König 2021-03-15 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, 70ae63f3a85b97 Christian König 2021-03-15 125 gfp_t gfp_flags) 70ae63f3a85b97 Christian König 2021-03-15 126 { 70ae63f3a85b97 Christian König 2021-03-15 127 struct ttm_resource_manager *man; 824dca26fe3958 Christian König 2021-03-15 128 struct ttm_buffer_object *bo; 70ae63f3a85b97 Christian König 2021-03-15 129 unsigned i, j; 824dca26fe3958 Christian König 2021-03-15 130 int ret; 824dca26fe3958 Christian König 2021-03-15 131 1ed8d8fc515b90 Christian König 2021-03-15 132 spin_lock(&bdev->lru_lock); 70ae63f3a85b97 Christian König 2021-03-15 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { 70ae63f3a85b97 Christian König 2021-03-15 134 man = ttm_manager_type(bdev, i); 70ae63f3a85b97 Christian König 2021-03-15 135 if (!man || !man->use_tt) 70ae63f3a85b97 Christian König 2021-03-15 136 continue; 70ae63f3a85b97 Christian König 2021-03-15 137 70ae63f3a85b97 Christian König 2021-03-15 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { 70ae63f3a85b97 Christian König 2021-03-15 139 list_for_each_entry(bo, &man->lru[j], lru) { 70ae63f3a85b97 Christian König 2021-03-15 140 long num_pages; 824dca26fe3958 Christian König 2021-03-15 141 70ae63f3a85b97 Christian König 2021-03-15 142 if (!bo->ttm || 70ae63f3a85b97 Christian König 2021-03-15 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG || 70ae63f3a85b97 Christian König 2021-03-15 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) 70ae63f3a85b97 Christian König 2021-03-15 145 continue; 70ae63f3a85b97 Christian König 2021-03-15 146 70ae63f3a85b97 Christian König 2021-03-15 147 num_pages = bo->ttm->num_pages; 824dca26fe3958 Christian König 2021-03-15 148 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 
824dca26fe3958 Christian König 2021-03-15 149 /* ttm_bo_swapout has dropped the lru_lock */ 824dca26fe3958 Christian König 2021-03-15 150 if (!ret) 824dca26fe3958 Christian König 2021-03-15 151 return num_pages; 824dca26fe3958 Christian König 2021-03-15 152 if (ret != -EBUSY) 824dca26fe3958 Christian König 2021-03-15 153 return ret; 824dca26fe3958 Christian König 2021-03-15 154 } 824dca26fe3958 Christian König 2021-03-15 155 } 70ae63f3a85b97 Christian König 2021-03-15 156 } 1ed8d8fc515b90 Christian König 2021-03-15 157 spin_unlock(&bdev->lru_lock); 824dca26fe3958 Christian König 2021-03-15 @158 return 0; 824dca26fe3958 Christian König 2021-03-15 159 } 70ae63f3a85b97 Christian König 2021-03-15 160 EXPORT_SYMBOL(ttm_device_swapout); 824dca26fe3958 Christian König 2021-03-15 161 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 36557 bytes --] [-- Attachment #3: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock @ 2021-03-15 20:17 ` kernel test robot 0 siblings, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw) To: kbuild-all [-- Attachment #1: Type: text/plain, Size: 4122 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-m001-20210315 (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> smatch warnings: drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662) vim +158 drivers/gpu/drm/ttm/ttm_device.c 70ae63f3a85b97 Christian König 2021-03-15 123 70ae63f3a85b97 Christian König 2021-03-15 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, 70ae63f3a85b97 Christian König 2021-03-15 125 gfp_t gfp_flags) 70ae63f3a85b97 Christian König 2021-03-15 126 { 70ae63f3a85b97 Christian König 2021-03-15 127 struct ttm_resource_manager *man; 824dca26fe3958 Christian König 2021-03-15 128 struct ttm_buffer_object *bo; 70ae63f3a85b97 Christian König 2021-03-15 129 unsigned i, j; 824dca26fe3958 Christian König 2021-03-15 130 int ret; 824dca26fe3958 Christian König 2021-03-15 131 1ed8d8fc515b90 Christian König 2021-03-15 132 spin_lock(&bdev->lru_lock); 70ae63f3a85b97 Christian König 2021-03-15 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { 70ae63f3a85b97 Christian König 2021-03-15 134 man = ttm_manager_type(bdev, i); 70ae63f3a85b97 Christian König 2021-03-15 135 if (!man || !man->use_tt) 70ae63f3a85b97 Christian König 2021-03-15 136 continue; 70ae63f3a85b97 Christian König 2021-03-15 137 70ae63f3a85b97 Christian König 2021-03-15 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { 70ae63f3a85b97 Christian König 2021-03-15 139 list_for_each_entry(bo, &man->lru[j], lru) { 70ae63f3a85b97 Christian König 2021-03-15 140 long num_pages; 824dca26fe3958 Christian König 2021-03-15 141 70ae63f3a85b97 Christian König 2021-03-15 142 if (!bo->ttm || 70ae63f3a85b97 Christian König 2021-03-15 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG || 70ae63f3a85b97 Christian König 2021-03-15 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) 70ae63f3a85b97 Christian König 2021-03-15 145 continue; 70ae63f3a85b97 Christian König 2021-03-15 146 70ae63f3a85b97 Christian König 2021-03-15 147 num_pages = bo->ttm->num_pages; 824dca26fe3958 Christian König 2021-03-15 148 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 
824dca26fe3958 Christian König 2021-03-15 149 /* ttm_bo_swapout has dropped the lru_lock */ 824dca26fe3958 Christian König 2021-03-15 150 if (!ret) 824dca26fe3958 Christian König 2021-03-15 151 return num_pages; 824dca26fe3958 Christian König 2021-03-15 152 if (ret != -EBUSY) 824dca26fe3958 Christian König 2021-03-15 153 return ret; 824dca26fe3958 Christian König 2021-03-15 154 } 824dca26fe3958 Christian König 2021-03-15 155 } 70ae63f3a85b97 Christian König 2021-03-15 156 } 1ed8d8fc515b90 Christian König 2021-03-15 157 spin_unlock(&bdev->lru_lock); 824dca26fe3958 Christian König 2021-03-15 @158 return 0; 824dca26fe3958 Christian König 2021-03-15 159 } 70ae63f3a85b97 Christian König 2021-03-15 160 EXPORT_SYMBOL(ttm_device_swapout); 824dca26fe3958 Christian König 2021-03-15 161 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org [-- Attachment #2: config.gz --] [-- Type: application/gzip, Size: 36557 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock 2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König 2021-03-15 20:17 ` kernel test robot @ 2021-03-16 9:35 ` Daniel Vetter 2021-03-16 12:03 ` Christian König 1 sibling, 1 reply; 18+ messages in thread From: Daniel Vetter @ 2021-03-16 9:35 UTC (permalink / raw) To: Christian König; +Cc: ray.huang, dri-devel On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote: > Instead of having a global lock. > > Signed-off-by: Christian König <christian.koenig@amd.com> I guess per zone lru lock is a lot more work since then we need to handle ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm lingo. -Daniel > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- > drivers/gpu/drm/qxl/qxl_release.c | 5 +-- > drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- > drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- > drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- > drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- > include/drm/ttm/ttm_bo_driver.h | 4 +-- > include/drm/ttm/ttm_device.h | 4 +-- > 8 files changed, 43 insertions(+), 56 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 9d19078246c8..ae18c0e32347 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, > struct amdgpu_vm_bo_base *bo_base; > > if (vm->bulk_moveable) { > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&adev->mman.bdev.lru_lock); > ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&adev->mman.bdev.lru_lock); > return; > } > > memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); > > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&adev->mman.bdev.lru_lock); > list_for_each_entry(bo_base, &vm->idle, vm_status) { > struct amdgpu_bo *bo = 
bo_base->bo; > > @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, > &bo->shadow->tbo.mem, > &vm->lru_bulk_move); > } > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&adev->mman.bdev.lru_lock); > > vm->bulk_moveable = true; > } > diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c > index f5845c96d414..b19f2f00b215 100644 > --- a/drivers/gpu/drm/qxl/qxl_release.c > +++ b/drivers/gpu/drm/qxl/qxl_release.c > @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) > release->id | 0xf0000000, release->base.seqno); > trace_dma_fence_emit(&release->base); > > - spin_lock(&ttm_glob.lru_lock); > - > list_for_each_entry(entry, &release->bos, head) { > bo = entry->bo; > > dma_resv_add_shared_fence(bo->base.resv, &release->base); > - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > + ttm_bo_move_to_lru_tail_unlocked(bo); > dma_resv_unlock(bo->base.resv); > } > - spin_unlock(&ttm_glob.lru_lock); > ww_acquire_fini(&release->ticket); > } > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index a1be88be357b..a8103c8718a3 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) > * reference it any more. The only tricky case is the trylock on > * the resv object while holding the lru_lock. 
> */ > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > bo->base.resv = &bo->base._resv; > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > } > > return r; > @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > > if (unlock_resv) > dma_resv_unlock(bo->base.resv); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > > lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, > 30 * HZ); > @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > else if (lret == 0) > return -EBUSY; > > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { > /* > * We raced, and lost, someone else holds the reservation now, > @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > * delayed destruction would succeed, so just return success > * here. > */ > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > return 0; > } > ret = 0; > @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > if (ret || unlikely(list_empty(&bo->ddestroy))) { > if (unlock_resv) > dma_resv_unlock(bo->base.resv); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > return ret; > } > > ttm_bo_del_from_lru(bo); > list_del_init(&bo->ddestroy); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > ttm_bo_cleanup_memtype_use(bo); > > if (unlock_resv) > @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > */ > bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) > { > - struct ttm_global *glob = &ttm_glob; > struct list_head removed; > bool empty; > > INIT_LIST_HEAD(&removed); > > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > while (!list_empty(&bdev->ddestroy)) { > struct ttm_buffer_object *bo; > > @@ -372,24 +371,24 @@ bool 
ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) > continue; > > if (remove_all || bo->base.resv != &bo->base._resv) { > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > dma_resv_lock(bo->base.resv, NULL); > > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > ttm_bo_cleanup_refs(bo, false, !remove_all, true); > > } else if (dma_resv_trylock(bo->base.resv)) { > ttm_bo_cleanup_refs(bo, false, !remove_all, true); > } else { > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > } > > ttm_bo_put(bo); > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > } > list_splice_tail(&removed, &bdev->ddestroy); > empty = list_empty(&bdev->ddestroy); > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > > return empty; > } > @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) > ttm_bo_flush_all_fences(bo); > bo->deleted = true; > > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > > /* > * Make pinned bos immediately available to > @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref) > > kref_init(&bo->kref); > list_add_tail(&bo->ddestroy, &bdev->ddestroy); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > > schedule_delayed_work(&bdev->wq, > ((HZ / 100) < 1) ? 
1 : HZ / 100); > return; > } > > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > ttm_bo_del_from_lru(bo); > list_del(&bo->ddestroy); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > > ttm_bo_cleanup_memtype_use(bo); > dma_resv_unlock(bo->base.resv); > @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, > unsigned i; > int ret; > > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { > list_for_each_entry(bo, &man->lru[i], lru) { > bool busy; > @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, > if (!bo) { > if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) > busy_bo = NULL; > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); > if (busy_bo) > ttm_bo_put(busy_bo); > @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, > return ret; > } > > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > > ret = ttm_bo_evict(bo, ctx); > if (locked) > @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, > mem->mem_type = place->mem_type; > mem->placement = place->flags; > > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > ttm_bo_move_to_lru_tail(bo, mem, NULL); > - spin_unlock(&ttm_glob.lru_lock); > - > + spin_unlock(&bo->bdev->lru_lock); > return 0; > } > > @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); > int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, > gfp_t gfp_flags) > { > - struct ttm_global *glob = &ttm_glob; > bool locked; > int ret; > > @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, > > ttm_bo_del_from_lru(bo); > /* TODO: Cleanup the locking */ > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > > /* > * Move to system cached > diff --git 
a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c > index 2c280fb1e992..924d892109e8 100644 > --- a/drivers/gpu/drm/ttm/ttm_device.c > +++ b/drivers/gpu/drm/ttm/ttm_device.c > @@ -81,7 +81,6 @@ static int ttm_global_init(void) > ttm_pool_mgr_init(num_pages * 50 / 100); > ttm_tt_mgr_init(); > > - spin_lock_init(&glob->lru_lock); > glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); > > if (unlikely(glob->dummy_read_page == NULL)) { > @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); > long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, > gfp_t gfp_flags) > { > - struct ttm_global *glob = &ttm_glob; > struct ttm_resource_manager *man; > struct ttm_buffer_object *bo; > unsigned i, j; > int ret; > > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { > man = ttm_manager_type(bdev, i); > if (!man || !man->use_tt) > @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, > } > } > } > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > return 0; > } > EXPORT_SYMBOL(ttm_device_swapout); > @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, > > bdev->vma_manager = vma_manager; > INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); > + spin_lock_init(&bdev->lru_lock); > INIT_LIST_HEAD(&bdev->ddestroy); > bdev->dev_mapping = mapping; > mutex_lock(&ttm_global_mutex); > @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init); > > void ttm_device_fini(struct ttm_device *bdev) > { > - struct ttm_global *glob = &ttm_glob; > struct ttm_resource_manager *man; > unsigned i; > > @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) > if (ttm_bo_delayed_delete(bdev, true)) > pr_debug("Delayed destroy list was clean\n"); > > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) > if 
(list_empty(&man->lru[0])) > pr_debug("Swap list %d was clean\n", i); > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > > ttm_pool_fini(&bdev->pool); > ttm_global_release(); > diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > index 690ab97d52b7..071c48d672c6 100644 > --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c > +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, > if (list_empty(list)) > return; > > - spin_lock(&ttm_glob.lru_lock); > list_for_each_entry(entry, list, head) { > struct ttm_buffer_object *bo = entry->bo; > > - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > + ttm_bo_move_to_lru_tail_unlocked(bo); > dma_resv_unlock(bo->base.resv); > } > - spin_unlock(&ttm_glob.lru_lock); > > if (ticket) > ww_acquire_fini(ticket); > @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, > if (list_empty(list)) > return; > > - spin_lock(&ttm_glob.lru_lock); > list_for_each_entry(entry, list, head) { > struct ttm_buffer_object *bo = entry->bo; > > @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, > dma_resv_add_shared_fence(bo->base.resv, fence); > else > dma_resv_add_excl_fence(bo->base.resv, fence); > - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > + ttm_bo_move_to_lru_tail_unlocked(bo); > dma_resv_unlock(bo->base.resv); > } > - spin_unlock(&ttm_glob.lru_lock); > if (ticket) > ww_acquire_fini(ticket); > } > diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c > index ed1672a9f332..04f2eef653ab 100644 > --- a/drivers/gpu/drm/ttm/ttm_resource.c > +++ b/drivers/gpu/drm/ttm/ttm_resource.c > @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, > .no_wait_gpu = false, > .force_alloc = true > }; > - struct ttm_global *glob = &ttm_glob; > struct dma_fence *fence; > int ret; > unsigned i; > @@ -100,18 +99,18 @@ int 
ttm_resource_manager_evict_all(struct ttm_device *bdev, > * Can't use standard list traversal since we're unlocking. > */ > > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { > while (!list_empty(&man->lru[i])) { > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, > NULL); > if (ret) > return ret; > - spin_lock(&glob->lru_lock); > + spin_lock(&bdev->lru_lock); > } > } > - spin_unlock(&glob->lru_lock); > + spin_unlock(&bdev->lru_lock); > > spin_lock(&man->move_lock); > fence = dma_fence_get(man->move); > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > index d007feef7676..dbccac957f8f 100644 > --- a/include/drm/ttm/ttm_bo_driver.h > +++ b/include/drm/ttm/ttm_bo_driver.h > @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, > static inline void > ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) > { > - spin_lock(&ttm_glob.lru_lock); > + spin_lock(&bo->bdev->lru_lock); > ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > - spin_unlock(&ttm_glob.lru_lock); > + spin_unlock(&bo->bdev->lru_lock); > } > > static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, > diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h > index cda6efb4c34b..bae56d29e8ff 100644 > --- a/include/drm/ttm/ttm_device.h > +++ b/include/drm/ttm/ttm_device.h > @@ -56,7 +56,6 @@ extern struct ttm_global { > */ > > struct page *dummy_read_page; > - spinlock_t lru_lock; > > /** > * Protected by ttm_global_mutex. > @@ -277,8 +276,9 @@ struct ttm_device { > struct ttm_pool pool; > > /* > - * Protected by the global:lru lock. > + * Protection for the per manager LRU and ddestroy lists. 
> */ > + spinlock_t lru_lock; > struct list_head ddestroy; > > /* > -- > 2.25.1 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock 2021-03-16 9:35 ` Daniel Vetter @ 2021-03-16 12:03 ` Christian König 2021-03-16 12:05 ` Daniel Vetter 0 siblings, 1 reply; 18+ messages in thread From: Christian König @ 2021-03-16 12:03 UTC (permalink / raw) To: Daniel Vetter; +Cc: ray.huang, dri-devel Am 16.03.21 um 10:35 schrieb Daniel Vetter: > On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote: >> Instead of having a global lock. >> >> Signed-off-by: Christian König <christian.koenig@amd.com> > I guess per zone lru lock is a lot more work since then we need to handle > ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm > lingo. Making the LRU per resource manager is the long-term goal, yes. My key idea so far is that we make bo->mem a pointer and then move the LRU handling into the resource object instead of the BO. The resource object then just references the BO, so that we can figure out which BO to evict or which fence to wait for to free up a resource. Regards, Christian. 
> -Daniel > >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- >> drivers/gpu/drm/qxl/qxl_release.c | 5 +-- >> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- >> drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- >> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- >> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- >> include/drm/ttm/ttm_bo_driver.h | 4 +-- >> include/drm/ttm/ttm_device.h | 4 +-- >> 8 files changed, 43 insertions(+), 56 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 9d19078246c8..ae18c0e32347 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, >> struct amdgpu_vm_bo_base *bo_base; >> >> if (vm->bulk_moveable) { >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&adev->mman.bdev.lru_lock); >> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&adev->mman.bdev.lru_lock); >> return; >> } >> >> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); >> >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&adev->mman.bdev.lru_lock); >> list_for_each_entry(bo_base, &vm->idle, vm_status) { >> struct amdgpu_bo *bo = bo_base->bo; >> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, >> &bo->shadow->tbo.mem, >> &vm->lru_bulk_move); >> } >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&adev->mman.bdev.lru_lock); >> >> vm->bulk_moveable = true; >> } >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c >> index f5845c96d414..b19f2f00b215 100644 >> --- a/drivers/gpu/drm/qxl/qxl_release.c >> +++ b/drivers/gpu/drm/qxl/qxl_release.c >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) >> release->id | 0xf0000000, release->base.seqno); >> trace_dma_fence_emit(&release->base); >> >> - 
spin_lock(&ttm_glob.lru_lock); >> - >> list_for_each_entry(entry, &release->bos, head) { >> bo = entry->bo; >> >> dma_resv_add_shared_fence(bo->base.resv, &release->base); >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >> + ttm_bo_move_to_lru_tail_unlocked(bo); >> dma_resv_unlock(bo->base.resv); >> } >> - spin_unlock(&ttm_glob.lru_lock); >> ww_acquire_fini(&release->ticket); >> } >> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c >> index a1be88be357b..a8103c8718a3 100644 >> --- a/drivers/gpu/drm/ttm/ttm_bo.c >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c >> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) >> * reference it any more. The only tricky case is the trylock on >> * the resv object while holding the lru_lock. >> */ >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> bo->base.resv = &bo->base._resv; >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> } >> >> return r; >> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >> >> if (unlock_resv) >> dma_resv_unlock(bo->base.resv); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> >> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, >> 30 * HZ); >> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >> else if (lret == 0) >> return -EBUSY; >> >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { >> /* >> * We raced, and lost, someone else holds the reservation now, >> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >> * delayed destruction would succeed, so just return success >> * here. 
>> */ >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> return 0; >> } >> ret = 0; >> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >> if (ret || unlikely(list_empty(&bo->ddestroy))) { >> if (unlock_resv) >> dma_resv_unlock(bo->base.resv); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> return ret; >> } >> >> ttm_bo_del_from_lru(bo); >> list_del_init(&bo->ddestroy); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> ttm_bo_cleanup_memtype_use(bo); >> >> if (unlock_resv) >> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >> */ >> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) >> { >> - struct ttm_global *glob = &ttm_glob; >> struct list_head removed; >> bool empty; >> >> INIT_LIST_HEAD(&removed); >> >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> while (!list_empty(&bdev->ddestroy)) { >> struct ttm_buffer_object *bo; >> >> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) >> continue; >> >> if (remove_all || bo->base.resv != &bo->base._resv) { >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> dma_resv_lock(bo->base.resv, NULL); >> >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> ttm_bo_cleanup_refs(bo, false, !remove_all, true); >> >> } else if (dma_resv_trylock(bo->base.resv)) { >> ttm_bo_cleanup_refs(bo, false, !remove_all, true); >> } else { >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> } >> >> ttm_bo_put(bo); >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> } >> list_splice_tail(&removed, &bdev->ddestroy); >> empty = list_empty(&bdev->ddestroy); >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> >> return empty; >> } >> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) >> 
ttm_bo_flush_all_fences(bo); >> bo->deleted = true; >> >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> >> /* >> * Make pinned bos immediately available to >> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref) >> >> kref_init(&bo->kref); >> list_add_tail(&bo->ddestroy, &bdev->ddestroy); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> >> schedule_delayed_work(&bdev->wq, >> ((HZ / 100) < 1) ? 1 : HZ / 100); >> return; >> } >> >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> ttm_bo_del_from_lru(bo); >> list_del(&bo->ddestroy); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> >> ttm_bo_cleanup_memtype_use(bo); >> dma_resv_unlock(bo->base.resv); >> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, >> unsigned i; >> int ret; >> >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { >> list_for_each_entry(bo, &man->lru[i], lru) { >> bool busy; >> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, >> if (!bo) { >> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) >> busy_bo = NULL; >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); >> if (busy_bo) >> ttm_bo_put(busy_bo); >> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, >> return ret; >> } >> >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> >> ret = ttm_bo_evict(bo, ctx); >> if (locked) >> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, >> mem->mem_type = place->mem_type; >> mem->placement = place->flags; >> >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> ttm_bo_move_to_lru_tail(bo, mem, NULL); >> - spin_unlock(&ttm_glob.lru_lock); >> - >> + spin_unlock(&bo->bdev->lru_lock); >> return 0; >> } >> >> 
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); >> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, >> gfp_t gfp_flags) >> { >> - struct ttm_global *glob = &ttm_glob; >> bool locked; >> int ret; >> >> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, >> >> ttm_bo_del_from_lru(bo); >> /* TODO: Cleanup the locking */ >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> >> /* >> * Move to system cached >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c >> index 2c280fb1e992..924d892109e8 100644 >> --- a/drivers/gpu/drm/ttm/ttm_device.c >> +++ b/drivers/gpu/drm/ttm/ttm_device.c >> @@ -81,7 +81,6 @@ static int ttm_global_init(void) >> ttm_pool_mgr_init(num_pages * 50 / 100); >> ttm_tt_mgr_init(); >> >> - spin_lock_init(&glob->lru_lock); >> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); >> >> if (unlikely(glob->dummy_read_page == NULL)) { >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); >> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, >> gfp_t gfp_flags) >> { >> - struct ttm_global *glob = &ttm_glob; >> struct ttm_resource_manager *man; >> struct ttm_buffer_object *bo; >> unsigned i, j; >> int ret; >> >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { >> man = ttm_manager_type(bdev, i); >> if (!man || !man->use_tt) >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, >> } >> } >> } >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> return 0; >> } >> EXPORT_SYMBOL(ttm_device_swapout); >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, >> >> bdev->vma_manager = vma_manager; >> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); >> + spin_lock_init(&bdev->lru_lock); >> 
INIT_LIST_HEAD(&bdev->ddestroy); >> bdev->dev_mapping = mapping; >> mutex_lock(&ttm_global_mutex); >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init); >> >> void ttm_device_fini(struct ttm_device *bdev) >> { >> - struct ttm_global *glob = &ttm_glob; >> struct ttm_resource_manager *man; >> unsigned i; >> >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) >> if (ttm_bo_delayed_delete(bdev, true)) >> pr_debug("Delayed destroy list was clean\n"); >> >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) >> if (list_empty(&man->lru[0])) >> pr_debug("Swap list %d was clean\n", i); >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> >> ttm_pool_fini(&bdev->pool); >> ttm_global_release(); >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c >> index 690ab97d52b7..071c48d672c6 100644 >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, >> if (list_empty(list)) >> return; >> >> - spin_lock(&ttm_glob.lru_lock); >> list_for_each_entry(entry, list, head) { >> struct ttm_buffer_object *bo = entry->bo; >> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >> + ttm_bo_move_to_lru_tail_unlocked(bo); >> dma_resv_unlock(bo->base.resv); >> } >> - spin_unlock(&ttm_glob.lru_lock); >> >> if (ticket) >> ww_acquire_fini(ticket); >> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, >> if (list_empty(list)) >> return; >> >> - spin_lock(&ttm_glob.lru_lock); >> list_for_each_entry(entry, list, head) { >> struct ttm_buffer_object *bo = entry->bo; >> >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, >> dma_resv_add_shared_fence(bo->base.resv, fence); >> else >> dma_resv_add_excl_fence(bo->base.resv, fence); >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >> 
+ ttm_bo_move_to_lru_tail_unlocked(bo); >> dma_resv_unlock(bo->base.resv); >> } >> - spin_unlock(&ttm_glob.lru_lock); >> if (ticket) >> ww_acquire_fini(ticket); >> } >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c >> index ed1672a9f332..04f2eef653ab 100644 >> --- a/drivers/gpu/drm/ttm/ttm_resource.c >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, >> .no_wait_gpu = false, >> .force_alloc = true >> }; >> - struct ttm_global *glob = &ttm_glob; >> struct dma_fence *fence; >> int ret; >> unsigned i; >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, >> * Can't use standard list traversal since we're unlocking. >> */ >> >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { >> while (!list_empty(&man->lru[i])) { >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, >> NULL); >> if (ret) >> return ret; >> - spin_lock(&glob->lru_lock); >> + spin_lock(&bdev->lru_lock); >> } >> } >> - spin_unlock(&glob->lru_lock); >> + spin_unlock(&bdev->lru_lock); >> >> spin_lock(&man->move_lock); >> fence = dma_fence_get(man->move); >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h >> index d007feef7676..dbccac957f8f 100644 >> --- a/include/drm/ttm/ttm_bo_driver.h >> +++ b/include/drm/ttm/ttm_bo_driver.h >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, >> static inline void >> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) >> { >> - spin_lock(&ttm_glob.lru_lock); >> + spin_lock(&bo->bdev->lru_lock); >> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >> - spin_unlock(&ttm_glob.lru_lock); >> + spin_unlock(&bo->bdev->lru_lock); >> } >> >> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, >> diff --git 
a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h >> index cda6efb4c34b..bae56d29e8ff 100644 >> --- a/include/drm/ttm/ttm_device.h >> +++ b/include/drm/ttm/ttm_device.h >> @@ -56,7 +56,6 @@ extern struct ttm_global { >> */ >> >> struct page *dummy_read_page; >> - spinlock_t lru_lock; >> >> /** >> * Protected by ttm_global_mutex. >> @@ -277,8 +276,9 @@ struct ttm_device { >> struct ttm_pool pool; >> >> /* >> - * Protected by the global:lru lock. >> + * Protection for the per manager LRU and ddestroy lists. >> */ >> + spinlock_t lru_lock; >> struct list_head ddestroy; >> >> /* >> -- >> 2.25.1 >> >> _______________________________________________ >> dri-devel mailing list >> dri-devel@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock 2021-03-16 12:03 ` Christian König @ 2021-03-16 12:05 ` Daniel Vetter 2021-03-16 15:13 ` Christian König 0 siblings, 1 reply; 18+ messages in thread From: Daniel Vetter @ 2021-03-16 12:05 UTC (permalink / raw) To: Christian König; +Cc: Huang Rui, dri-devel On Tue, Mar 16, 2021 at 1:03 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > > Am 16.03.21 um 10:35 schrieb Daniel Vetter: > > On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote: > >> Instead of having a global lock. > >> > >> Signed-off-by: Christian König <christian.koenig@amd.com> > > I guess per zone lru lock is a lot more work since then we need to handle > > ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm > > lingo. > > Making the LRU per resource manager is the long term goal, yes. > > My key idea so far is that we make bo->mem a pointer and then move the > LRU handling into the resource object instead of the BO. > > The resource object then just references the BO and so that we can > figure out which BO to evict or which fence to wait for to free up a > resource. Hm yeah, that could work out fairly nicely, from both a locking and a refcounting point of view. And maybe we could then use entirely free-standing mem objects instead of ghost objects? Since that's a part of ttm I don't grok and it always looks a bit like a hack to me. So for these ghost mem objects you'd only need the lru + dma_fence_wait (can grab a fence ref under the lru lock and then drop the lru lock for that) for eviction, no dma_resv_lock. -Daniel > > Regards, > Christian. 
> > > -Daniel > > > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- > >> drivers/gpu/drm/qxl/qxl_release.c | 5 +-- > >> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- > >> drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- > >> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- > >> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- > >> include/drm/ttm/ttm_bo_driver.h | 4 +-- > >> include/drm/ttm/ttm_device.h | 4 +-- > >> 8 files changed, 43 insertions(+), 56 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> index 9d19078246c8..ae18c0e32347 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, > >> struct amdgpu_vm_bo_base *bo_base; > >> > >> if (vm->bulk_moveable) { > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&adev->mman.bdev.lru_lock); > >> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&adev->mman.bdev.lru_lock); > >> return; > >> } > >> > >> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&adev->mman.bdev.lru_lock); > >> list_for_each_entry(bo_base, &vm->idle, vm_status) { > >> struct amdgpu_bo *bo = bo_base->bo; > >> > >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, > >> &bo->shadow->tbo.mem, > >> &vm->lru_bulk_move); > >> } > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&adev->mman.bdev.lru_lock); > >> > >> vm->bulk_moveable = true; > >> } > >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c > >> index f5845c96d414..b19f2f00b215 100644 > >> --- a/drivers/gpu/drm/qxl/qxl_release.c > >> +++ b/drivers/gpu/drm/qxl/qxl_release.c > >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) > 
>> release->id | 0xf0000000, release->base.seqno); > >> trace_dma_fence_emit(&release->base); > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> - > >> list_for_each_entry(entry, &release->bos, head) { > >> bo = entry->bo; > >> > >> dma_resv_add_shared_fence(bo->base.resv, &release->base); > >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > >> + ttm_bo_move_to_lru_tail_unlocked(bo); > >> dma_resv_unlock(bo->base.resv); > >> } > >> - spin_unlock(&ttm_glob.lru_lock); > >> ww_acquire_fini(&release->ticket); > >> } > >> > >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > >> index a1be88be357b..a8103c8718a3 100644 > >> --- a/drivers/gpu/drm/ttm/ttm_bo.c > >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c > >> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) > >> * reference it any more. The only tricky case is the trylock on > >> * the resv object while holding the lru_lock. > >> */ > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> bo->base.resv = &bo->base._resv; > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> } > >> > >> return r; > >> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > >> > >> if (unlock_resv) > >> dma_resv_unlock(bo->base.resv); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> > >> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, > >> 30 * HZ); > >> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > >> else if (lret == 0) > >> return -EBUSY; > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { > >> /* > >> * We raced, and lost, someone else holds the reservation now, > >> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > >> * delayed destruction would succeed, so just return success > >> * here. 
> >> */ > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> return 0; > >> } > >> ret = 0; > >> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > >> if (ret || unlikely(list_empty(&bo->ddestroy))) { > >> if (unlock_resv) > >> dma_resv_unlock(bo->base.resv); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> return ret; > >> } > >> > >> ttm_bo_del_from_lru(bo); > >> list_del_init(&bo->ddestroy); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> ttm_bo_cleanup_memtype_use(bo); > >> > >> if (unlock_resv) > >> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, > >> */ > >> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) > >> { > >> - struct ttm_global *glob = &ttm_glob; > >> struct list_head removed; > >> bool empty; > >> > >> INIT_LIST_HEAD(&removed); > >> > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> while (!list_empty(&bdev->ddestroy)) { > >> struct ttm_buffer_object *bo; > >> > >> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) > >> continue; > >> > >> if (remove_all || bo->base.resv != &bo->base._resv) { > >> - spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bdev->lru_lock); > >> dma_resv_lock(bo->base.resv, NULL); > >> > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> ttm_bo_cleanup_refs(bo, false, !remove_all, true); > >> > >> } else if (dma_resv_trylock(bo->base.resv)) { > >> ttm_bo_cleanup_refs(bo, false, !remove_all, true); > >> } else { > >> - spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bdev->lru_lock); > >> } > >> > >> ttm_bo_put(bo); > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> } > >> list_splice_tail(&removed, &bdev->ddestroy); > >> empty = list_empty(&bdev->ddestroy); > >> - spin_unlock(&glob->lru_lock); > >> + 
spin_unlock(&bdev->lru_lock); > >> > >> return empty; > >> } > >> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) > >> ttm_bo_flush_all_fences(bo); > >> bo->deleted = true; > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> > >> /* > >> * Make pinned bos immediately available to > >> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref) > >> > >> kref_init(&bo->kref); > >> list_add_tail(&bo->ddestroy, &bdev->ddestroy); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> > >> schedule_delayed_work(&bdev->wq, > >> ((HZ / 100) < 1) ? 1 : HZ / 100); > >> return; > >> } > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> ttm_bo_del_from_lru(bo); > >> list_del(&bo->ddestroy); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> > >> ttm_bo_cleanup_memtype_use(bo); > >> dma_resv_unlock(bo->base.resv); > >> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, > >> unsigned i; > >> int ret; > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { > >> list_for_each_entry(bo, &man->lru[i], lru) { > >> bool busy; > >> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, > >> if (!bo) { > >> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) > >> busy_bo = NULL; > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); > >> if (busy_bo) > >> ttm_bo_put(busy_bo); > >> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, > >> return ret; > >> } > >> > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> > >> ret = ttm_bo_evict(bo, ctx); > >> if (locked) > >> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, > >> mem->mem_type = place->mem_type; > >> 
mem->placement = place->flags; > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> ttm_bo_move_to_lru_tail(bo, mem, NULL); > >> - spin_unlock(&ttm_glob.lru_lock); > >> - > >> + spin_unlock(&bo->bdev->lru_lock); > >> return 0; > >> } > >> > >> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); > >> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, > >> gfp_t gfp_flags) > >> { > >> - struct ttm_global *glob = &ttm_glob; > >> bool locked; > >> int ret; > >> > >> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, > >> > >> ttm_bo_del_from_lru(bo); > >> /* TODO: Cleanup the locking */ > >> - spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> > >> /* > >> * Move to system cached > >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c > >> index 2c280fb1e992..924d892109e8 100644 > >> --- a/drivers/gpu/drm/ttm/ttm_device.c > >> +++ b/drivers/gpu/drm/ttm/ttm_device.c > >> @@ -81,7 +81,6 @@ static int ttm_global_init(void) > >> ttm_pool_mgr_init(num_pages * 50 / 100); > >> ttm_tt_mgr_init(); > >> > >> - spin_lock_init(&glob->lru_lock); > >> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); > >> > >> if (unlikely(glob->dummy_read_page == NULL)) { > >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); > >> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, > >> gfp_t gfp_flags) > >> { > >> - struct ttm_global *glob = &ttm_glob; > >> struct ttm_resource_manager *man; > >> struct ttm_buffer_object *bo; > >> unsigned i, j; > >> int ret; > >> > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { > >> man = ttm_manager_type(bdev, i); > >> if (!man || !man->use_tt) > >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, > >> } > >> } > >> } > >> - 
spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bdev->lru_lock); > >> return 0; > >> } > >> EXPORT_SYMBOL(ttm_device_swapout); > >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, > >> > >> bdev->vma_manager = vma_manager; > >> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); > >> + spin_lock_init(&bdev->lru_lock); > >> INIT_LIST_HEAD(&bdev->ddestroy); > >> bdev->dev_mapping = mapping; > >> mutex_lock(&ttm_global_mutex); > >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init); > >> > >> void ttm_device_fini(struct ttm_device *bdev) > >> { > >> - struct ttm_global *glob = &ttm_glob; > >> struct ttm_resource_manager *man; > >> unsigned i; > >> > >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) > >> if (ttm_bo_delayed_delete(bdev, true)) > >> pr_debug("Delayed destroy list was clean\n"); > >> > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) > >> if (list_empty(&man->lru[0])) > >> pr_debug("Swap list %d was clean\n", i); > >> - spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bdev->lru_lock); > >> > >> ttm_pool_fini(&bdev->pool); > >> ttm_global_release(); > >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > >> index 690ab97d52b7..071c48d672c6 100644 > >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c > >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c > >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, > >> if (list_empty(list)) > >> return; > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> list_for_each_entry(entry, list, head) { > >> struct ttm_buffer_object *bo = entry->bo; > >> > >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > >> + ttm_bo_move_to_lru_tail_unlocked(bo); > >> dma_resv_unlock(bo->base.resv); > >> } > >> - spin_unlock(&ttm_glob.lru_lock); > >> > >> if (ticket) > >> ww_acquire_fini(ticket); > >> @@ -154,7 +152,6 @@ void 
ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, > >> if (list_empty(list)) > >> return; > >> > >> - spin_lock(&ttm_glob.lru_lock); > >> list_for_each_entry(entry, list, head) { > >> struct ttm_buffer_object *bo = entry->bo; > >> > >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, > >> dma_resv_add_shared_fence(bo->base.resv, fence); > >> else > >> dma_resv_add_excl_fence(bo->base.resv, fence); > >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > >> + ttm_bo_move_to_lru_tail_unlocked(bo); > >> dma_resv_unlock(bo->base.resv); > >> } > >> - spin_unlock(&ttm_glob.lru_lock); > >> if (ticket) > >> ww_acquire_fini(ticket); > >> } > >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c > >> index ed1672a9f332..04f2eef653ab 100644 > >> --- a/drivers/gpu/drm/ttm/ttm_resource.c > >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c > >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, > >> .no_wait_gpu = false, > >> .force_alloc = true > >> }; > >> - struct ttm_global *glob = &ttm_glob; > >> struct dma_fence *fence; > >> int ret; > >> unsigned i; > >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, > >> * Can't use standard list traversal since we're unlocking. 
> >> */ > >> > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { > >> while (!list_empty(&man->lru[i])) { > >> - spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bdev->lru_lock); > >> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, > >> NULL); > >> if (ret) > >> return ret; > >> - spin_lock(&glob->lru_lock); > >> + spin_lock(&bdev->lru_lock); > >> } > >> } > >> - spin_unlock(&glob->lru_lock); > >> + spin_unlock(&bdev->lru_lock); > >> > >> spin_lock(&man->move_lock); > >> fence = dma_fence_get(man->move); > >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > >> index d007feef7676..dbccac957f8f 100644 > >> --- a/include/drm/ttm/ttm_bo_driver.h > >> +++ b/include/drm/ttm/ttm_bo_driver.h > >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, > >> static inline void > >> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) > >> { > >> - spin_lock(&ttm_glob.lru_lock); > >> + spin_lock(&bo->bdev->lru_lock); > >> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); > >> - spin_unlock(&ttm_glob.lru_lock); > >> + spin_unlock(&bo->bdev->lru_lock); > >> } > >> > >> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, > >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h > >> index cda6efb4c34b..bae56d29e8ff 100644 > >> --- a/include/drm/ttm/ttm_device.h > >> +++ b/include/drm/ttm/ttm_device.h > >> @@ -56,7 +56,6 @@ extern struct ttm_global { > >> */ > >> > >> struct page *dummy_read_page; > >> - spinlock_t lru_lock; > >> > >> /** > >> * Protected by ttm_global_mutex. > >> @@ -277,8 +276,9 @@ struct ttm_device { > >> struct ttm_pool pool; > >> > >> /* > >> - * Protected by the global:lru lock. > >> + * Protection for the per manager LRU and ddestroy lists. 
> >> */ > >> + spinlock_t lru_lock; > >> struct list_head ddestroy; > >> > >> /* > >> -- > >> 2.25.1 > >> > >> _______________________________________________ > >> dri-devel mailing list > >> dri-devel@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/dri-devel > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock 2021-03-16 12:05 ` Daniel Vetter @ 2021-03-16 15:13 ` Christian König 0 siblings, 0 replies; 18+ messages in thread From: Christian König @ 2021-03-16 15:13 UTC (permalink / raw) To: Daniel Vetter; +Cc: Huang Rui, dri-devel Am 16.03.21 um 13:05 schrieb Daniel Vetter: > On Tue, Mar 16, 2021 at 1:03 PM Christian König > <ckoenig.leichtzumerken@gmail.com> wrote: >> Am 16.03.21 um 10:35 schrieb Daniel Vetter: >>> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote: >>>> Instead of having a global lock. >>>> >>>> Signed-off-by: Christian König <christian.koenig@amd.com> >>> I guess per zone lru lock is a lot more work since then we need to handle >>> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm >>> lingo. >> Making the LRU per resource manager is the long term goal, yes. >> >> My key idea so far is that we make bo->mem a pointer and then move the >> LRU handling into the resource object instead of the BO. >> >> The resource object then just references the BO and so that we can >> figure out which BO to evict or which fence to wait for to free up a >> resource. > Hm yeah that could work out fairly nicely. Both from locking but also > refcounting pov. And maybe we could then use entirely free-standing > mem objects instead of ghost objects? Since that's a part of ttm I > don't grok and it always looks a bit like a hack to me. So for these > ghost mem objects you'd only need the lru + dma_fence_wait (can grab a > fence ref under the lru and then drop lru lock for that) for eviction, > no dma_resv_lock. That's exactly the background here, yes. Those ghost objects are more than just a bit of a hack and result in tons of checks in the driver for whether a BO is really a BO or a ghost. Moving all that handling into the resource objects not only allows us to remove that, but also makes things like delayed delete work out pretty nicely. Christian. > -Daniel >> Regards, >> Christian. 
>> >>> -Daniel >>> >>>> --- >>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++--- >>>> drivers/gpu/drm/qxl/qxl_release.c | 5 +-- >>>> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++-------------- >>>> drivers/gpu/drm/ttm/ttm_device.c | 12 +++---- >>>> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++--- >>>> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++-- >>>> include/drm/ttm/ttm_bo_driver.h | 4 +-- >>>> include/drm/ttm/ttm_device.h | 4 +-- >>>> 8 files changed, 43 insertions(+), 56 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index 9d19078246c8..ae18c0e32347 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, >>>> struct amdgpu_vm_bo_base *bo_base; >>>> >>>> if (vm->bulk_moveable) { >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&adev->mman.bdev.lru_lock); >>>> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&adev->mman.bdev.lru_lock); >>>> return; >>>> } >>>> >>>> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&adev->mman.bdev.lru_lock); >>>> list_for_each_entry(bo_base, &vm->idle, vm_status) { >>>> struct amdgpu_bo *bo = bo_base->bo; >>>> >>>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, >>>> &bo->shadow->tbo.mem, >>>> &vm->lru_bulk_move); >>>> } >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&adev->mman.bdev.lru_lock); >>>> >>>> vm->bulk_moveable = true; >>>> } >>>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c >>>> index f5845c96d414..b19f2f00b215 100644 >>>> --- a/drivers/gpu/drm/qxl/qxl_release.c >>>> +++ b/drivers/gpu/drm/qxl/qxl_release.c >>>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) 
>>>> release->id | 0xf0000000, release->base.seqno); >>>> trace_dma_fence_emit(&release->base); >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> - >>>> list_for_each_entry(entry, &release->bos, head) { >>>> bo = entry->bo; >>>> >>>> dma_resv_add_shared_fence(bo->base.resv, &release->base); >>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >>>> + ttm_bo_move_to_lru_tail_unlocked(bo); >>>> dma_resv_unlock(bo->base.resv); >>>> } >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> ww_acquire_fini(&release->ticket); >>>> } >>>> >>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c >>>> index a1be88be357b..a8103c8718a3 100644 >>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c >>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c >>>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) >>>> * reference it any more. The only tricky case is the trylock on >>>> * the resv object while holding the lru_lock. >>>> */ >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> bo->base.resv = &bo->base._resv; >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> } >>>> >>>> return r; >>>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >>>> >>>> if (unlock_resv) >>>> dma_resv_unlock(bo->base.resv); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> >>>> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible, >>>> 30 * HZ); >>>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >>>> else if (lret == 0) >>>> return -EBUSY; >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { >>>> /* >>>> * We raced, and lost, someone else holds the reservation now, >>>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >>>> * delayed destruction would succeed, so just return success >>>> * here. 
>>>> */ >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> return 0; >>>> } >>>> ret = 0; >>>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >>>> if (ret || unlikely(list_empty(&bo->ddestroy))) { >>>> if (unlock_resv) >>>> dma_resv_unlock(bo->base.resv); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> return ret; >>>> } >>>> >>>> ttm_bo_del_from_lru(bo); >>>> list_del_init(&bo->ddestroy); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> ttm_bo_cleanup_memtype_use(bo); >>>> >>>> if (unlock_resv) >>>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, >>>> */ >>>> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) >>>> { >>>> - struct ttm_global *glob = &ttm_glob; >>>> struct list_head removed; >>>> bool empty; >>>> >>>> INIT_LIST_HEAD(&removed); >>>> >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> while (!list_empty(&bdev->ddestroy)) { >>>> struct ttm_buffer_object *bo; >>>> >>>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) >>>> continue; >>>> >>>> if (remove_all || bo->base.resv != &bo->base._resv) { >>>> - spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bdev->lru_lock); >>>> dma_resv_lock(bo->base.resv, NULL); >>>> >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> ttm_bo_cleanup_refs(bo, false, !remove_all, true); >>>> >>>> } else if (dma_resv_trylock(bo->base.resv)) { >>>> ttm_bo_cleanup_refs(bo, false, !remove_all, true); >>>> } else { >>>> - spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bdev->lru_lock); >>>> } >>>> >>>> ttm_bo_put(bo); >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> } >>>> list_splice_tail(&removed, &bdev->ddestroy); >>>> empty = list_empty(&bdev->ddestroy); >>>> - spin_unlock(&glob->lru_lock); >>>> + 
spin_unlock(&bdev->lru_lock); >>>> >>>> return empty; >>>> } >>>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref) >>>> ttm_bo_flush_all_fences(bo); >>>> bo->deleted = true; >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> >>>> /* >>>> * Make pinned bos immediately available to >>>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref) >>>> >>>> kref_init(&bo->kref); >>>> list_add_tail(&bo->ddestroy, &bdev->ddestroy); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> >>>> schedule_delayed_work(&bdev->wq, >>>> ((HZ / 100) < 1) ? 1 : HZ / 100); >>>> return; >>>> } >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> ttm_bo_del_from_lru(bo); >>>> list_del(&bo->ddestroy); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> >>>> ttm_bo_cleanup_memtype_use(bo); >>>> dma_resv_unlock(bo->base.resv); >>>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, >>>> unsigned i; >>>> int ret; >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { >>>> list_for_each_entry(bo, &man->lru[i], lru) { >>>> bool busy; >>>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, >>>> if (!bo) { >>>> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) >>>> busy_bo = NULL; >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); >>>> if (busy_bo) >>>> ttm_bo_put(busy_bo); >>>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, >>>> return ret; >>>> } >>>> >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> >>>> ret = ttm_bo_evict(bo, ctx); >>>> if (locked) >>>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo, >>>> mem->mem_type = place->mem_type; >>>> 
mem->placement = place->flags; >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> ttm_bo_move_to_lru_tail(bo, mem, NULL); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> - >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> return 0; >>>> } >>>> >>>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait); >>>> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, >>>> gfp_t gfp_flags) >>>> { >>>> - struct ttm_global *glob = &ttm_glob; >>>> bool locked; >>>> int ret; >>>> >>>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, >>>> >>>> ttm_bo_del_from_lru(bo); >>>> /* TODO: Cleanup the locking */ >>>> - spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> >>>> /* >>>> * Move to system cached >>>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c >>>> index 2c280fb1e992..924d892109e8 100644 >>>> --- a/drivers/gpu/drm/ttm/ttm_device.c >>>> +++ b/drivers/gpu/drm/ttm/ttm_device.c >>>> @@ -81,7 +81,6 @@ static int ttm_global_init(void) >>>> ttm_pool_mgr_init(num_pages * 50 / 100); >>>> ttm_tt_mgr_init(); >>>> >>>> - spin_lock_init(&glob->lru_lock); >>>> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); >>>> >>>> if (unlikely(glob->dummy_read_page == NULL)) { >>>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout); >>>> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, >>>> gfp_t gfp_flags) >>>> { >>>> - struct ttm_global *glob = &ttm_glob; >>>> struct ttm_resource_manager *man; >>>> struct ttm_buffer_object *bo; >>>> unsigned i, j; >>>> int ret; >>>> >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { >>>> man = ttm_manager_type(bdev, i); >>>> if (!man || !man->use_tt) >>>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, >>>> } >>>> } >>>> } >>>> - 
spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bdev->lru_lock); >>>> return 0; >>>> } >>>> EXPORT_SYMBOL(ttm_device_swapout); >>>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, >>>> >>>> bdev->vma_manager = vma_manager; >>>> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); >>>> + spin_lock_init(&bdev->lru_lock); >>>> INIT_LIST_HEAD(&bdev->ddestroy); >>>> bdev->dev_mapping = mapping; >>>> mutex_lock(&ttm_global_mutex); >>>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init); >>>> >>>> void ttm_device_fini(struct ttm_device *bdev) >>>> { >>>> - struct ttm_global *glob = &ttm_glob; >>>> struct ttm_resource_manager *man; >>>> unsigned i; >>>> >>>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev) >>>> if (ttm_bo_delayed_delete(bdev, true)) >>>> pr_debug("Delayed destroy list was clean\n"); >>>> >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) >>>> if (list_empty(&man->lru[0])) >>>> pr_debug("Swap list %d was clean\n", i); >>>> - spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bdev->lru_lock); >>>> >>>> ttm_pool_fini(&bdev->pool); >>>> ttm_global_release(); >>>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c >>>> index 690ab97d52b7..071c48d672c6 100644 >>>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c >>>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c >>>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, >>>> if (list_empty(list)) >>>> return; >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> list_for_each_entry(entry, list, head) { >>>> struct ttm_buffer_object *bo = entry->bo; >>>> >>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >>>> + ttm_bo_move_to_lru_tail_unlocked(bo); >>>> dma_resv_unlock(bo->base.resv); >>>> } >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> >>>> if (ticket) >>>> ww_acquire_fini(ticket); >>>> @@ -154,7 +152,6 @@ void 
ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, >>>> if (list_empty(list)) >>>> return; >>>> >>>> - spin_lock(&ttm_glob.lru_lock); >>>> list_for_each_entry(entry, list, head) { >>>> struct ttm_buffer_object *bo = entry->bo; >>>> >>>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, >>>> dma_resv_add_shared_fence(bo->base.resv, fence); >>>> else >>>> dma_resv_add_excl_fence(bo->base.resv, fence); >>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >>>> + ttm_bo_move_to_lru_tail_unlocked(bo); >>>> dma_resv_unlock(bo->base.resv); >>>> } >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> if (ticket) >>>> ww_acquire_fini(ticket); >>>> } >>>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c >>>> index ed1672a9f332..04f2eef653ab 100644 >>>> --- a/drivers/gpu/drm/ttm/ttm_resource.c >>>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c >>>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, >>>> .no_wait_gpu = false, >>>> .force_alloc = true >>>> }; >>>> - struct ttm_global *glob = &ttm_glob; >>>> struct dma_fence *fence; >>>> int ret; >>>> unsigned i; >>>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, >>>> * Can't use standard list traversal since we're unlocking. 
>>>> */ >>>> >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { >>>> while (!list_empty(&man->lru[i])) { >>>> - spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bdev->lru_lock); >>>> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, >>>> NULL); >>>> if (ret) >>>> return ret; >>>> - spin_lock(&glob->lru_lock); >>>> + spin_lock(&bdev->lru_lock); >>>> } >>>> } >>>> - spin_unlock(&glob->lru_lock); >>>> + spin_unlock(&bdev->lru_lock); >>>> >>>> spin_lock(&man->move_lock); >>>> fence = dma_fence_get(man->move); >>>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h >>>> index d007feef7676..dbccac957f8f 100644 >>>> --- a/include/drm/ttm/ttm_bo_driver.h >>>> +++ b/include/drm/ttm/ttm_bo_driver.h >>>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo, >>>> static inline void >>>> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo) >>>> { >>>> - spin_lock(&ttm_glob.lru_lock); >>>> + spin_lock(&bo->bdev->lru_lock); >>>> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL); >>>> - spin_unlock(&ttm_glob.lru_lock); >>>> + spin_unlock(&bo->bdev->lru_lock); >>>> } >>>> >>>> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo, >>>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h >>>> index cda6efb4c34b..bae56d29e8ff 100644 >>>> --- a/include/drm/ttm/ttm_device.h >>>> +++ b/include/drm/ttm/ttm_device.h >>>> @@ -56,7 +56,6 @@ extern struct ttm_global { >>>> */ >>>> >>>> struct page *dummy_read_page; >>>> - spinlock_t lru_lock; >>>> >>>> /** >>>> * Protected by ttm_global_mutex. >>>> @@ -277,8 +276,9 @@ struct ttm_device { >>>> struct ttm_pool pool; >>>> >>>> /* >>>> - * Protected by the global:lru lock. >>>> + * Protection for the per manager LRU and ddestroy lists. 
>>>> */ >>>> + spinlock_t lru_lock; >>>> struct list_head ddestroy; >>>> >>>> /* >>>> -- >>>> 2.25.1 >>>> >>>> _______________________________________________ >>>> dri-devel mailing list >>>> dri-devel@lists.freedesktop.org >>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel > _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around 2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König @ 2021-03-15 18:47 ` kernel test robot 2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König ` (2 subsequent siblings) 3 siblings, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-15 18:47 UTC (permalink / raw) To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all [-- Attachment #1: Type: text/plain, Size: 3093 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-allyesconfig (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 git checkout 824dca26fe395899b41d9790944ddea345f7a6fd # save the attached .config to linux build tree make W=1 ARCH=i386 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX' drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). 
Prototype was for DEFINE_MUTEX() instead drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout' drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout' >> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead vim +110 drivers/gpu/drm/ttm/ttm_device.c 104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 108 */ 109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) > 110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 64665 bytes --] [-- Attachment #3: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around @ 2021-03-15 18:47 ` kernel test robot 0 siblings, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-15 18:47 UTC (permalink / raw) To: kbuild-all [-- Attachment #1: Type: text/plain, Size: 3165 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: i386-allyesconfig (attached as .config) compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 git checkout 824dca26fe395899b41d9790944ddea345f7a6fd # save the attached .config to linux build tree make W=1 ARCH=i386 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX' drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). 
Prototype was for DEFINE_MUTEX() instead drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout' drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout' >> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead vim +110 drivers/gpu/drm/ttm/ttm_device.c 104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 108 */ 109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) > 110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org [-- Attachment #2: config.gz --] [-- Type: application/gzip, Size: 64665 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around 2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König @ 2021-03-19 9:41 ` kernel test robot 2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König ` (2 subsequent siblings) 3 siblings, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-19 9:41 UTC (permalink / raw) To: Christian König, dri-devel; +Cc: clang-built-linux, ray.huang, kbuild-all [-- Attachment #1: Type: text/plain, Size: 3513 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [also build test WARNING on next-20210319] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. When submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-a005-20210318 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 git checkout 824dca26fe395899b41d9790944ddea345f7a6fd # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang 
make.cross ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX' drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout' drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout' >> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead vim +110 drivers/gpu/drm/ttm/ttm_device.c 104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 
108 */ 109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) > 110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org [-- Attachment #2: .config.gz --] [-- Type: application/gzip, Size: 36289 bytes --] [-- Attachment #3: Type: text/plain, Size: 160 bytes --] _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around @ 2021-03-19 9:41 ` kernel test robot 0 siblings, 0 replies; 18+ messages in thread From: kernel test robot @ 2021-03-19 9:41 UTC (permalink / raw) To: kbuild-all [-- Attachment #1: Type: text/plain, Size: 3590 bytes --] Hi Christian, I love your patch! Perhaps something to improve: [auto build test WARNING on drm-tip/drm-tip] [also build test WARNING on next-20210319] [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3] [If your patch is applied to the wrong git tree, kindly drop us a note. When submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip config: x86_64-randconfig-a005-20210318 (attached as .config) compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551 git checkout 824dca26fe395899b41d9790944ddea345f7a6fd # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 If you fix the issue, kindly add the following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 
'ttm_global_mutex' not described in 'DEFINE_MUTEX' drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout' drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout' >> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead vim +110 drivers/gpu/drm/ttm/ttm_device.c 104 105 /** 106 * A buffer object shrink method that tries to swap out the first 107 * buffer object on the global::swap_lru list. 108 */ 109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) > 110 { 111 struct ttm_global *glob = &ttm_glob; 112 struct ttm_buffer_object *bo; 113 unsigned i; 114 int ret; 115 116 spin_lock(&glob->lru_lock); 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) { 119 uint32_t num_pages = bo->ttm->num_pages; 120 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags); 122 /* ttm_bo_swapout has dropped the lru_lock */ 123 if (!ret) 124 return num_pages; 125 if (ret != -EBUSY) 126 return ret; 127 } 128 } 129 spin_unlock(&glob->lru_lock); 130 return 0; 131 } 132 EXPORT_SYMBOL(ttm_global_swapout); 133 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org [-- Attachment #2: config.gz --] [-- Type: application/gzip, Size: 36289 bytes --] ^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 1/3] drm/ttm: move swapout logic around @ 2021-02-11 13:29 Christian König 2021-02-11 13:29 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König 0 siblings, 1 reply; 18+ messages in thread From: Christian König @ 2021-02-11 13:29 UTC (permalink / raw) To: dri-devel; +Cc: ray.huang Move the iteration of the global lru into the new function ttm_global_swapout() and use that instead in drivers. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++--------------------- drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++ drivers/gpu/drm/ttm/ttm_tt.c | 2 +- drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 3 +- include/drm/ttm/ttm_device.h | 2 + 7 files changed, 53 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e38102282fd5..d33578a112b4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_wait); -/* - * A buffer object shrink method that tries to swap out the first - * buffer object on the bo_global::swap_lru list. 
- */ -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; - struct ttm_buffer_object *bo; - int ret = -EBUSY; bool locked; - unsigned i; - - spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, - NULL)) - continue; - - if (!ttm_bo_get_unless_zero(bo)) { - if (locked) - dma_resv_unlock(bo->base.resv); - continue; - } + int ret; - ret = 0; - break; - } - if (!ret) - break; - } + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) + return -EBUSY; - if (ret) { - spin_unlock(&glob->lru_lock); - return ret; + if (!ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; } if (bo->deleted) { - ret = ttm_bo_cleanup_refs(bo, false, false, locked); + ttm_bo_cleanup_refs(bo, false, false, locked); ttm_bo_put(bo); - return ret; + return 0; } ttm_bo_del_from_lru(bo); + /* TODO: Cleanup the locking */ spin_unlock(&glob->lru_lock); - /** + /* * Move to system cached */ - if (bo->mem.mem_type != TTM_PL_SYSTEM) { struct ttm_operation_ctx ctx = { false, false }; struct ttm_resource evict_mem; @@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) } } - /** + /* * Make sure BO is idle. */ - ret = ttm_bo_wait(bo, false, false); if (unlikely(ret != 0)) goto out; ttm_bo_unmap_virtual(bo); - /** + /* * Swap out. Buffer will be swapped in again as soon as * anyone tries to access a ttm page. */ - if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo); ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); out: - /** - * + /* * Unreserve without putting on LRU to avoid swapping out an * already swapped buffer. 
*/ @@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) ttm_bo_put(bo); return ret; } -EXPORT_SYMBOL(ttm_bo_swapout); void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 95e1b7b1f2e6..dfc2a7e4e490 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -102,6 +102,35 @@ static int ttm_global_init(void) return ret; } +/** + * A buffer object shrink method that tries to swap out the first + * buffer object on the global::swap_lru list. + */ +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_buffer_object *bo; + unsigned i; + int ret; + + spin_lock(&glob->lru_lock); + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { + list_for_each_entry(bo, &glob->swap_lru[i], swap) { + uint32_t num_pages = bo->ttm->num_pages; + + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } + } + spin_unlock(&glob->lru_lock); + return 0; +} +EXPORT_SYMBOL(ttm_global_swapout); + static void ttm_init_sysman(struct ttm_device *bdev) { struct ttm_resource_manager *man = &bdev->sysman; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 2f0833c98d2c..95b5cff25f4c 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, }; int ret; - ret = ttm_bo_swapout(&ctx, GFP_NOFS); + ret = ttm_global_swapout(&ctx, GFP_NOFS); return ret < 0 ? 
SHRINK_EMPTY : ret; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c index e972af07d029..104b95a8c7a2 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c @@ -38,6 +38,7 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> +#include <drm/ttm/ttm_device.h> #include "ttm_memory.h" @@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, while (ttm_zones_above_swap_target(glob, from_wq, extra)) { spin_unlock(&glob->lock); - ret = ttm_bo_swapout(ctx, GFP_KERNEL); + ret = ttm_global_swapout(ctx, GFP_KERNEL); spin_lock(&glob->lock); if (unlikely(ret < 0)) break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 4efed3bf0ef9..01da355b86f3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 4fb523dfab32..5044ac330858 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write); -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags); /** * ttm_bo_uses_embedded_gem_object - check if the given bo uses the diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 035bbc044a3b..6a0b267d4fe6 100644 --- 
a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -297,6 +297,8 @@ struct ttm_device { struct delayed_work wq; }; +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); + static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) { -- 2.25.1 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH 2/3] drm/ttm: remove swap LRU v2 2021-02-11 13:29 Christian König @ 2021-02-11 13:29 ` Christian König 0 siblings, 0 replies; 18+ messages in thread From: Christian König @ 2021-02-11 13:29 UTC (permalink / raw) To: dri-devel; +Cc: ray.huang Instead evict round robin from each devices SYSTEM and TT domain. v2: reorder num_pages access reported by Dan's script Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/ttm/ttm_bo.c | 31 ++------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 1 - drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++-------- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- include/drm/ttm/ttm_bo_api.h | 1 - include/drm/ttm/ttm_bo_driver.h | 1 - include/drm/ttm/ttm_device.h | 7 +--- 7 files changed, 52 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d33578a112b4..a1be88be357b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) { struct ttm_device *bdev = bo->bdev; - list_del_init(&bo->swap); list_del_init(&bo->lru); if (bdev->funcs->del_from_lru_notify) @@ -104,14 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); - if (man->use_tt && bo->ttm && - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | - TTM_PAGE_FLAG_SWAPPED))) { - struct list_head *swap; - - swap = &ttm_glob.swap_lru[bo->priority]; - list_move_tail(&bo->swap, swap); - } if (bdev->funcs->del_from_lru_notify) bdev->funcs->del_from_lru_notify(bo); @@ -126,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo); break; } - if (bo->ttm && !(bo->ttm->page_flags & - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo); } } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); @@ 
-166,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) list_bulk_move_tail(&man->lru[i], &pos->first->lru, &pos->last->lru); } - - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i]; - struct list_head *lru; - - if (!pos->first) - continue; - - dma_resv_assert_held(pos->first->base.resv); - dma_resv_assert_held(pos->last->base.resv); - - lru = &ttm_glob.swap_lru[i]; - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap); - } } EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail); @@ -1056,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, kref_init(&bo->kref); INIT_LIST_HEAD(&bo->lru); INIT_LIST_HEAD(&bo->ddestroy); - INIT_LIST_HEAD(&bo->swap); bo->bdev = bdev; bo->type = type; bo->mem.mem_type = TTM_PL_SYSTEM; @@ -1191,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, bool locked; int ret; + if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG | + TTM_PAGE_FLAG_SWAPPED)) + return false; + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL)) return -EBUSY; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 031e5819fec4..a2a17c84ceb3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); INIT_LIST_HEAD(&fbo->base.lru); - INIT_LIST_HEAD(&fbo->base.swap); fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index dfc2a7e4e490..2c280fb1e992 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -67,7 +67,6 @@ static int ttm_global_init(void) unsigned long num_pages; struct sysinfo si; int ret = 0; - unsigned i; mutex_lock(&ttm_global_mutex); if (++ttm_glob_use_count > 1) @@ -90,8 
+89,6 @@ static int ttm_global_init(void) goto out; } - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) - INIT_LIST_HEAD(&glob->swap_lru[i]); INIT_LIST_HEAD(&glob->device_list); atomic_set(&glob->bo_count, 0); @@ -109,27 +106,60 @@ static int ttm_global_init(void) long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { struct ttm_global *glob = &ttm_glob; + struct ttm_device *bdev; + int ret = -EBUSY; + + mutex_lock(&ttm_global_mutex); + list_for_each_entry(bdev, &glob->device_list, device_list) { + ret = ttm_device_swapout(bdev, ctx, gfp_flags); + if (ret > 0) { + list_move_tail(&bdev->device_list, &glob->device_list); + break; + } + } + mutex_unlock(&ttm_global_mutex); + return ret; +} +EXPORT_SYMBOL(ttm_global_swapout); + +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_resource_manager *man; struct ttm_buffer_object *bo; - unsigned i; + unsigned i, j; int ret; spin_lock(&glob->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - list_for_each_entry(bo, &glob->swap_lru[i], swap) { - uint32_t num_pages = bo->ttm->num_pages; - - ret = ttm_bo_swapout(bo, ctx, gfp_flags); - /* ttm_bo_swapout has dropped the lru_lock */ - if (!ret) - return num_pages; - if (ret != -EBUSY) - return ret; + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { + man = ttm_manager_type(bdev, i); + if (!man || !man->use_tt) + continue; + + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { + list_for_each_entry(bo, &man->lru[j], lru) { + long num_pages; + + if (!bo->ttm || + bo->ttm->page_flags & TTM_PAGE_FLAG_SG || + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) + continue; + + num_pages = bo->ttm->num_pages; + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } } } spin_unlock(&glob->lru_lock); return 0; } -EXPORT_SYMBOL(ttm_global_swapout); 
+EXPORT_SYMBOL(ttm_device_swapout); static void ttm_init_sysman(struct ttm_device *bdev) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 01da355b86f3..cef896126361 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev) vmw_execbuf_release_pinned_bo(dev_priv); vmw_resource_evict_all(dev_priv); vmw_release_device_early(dev_priv); - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0); + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0); if (dev_priv->enable_fb) vmw_fifo_resource_dec(dev_priv); if (atomic_read(&dev_priv->num_fifo_resources) != 0) { diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 5044ac330858..3587f660e8f4 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -144,7 +144,6 @@ struct ttm_buffer_object { struct list_head lru; struct list_head ddestroy; - struct list_head swap; /** * Members protected by a bo reservation. diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 8959c0075cfd..d007feef7676 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos { struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY]; - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY]; }; /* diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 6a0b267d4fe6..cda6efb4c34b 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -63,11 +63,6 @@ extern struct ttm_global { */ struct list_head device_list; - /** - * Protected by the lru_lock. - */ - struct list_head swap_lru[TTM_MAX_BO_PRIORITY]; - /** * Internal protection. 
*/ @@ -298,6 +293,8 @@ struct ttm_device { }; long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags); static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) -- 2.25.1 _______________________________________________ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ^ permalink raw reply related [flat|nested] 18+ messages in thread
end of thread, other threads:[~2021-03-19 9:41 UTC | newest] Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König 2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König 2021-03-15 18:54 ` kernel test robot 2021-03-15 18:54 ` kernel test robot 2021-03-15 18:54 ` Matthew Auld 2021-03-15 19:27 ` Christian König 2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König 2021-03-15 20:17 ` kernel test robot 2021-03-15 20:17 ` kernel test robot 2021-03-16 9:35 ` Daniel Vetter 2021-03-16 12:03 ` Christian König 2021-03-16 12:05 ` Daniel Vetter 2021-03-16 15:13 ` Christian König 2021-03-15 18:47 ` [PATCH 1/3] drm/ttm: move swapout logic around kernel test robot 2021-03-15 18:47 ` kernel test robot 2021-03-19 9:41 ` kernel test robot 2021-03-19 9:41 ` kernel test robot -- strict thread matches above, loose matches on Subject: below -- 2021-02-11 13:29 Christian König 2021-02-11 13:29 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.