* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-15 16:04 Christian König
2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
` (3 more replies)
0 siblings, 4 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a08dec7281fc..56d2e38af273 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1186,56 +1186,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1255,29 +1234,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1286,7 +1262,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6910111099c8..b991422e156c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 2/3] drm/ttm: remove swap LRU v2
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-15 16:04 ` Christian König
2021-03-15 18:54 ` kernel test robot
2021-03-15 18:54 ` Matthew Auld
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
` (2 subsequent siblings)
3 siblings, 2 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Instead evict round robin from each device's SYSTEM and TT domains.
v2: reorder num_pages access reported by Dan's script
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 33 ++--------------
drivers/gpu/drm/ttm/ttm_bo_util.c | 1 -
drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++--------
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 1 -
include/drm/ttm/ttm_bo_driver.h | 1 -
include/drm/ttm/ttm_device.h | 7 +---
7 files changed, 52 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 56d2e38af273..a1be88be357b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
struct ttm_device *bdev = bo->bdev;
- list_del_init(&bo->swap);
list_del_init(&bo->lru);
if (bdev->funcs->del_from_lru_notify)
@@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
man = ttm_manager_type(bdev, mem->mem_type);
list_move_tail(&bo->lru, &man->lru[bo->priority]);
- if (man->use_tt && bo->ttm &&
- !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
- TTM_PAGE_FLAG_SWAPPED))) {
- struct list_head *swap;
-
- swap = &ttm_glob.swap_lru[bo->priority];
- list_move_tail(&bo->swap, swap);
- } else {
- list_del_init(&bo->swap);
- }
if (bdev->funcs->del_from_lru_notify)
bdev->funcs->del_from_lru_notify(bo);
@@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
break;
}
- if (bo->ttm && !(bo->ttm->page_flags &
- (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
- ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
}
}
EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
@@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
list_bulk_move_tail(&man->lru[i], &pos->first->lru,
&pos->last->lru);
}
-
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
- struct list_head *lru;
-
- if (!pos->first)
- continue;
-
- dma_resv_assert_held(pos->first->base.resv);
- dma_resv_assert_held(pos->last->base.resv);
-
- lru = &ttm_glob.swap_lru[i];
- list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
- }
}
EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
@@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
kref_init(&bo->kref);
INIT_LIST_HEAD(&bo->lru);
INIT_LIST_HEAD(&bo->ddestroy);
- INIT_LIST_HEAD(&bo->swap);
bo->bdev = bdev;
bo->type = type;
bo->mem.mem_type = TTM_PL_SYSTEM;
@@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
bool locked;
int ret;
+ if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+ TTM_PAGE_FLAG_SWAPPED))
+ return false;
+
if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
return -EBUSY;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e5819fec4..a2a17c84ceb3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
atomic_inc(&ttm_glob.bo_count);
INIT_LIST_HEAD(&fbo->base.ddestroy);
INIT_LIST_HEAD(&fbo->base.lru);
- INIT_LIST_HEAD(&fbo->base.swap);
fbo->base.moving = NULL;
drm_vma_node_reset(&fbo->base.base.vma_node);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index dfc2a7e4e490..2c280fb1e992 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -67,7 +67,6 @@ static int ttm_global_init(void)
unsigned long num_pages;
struct sysinfo si;
int ret = 0;
- unsigned i;
mutex_lock(&ttm_global_mutex);
if (++ttm_glob_use_count > 1)
@@ -90,8 +89,6 @@ static int ttm_global_init(void)
goto out;
}
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
- INIT_LIST_HEAD(&glob->swap_lru[i]);
INIT_LIST_HEAD(&glob->device_list);
atomic_set(&glob->bo_count, 0);
@@ -109,27 +106,60 @@ static int ttm_global_init(void)
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
+ struct ttm_device *bdev;
+ int ret = -EBUSY;
+
+ mutex_lock(&ttm_global_mutex);
+ list_for_each_entry(bdev, &glob->device_list, device_list) {
+ ret = ttm_device_swapout(bdev, ctx, gfp_flags);
+ if (ret > 0) {
+ list_move_tail(&bdev->device_list, &glob->device_list);
+ break;
+ }
+ }
+ mutex_unlock(&ttm_global_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
- unsigned i;
+ unsigned i, j;
int ret;
spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- uint32_t num_pages = bo->ttm->num_pages;
-
- ret = ttm_bo_swapout(bo, ctx, gfp_flags);
- /* ttm_bo_swapout has dropped the lru_lock */
- if (!ret)
- return num_pages;
- if (ret != -EBUSY)
- return ret;
+ for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+ man = ttm_manager_type(bdev, i);
+ if (!man || !man->use_tt)
+ continue;
+
+ for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
+ list_for_each_entry(bo, &man->lru[j], lru) {
+ long num_pages;
+
+ if (!bo->ttm ||
+ bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+ bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+ continue;
+
+ num_pages = bo->ttm->num_pages;
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
}
}
spin_unlock(&glob->lru_lock);
return 0;
}
-EXPORT_SYMBOL(ttm_global_swapout);
+EXPORT_SYMBOL(ttm_device_swapout);
static void ttm_init_sysman(struct ttm_device *bdev)
{
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index b991422e156c..0e82b0662d9e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 5044ac330858..3587f660e8f4 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -144,7 +144,6 @@ struct ttm_buffer_object {
struct list_head lru;
struct list_head ddestroy;
- struct list_head swap;
/**
* Members protected by a bo reservation.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8959c0075cfd..d007feef7676 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
struct ttm_lru_bulk_move {
struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
- struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
};
/*
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 6a0b267d4fe6..cda6efb4c34b 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -63,11 +63,6 @@ extern struct ttm_global {
*/
struct list_head device_list;
- /**
- * Protected by the lru_lock.
- */
- struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
-
/**
* Internal protection.
*/
@@ -298,6 +293,8 @@ struct ttm_device {
};
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
@ 2021-03-15 16:04 ` Christian König
2021-03-15 20:17 ` kernel test robot
2021-03-16 9:35 ` Daniel Vetter
2021-03-15 18:47 ` kernel test robot
2021-03-19 9:41 ` kernel test robot
3 siblings, 2 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Instead of having a global lock, use a per device LRU lock.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
drivers/gpu/drm/qxl/qxl_release.c | 5 +--
drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
include/drm/ttm/ttm_bo_driver.h | 4 +--
include/drm/ttm/ttm_device.h | 4 +--
8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
return;
}
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
&bo->shadow->tbo.mem,
&vm->lru_bulk_move);
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c96d414..b19f2f00b215 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
release->id | 0xf0000000, release->base.seqno);
trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock);
-
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a1be88be357b..a8103c8718a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
* reference it any more. The only tricky case is the trylock on
* the resv object while holding the lru_lock.
*/
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
bo->base.resv = &bo->base._resv;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
else if (lret == 0)
return -EBUSY;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
/*
* We raced, and lost, someone else holds the reservation now,
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
* delayed destruction would succeed, so just return success
* here.
*/
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
ret = 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (ret || unlikely(list_empty(&bo->ddestroy))) {
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return ret;
}
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
*/
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
{
- struct ttm_global *glob = &ttm_glob;
struct list_head removed;
bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
while (!list_empty(&bdev->ddestroy)) {
struct ttm_buffer_object *bo;
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
empty = list_empty(&bdev->ddestroy);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return empty;
}
@@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_bo_flush_all_fences(bo);
bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
/*
* Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
return;
}
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_del_from_lru(bo);
list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
unsigned i;
int ret;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
busy_bo = NULL;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
ttm_bo_put(busy_bo);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
return ret;
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx);
if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
mem->mem_type = place->mem_type;
mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
-
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
bool locked;
int ret;
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo);
/* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
/*
* Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2c280fb1e992..924d892109e8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
ttm_pool_mgr_init(num_pages * 50 / 100);
ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock);
glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
unsigned i, j;
int ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
}
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return 0;
}
EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager;
INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+ spin_lock_init(&bdev->lru_lock);
INIT_LIST_HEAD(&bdev->ddestroy);
bdev->dev_mapping = mapping;
mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
if (ttm_bo_delayed_delete(bdev, true))
pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
if (list_empty(&man->lru[0]))
pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool);
ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
dma_resv_add_shared_fence(bo->base.resv, fence);
else
dma_resv_add_excl_fence(bo->base.resv, fence);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
.force_alloc = true
};
- struct ttm_global *glob = &ttm_glob;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
* Can't use standard list traversal since we're unlocking.
*/
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
NULL);
if (ret)
return ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
static inline void
ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
{
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
*/
struct page *dummy_read_page;
- spinlock_t lru_lock;
/**
* Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
struct ttm_pool pool;
/*
- * Protected by the global:lru lock.
+ * Protection for the per manager LRU and ddestroy lists.
*/
+ spinlock_t lru_lock;
struct list_head ddestroy;
/*
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-15 18:47 ` kernel test robot
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
` (2 subsequent siblings)
3 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:47 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3093 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
# save the attached .config to linux build tree
make W=1 ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead
vim +110 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> 110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 64665 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-15 18:47 ` kernel test robot
0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:47 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3165 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
# save the attached .config to linux build tree
make W=1 ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead
vim +110 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> 110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 64665 bytes --]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
@ 2021-03-15 18:54 ` kernel test robot
2021-03-15 18:54 ` Matthew Auld
1 sibling, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:54 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3216 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-s002-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.3-277-gc089cd2d-dirty
# https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda122e7b
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
"sparse warnings: (new ones prefixed by >>)"
drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static?
drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. Should it be static?
>> drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit
vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c
124
> 125 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
126 gfp_t gfp_flags)
127 {
128 struct ttm_global *glob = &ttm_glob;
129 struct ttm_resource_manager *man;
130 struct ttm_buffer_object *bo;
131 unsigned i, j;
132 int ret;
133
134 spin_lock(&glob->lru_lock);
135 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
136 man = ttm_manager_type(bdev, i);
137 if (!man || !man->use_tt)
138 continue;
139
140 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
141 list_for_each_entry(bo, &man->lru[j], lru) {
142 long num_pages;
143
144 if (!bo->ttm ||
145 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
146 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
147 continue;
148
149 num_pages = bo->ttm->num_pages;
150 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
151 /* ttm_bo_swapout has dropped the lru_lock */
152 if (!ret)
153 return num_pages;
154 if (ret != -EBUSY)
155 return ret;
156 }
157 }
158 }
159 spin_unlock(&glob->lru_lock);
160 return 0;
161 }
162 EXPORT_SYMBOL(ttm_device_swapout);
163
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 39354 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
@ 2021-03-15 18:54 ` kernel test robot
0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 18:54 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3297 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-s002-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.3-277-gc089cd2d-dirty
# https://github.com/0day-ci/linux/commit/70ae63f3a85b9791dfcf38034c304aedda122e7b
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
git checkout 70ae63f3a85b9791dfcf38034c304aedda122e7b
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
"sparse warnings: (new ones prefixed by >>)"
drivers/gpu/drm/ttm/ttm_device.c:42:1: sparse: sparse: symbol 'ttm_global_mutex' was not declared. Should it be static?
drivers/gpu/drm/ttm/ttm_device.c:43:10: sparse: sparse: symbol 'ttm_glob_use_count' was not declared. Should it be static?
>> drivers/gpu/drm/ttm/ttm_device.c:125:6: sparse: sparse: context imbalance in 'ttm_device_swapout' - wrong count at exit
vim +/ttm_device_swapout +125 drivers/gpu/drm/ttm/ttm_device.c
124
> 125 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
126 gfp_t gfp_flags)
127 {
128 struct ttm_global *glob = &ttm_glob;
129 struct ttm_resource_manager *man;
130 struct ttm_buffer_object *bo;
131 unsigned i, j;
132 int ret;
133
134 spin_lock(&glob->lru_lock);
135 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
136 man = ttm_manager_type(bdev, i);
137 if (!man || !man->use_tt)
138 continue;
139
140 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
141 list_for_each_entry(bo, &man->lru[j], lru) {
142 long num_pages;
143
144 if (!bo->ttm ||
145 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
146 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
147 continue;
148
149 num_pages = bo->ttm->num_pages;
150 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
151 /* ttm_bo_swapout has dropped the lru_lock */
152 if (!ret)
153 return num_pages;
154 if (ret != -EBUSY)
155 return ret;
156 }
157 }
158 }
159 spin_unlock(&glob->lru_lock);
160 return 0;
161 }
162 EXPORT_SYMBOL(ttm_device_swapout);
163
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 39354 bytes --]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
2021-03-15 18:54 ` kernel test robot
@ 2021-03-15 18:54 ` Matthew Auld
2021-03-15 19:27 ` Christian König
1 sibling, 1 reply; 19+ messages in thread
From: Matthew Auld @ 2021-03-15 18:54 UTC (permalink / raw)
To: Christian König; +Cc: ray.huang, ML dri-devel
On Mon, 15 Mar 2021 at 16:04, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Instead evict round robin from each devices SYSTEM and TT domain.
>
> v2: reorder num_pages access reported by Dan's script
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 33 ++--------------
> drivers/gpu/drm/ttm/ttm_bo_util.c | 1 -
> drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++--------
> drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
> include/drm/ttm/ttm_bo_api.h | 1 -
> include/drm/ttm/ttm_bo_driver.h | 1 -
> include/drm/ttm/ttm_device.h | 7 +---
> 7 files changed, 52 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 56d2e38af273..a1be88be357b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> {
> struct ttm_device *bdev = bo->bdev;
>
> - list_del_init(&bo->swap);
> list_del_init(&bo->lru);
>
> if (bdev->funcs->del_from_lru_notify)
> @@ -104,16 +103,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>
> man = ttm_manager_type(bdev, mem->mem_type);
> list_move_tail(&bo->lru, &man->lru[bo->priority]);
> - if (man->use_tt && bo->ttm &&
> - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> - TTM_PAGE_FLAG_SWAPPED))) {
> - struct list_head *swap;
> -
> - swap = &ttm_glob.swap_lru[bo->priority];
> - list_move_tail(&bo->swap, swap);
> - } else {
> - list_del_init(&bo->swap);
> - }
>
> if (bdev->funcs->del_from_lru_notify)
> bdev->funcs->del_from_lru_notify(bo);
> @@ -128,9 +117,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
> break;
> }
> - if (bo->ttm && !(bo->ttm->page_flags &
> - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
> - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
> }
> }
> EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> @@ -168,20 +154,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> &pos->last->lru);
> }
> -
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
> - struct list_head *lru;
> -
> - if (!pos->first)
> - continue;
> -
> - dma_resv_assert_held(pos->first->base.resv);
> - dma_resv_assert_held(pos->last->base.resv);
> -
> - lru = &ttm_glob.swap_lru[i];
> - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
> - }
> }
> EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>
> @@ -1058,7 +1030,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
> kref_init(&bo->kref);
> INIT_LIST_HEAD(&bo->lru);
> INIT_LIST_HEAD(&bo->ddestroy);
> - INIT_LIST_HEAD(&bo->swap);
> bo->bdev = bdev;
> bo->type = type;
> bo->mem.mem_type = TTM_PL_SYSTEM;
> @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> bool locked;
> int ret;
>
> + if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> + TTM_PAGE_FLAG_SWAPPED))
> + return false;
> +
return 0; ?
Seems inconsistent to return zero here and not drop the lru lock? Or
maybe turn this into a programmer error, since the current caller
already checks for the above?
> if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
> return -EBUSY;
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 031e5819fec4..a2a17c84ceb3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> atomic_inc(&ttm_glob.bo_count);
> INIT_LIST_HEAD(&fbo->base.ddestroy);
> INIT_LIST_HEAD(&fbo->base.lru);
> - INIT_LIST_HEAD(&fbo->base.swap);
> fbo->base.moving = NULL;
> drm_vma_node_reset(&fbo->base.base.vma_node);
>
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index dfc2a7e4e490..2c280fb1e992 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -67,7 +67,6 @@ static int ttm_global_init(void)
> unsigned long num_pages;
> struct sysinfo si;
> int ret = 0;
> - unsigned i;
>
> mutex_lock(&ttm_global_mutex);
> if (++ttm_glob_use_count > 1)
> @@ -90,8 +89,6 @@ static int ttm_global_init(void)
> goto out;
> }
>
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> - INIT_LIST_HEAD(&glob->swap_lru[i]);
> INIT_LIST_HEAD(&glob->device_list);
> atomic_set(&glob->bo_count, 0);
>
> @@ -109,27 +106,60 @@ static int ttm_global_init(void)
> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> {
> struct ttm_global *glob = &ttm_glob;
> + struct ttm_device *bdev;
> + int ret = -EBUSY;
> +
> + mutex_lock(&ttm_global_mutex);
> + list_for_each_entry(bdev, &glob->device_list, device_list) {
> + ret = ttm_device_swapout(bdev, ctx, gfp_flags);
Mixing int and long for num_pages.
Does ttm enforce a maximum page count somewhere for object sizes?
Something like INT_MAX, since it doesn't look like ttm is consistently
using the same type(unsigned long?) when representing the number of
pages for an object?
> + if (ret > 0) {
> + list_move_tail(&bdev->device_list, &glob->device_list);
> + break;
> + }
> + }
> + mutex_unlock(&ttm_global_mutex);
> + return ret;
> +}
> +EXPORT_SYMBOL(ttm_global_swapout);
> +
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> + gfp_t gfp_flags)
> +{
> + struct ttm_global *glob = &ttm_glob;
> + struct ttm_resource_manager *man;
> struct ttm_buffer_object *bo;
> - unsigned i;
> + unsigned i, j;
> int ret;
>
> spin_lock(&glob->lru_lock);
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> - uint32_t num_pages = bo->ttm->num_pages;
> -
> - ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> - /* ttm_bo_swapout has dropped the lru_lock */
> - if (!ret)
> - return num_pages;
> - if (ret != -EBUSY)
> - return ret;
> + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> + man = ttm_manager_type(bdev, i);
> + if (!man || !man->use_tt)
> + continue;
> +
> + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> + list_for_each_entry(bo, &man->lru[j], lru) {
> + long num_pages;
> +
> + if (!bo->ttm ||
> + bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> + continue;
> +
> + num_pages = bo->ttm->num_pages;
> + ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> + /* ttm_bo_swapout has dropped the lru_lock */
> + if (!ret)
> + return num_pages;
> + if (ret != -EBUSY)
> + return ret;
> + }
> }
> }
> spin_unlock(&glob->lru_lock);
> return 0;
> }
> -EXPORT_SYMBOL(ttm_global_swapout);
> +EXPORT_SYMBOL(ttm_device_swapout);
>
> static void ttm_init_sysman(struct ttm_device *bdev)
> {
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index b991422e156c..0e82b0662d9e 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
> vmw_execbuf_release_pinned_bo(dev_priv);
> vmw_resource_evict_all(dev_priv);
> vmw_release_device_early(dev_priv);
> - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
Is this the intended behaviour? ttm_device_swapout() still just
returns num_pages if it swapped something out. I assume this wants to
keep swapping stuff out, until it can't anymore. Or am I missing
something?
> if (dev_priv->enable_fb)
> vmw_fifo_resource_dec(dev_priv);
> if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 5044ac330858..3587f660e8f4 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -144,7 +144,6 @@ struct ttm_buffer_object {
>
> struct list_head lru;
> struct list_head ddestroy;
> - struct list_head swap;
>
> /**
> * Members protected by a bo reservation.
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 8959c0075cfd..d007feef7676 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
> struct ttm_lru_bulk_move {
> struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
> };
>
> /*
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 6a0b267d4fe6..cda6efb4c34b 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -63,11 +63,6 @@ extern struct ttm_global {
> */
> struct list_head device_list;
>
> - /**
> - * Protected by the lru_lock.
> - */
> - struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
> -
> /**
> * Internal protection.
> */
> @@ -298,6 +293,8 @@ struct ttm_device {
> };
>
> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> + gfp_t gfp_flags);
>
> static inline struct ttm_resource_manager *
> ttm_manager_type(struct ttm_device *bdev, int mem_type)
> --
> 2.25.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v2
2021-03-15 18:54 ` Matthew Auld
@ 2021-03-15 19:27 ` Christian König
0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-03-15 19:27 UTC (permalink / raw)
To: Matthew Auld; +Cc: ray.huang, ML dri-devel
Am 15.03.21 um 19:54 schrieb Matthew Auld:
> On Mon, 15 Mar 2021 at 16:04, Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> [SNIP]
>> @@ -1193,6 +1164,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>> bool locked;
>> int ret;
>>
>> + if (!bo->ttm || bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
>> + TTM_PAGE_FLAG_SWAPPED))
>> + return false;
>> +
> return 0; ?
>
> Seems inconsistent to return zero here and not drop the lru lock? Or
> maybe turn this into a programmer error, since the current caller
> already checks for the above?
Thanks, that is just an artifact from rebasing and should be removed.
>> [SNIP]
>>
>> @@ -109,27 +106,60 @@ static int ttm_global_init(void)
>> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
>> {
>> struct ttm_global *glob = &ttm_glob;
>> + struct ttm_device *bdev;
>> + int ret = -EBUSY;
>> +
>> + mutex_lock(&ttm_global_mutex);
>> + list_for_each_entry(bdev, &glob->device_list, device_list) {
>> + ret = ttm_device_swapout(bdev, ctx, gfp_flags);
> Mixing int and long for num_pages.
>
> Does ttm enforce a maximum page count somewhere for object sizes?
We should use 32 bit values for the number of pages in TTM, even signed
values allow for 8TB large BOs.
And I really hope that we can get rid of the BO approach in general
before we ever come close to that limit.
> Something like INT_MAX, since it doesn't look like ttm is consistently
> using the same type(unsigned long?) when representing the number of
> pages for an object?
I should probably add a check for that in the tt code, yes.
> [SNIP]
> static void ttm_init_sysman(struct ttm_device *bdev)
> {
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index b991422e156c..0e82b0662d9e 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
> vmw_execbuf_release_pinned_bo(dev_priv);
> vmw_resource_evict_all(dev_priv);
> vmw_release_device_early(dev_priv);
> - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) == 0);
> Is this the intended behaviour? ttm_device_swapout() still just
> returns num_pages if it swapped something out. I assume this wants to
> keep swapping stuff out, until it can't anymore. Or am I missing
> something?
Indeed that's a mix up. Thanks for pointing that out.
Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-15 20:17 ` kernel test robot
2021-03-16 9:35 ` Daniel Vetter
1 sibling, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4014 bytes --]
Hi "Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)
vim +158 drivers/gpu/drm/ttm/ttm_device.c
70ae63f3a85b97 Christian König 2021-03-15 123
70ae63f3a85b97 Christian König 2021-03-15 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15 125 gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15 126 {
70ae63f3a85b97 Christian König 2021-03-15 127 struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15 128 struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15 129 unsigned i, j;
824dca26fe3958 Christian König 2021-03-15 130 int ret;
824dca26fe3958 Christian König 2021-03-15 131
1ed8d8fc515b90 Christian König 2021-03-15 132 spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15 134 man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15 135 if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15 136 continue;
70ae63f3a85b97 Christian König 2021-03-15 137
70ae63f3a85b97 Christian König 2021-03-15 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15 139 list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15 140 long num_pages;
824dca26fe3958 Christian König 2021-03-15 141
70ae63f3a85b97 Christian König 2021-03-15 142 if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15 145 continue;
70ae63f3a85b97 Christian König 2021-03-15 146
70ae63f3a85b97 Christian König 2021-03-15 147 num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15 148 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15 149 /* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15 150 if (!ret)
824dca26fe3958 Christian König 2021-03-15 151 return num_pages;
824dca26fe3958 Christian König 2021-03-15 152 if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15 153 return ret;
824dca26fe3958 Christian König 2021-03-15 154 }
824dca26fe3958 Christian König 2021-03-15 155 }
70ae63f3a85b97 Christian König 2021-03-15 156 }
1ed8d8fc515b90 Christian König 2021-03-15 157 spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158 return 0;
824dca26fe3958 Christian König 2021-03-15 159 }
70ae63f3a85b97 Christian König 2021-03-15 160 EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15 161
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36557 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
@ 2021-03-15 20:17 ` kernel test robot
0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4122 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)
vim +158 drivers/gpu/drm/ttm/ttm_device.c
70ae63f3a85b97 Christian König 2021-03-15 123
70ae63f3a85b97 Christian König 2021-03-15 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15 125 gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15 126 {
70ae63f3a85b97 Christian König 2021-03-15 127 struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15 128 struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15 129 unsigned i, j;
824dca26fe3958 Christian König 2021-03-15 130 int ret;
824dca26fe3958 Christian König 2021-03-15 131
1ed8d8fc515b90 Christian König 2021-03-15 132 spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15 134 man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15 135 if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15 136 continue;
70ae63f3a85b97 Christian König 2021-03-15 137
70ae63f3a85b97 Christian König 2021-03-15 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15 139 list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15 140 long num_pages;
824dca26fe3958 Christian König 2021-03-15 141
70ae63f3a85b97 Christian König 2021-03-15 142 if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15 145 continue;
70ae63f3a85b97 Christian König 2021-03-15 146
70ae63f3a85b97 Christian König 2021-03-15 147 num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15 148 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15 149 /* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15 150 if (!ret)
824dca26fe3958 Christian König 2021-03-15 151 return num_pages;
824dca26fe3958 Christian König 2021-03-15 152 if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15 153 return ret;
824dca26fe3958 Christian König 2021-03-15 154 }
824dca26fe3958 Christian König 2021-03-15 155 }
70ae63f3a85b97 Christian König 2021-03-15 156 }
1ed8d8fc515b90 Christian König 2021-03-15 157 spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158 return 0;
824dca26fe3958 Christian König 2021-03-15 159 }
70ae63f3a85b97 Christian König 2021-03-15 160 EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15 161
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36557 bytes --]
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-15 20:17 ` kernel test robot
@ 2021-03-16 9:35 ` Daniel Vetter
2021-03-16 12:03 ` Christian König
1 sibling, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2021-03-16 9:35 UTC (permalink / raw)
To: Christian König; +Cc: ray.huang, dri-devel
On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
> Instead of having a global lock.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
I guess per zone lru lock is a lot more work since then we need to handle
ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
lingo.
-Daniel
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
> include/drm/ttm/ttm_bo_driver.h | 4 +--
> include/drm/ttm/ttm_device.h | 4 +--
> 8 files changed, 43 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> struct amdgpu_vm_bo_base *bo_base;
>
> if (vm->bulk_moveable) {
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&adev->mman.bdev.lru_lock);
> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&adev->mman.bdev.lru_lock);
> return;
> }
>
> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&adev->mman.bdev.lru_lock);
> list_for_each_entry(bo_base, &vm->idle, vm_status) {
> struct amdgpu_bo *bo = bo_base->bo;
>
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> &bo->shadow->tbo.mem,
> &vm->lru_bulk_move);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&adev->mman.bdev.lru_lock);
>
> vm->bulk_moveable = true;
> }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> release->id | 0xf0000000, release->base.seqno);
> trace_dma_fence_emit(&release->base);
>
> - spin_lock(&ttm_glob.lru_lock);
> -
> list_for_each_entry(entry, &release->bos, head) {
> bo = entry->bo;
>
> dma_resv_add_shared_fence(bo->base.resv, &release->base);
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> ww_acquire_fini(&release->ticket);
> }
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a1be88be357b..a8103c8718a3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> * reference it any more. The only tricky case is the trylock on
> * the resv object while holding the lru_lock.
> */
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> bo->base.resv = &bo->base._resv;
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> }
>
> return r;
> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> 30 * HZ);
> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> else if (lret == 0)
> return -EBUSY;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> /*
> * We raced, and lost, someone else holds the reservation now,
> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> * delayed destruction would succeed, so just return success
> * here.
> */
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> return 0;
> }
> ret = 0;
> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> if (ret || unlikely(list_empty(&bo->ddestroy))) {
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> return ret;
> }
>
> ttm_bo_del_from_lru(bo);
> list_del_init(&bo->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> ttm_bo_cleanup_memtype_use(bo);
>
> if (unlock_resv)
> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> */
> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct list_head removed;
> bool empty;
>
> INIT_LIST_HEAD(&removed);
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> while (!list_empty(&bdev->ddestroy)) {
> struct ttm_buffer_object *bo;
>
> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> continue;
>
> if (remove_all || bo->base.resv != &bo->base._resv) {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> dma_resv_lock(bo->base.resv, NULL);
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>
> } else if (dma_resv_trylock(bo->base.resv)) {
> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> } else {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> }
>
> ttm_bo_put(bo);
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> }
> list_splice_tail(&removed, &bdev->ddestroy);
> empty = list_empty(&bdev->ddestroy);
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> return empty;
> }
> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
> ttm_bo_flush_all_fences(bo);
> bo->deleted = true;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
>
> /*
> * Make pinned bos immediately available to
> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>
> kref_init(&bo->kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> schedule_delayed_work(&bdev->wq,
> ((HZ / 100) < 1) ? 1 : HZ / 100);
> return;
> }
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_del_from_lru(bo);
> list_del(&bo->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> ttm_bo_cleanup_memtype_use(bo);
> dma_resv_unlock(bo->base.resv);
> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> unsigned i;
> int ret;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> list_for_each_entry(bo, &man->lru[i], lru) {
> bool busy;
> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> if (!bo) {
> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> busy_bo = NULL;
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> if (busy_bo)
> ttm_bo_put(busy_bo);
> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> return ret;
> }
>
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> ret = ttm_bo_evict(bo, ctx);
> if (locked)
> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> mem->mem_type = place->mem_type;
> mem->placement = place->flags;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_move_to_lru_tail(bo, mem, NULL);
> - spin_unlock(&ttm_glob.lru_lock);
> -
> + spin_unlock(&bo->bdev->lru_lock);
> return 0;
> }
>
> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> gfp_t gfp_flags)
> {
> - struct ttm_global *glob = &ttm_glob;
> bool locked;
> int ret;
>
> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>
> ttm_bo_del_from_lru(bo);
> /* TODO: Cleanup the locking */
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> /*
> * Move to system cached
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 2c280fb1e992..924d892109e8 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> ttm_pool_mgr_init(num_pages * 50 / 100);
> ttm_tt_mgr_init();
>
> - spin_lock_init(&glob->lru_lock);
> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>
> if (unlikely(glob->dummy_read_page == NULL)) {
> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> gfp_t gfp_flags)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct ttm_resource_manager *man;
> struct ttm_buffer_object *bo;
> unsigned i, j;
> int ret;
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> man = ttm_manager_type(bdev, i);
> if (!man || !man->use_tt)
> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> }
> }
> }
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> return 0;
> }
> EXPORT_SYMBOL(ttm_device_swapout);
> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>
> bdev->vma_manager = vma_manager;
> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> + spin_lock_init(&bdev->lru_lock);
> INIT_LIST_HEAD(&bdev->ddestroy);
> bdev->dev_mapping = mapping;
> mutex_lock(&ttm_global_mutex);
> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>
> void ttm_device_fini(struct ttm_device *bdev)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct ttm_resource_manager *man;
> unsigned i;
>
> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> if (ttm_bo_delayed_delete(bdev, true))
> pr_debug("Delayed destroy list was clean\n");
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> if (list_empty(&man->lru[0]))
> pr_debug("Swap list %d was clean\n", i);
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> ttm_pool_fini(&bdev->pool);
> ttm_global_release();
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 690ab97d52b7..071c48d672c6 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> if (list_empty(list))
> return;
>
> - spin_lock(&ttm_glob.lru_lock);
> list_for_each_entry(entry, list, head) {
> struct ttm_buffer_object *bo = entry->bo;
>
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
>
> if (ticket)
> ww_acquire_fini(ticket);
> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> if (list_empty(list))
> return;
>
> - spin_lock(&ttm_glob.lru_lock);
> list_for_each_entry(entry, list, head) {
> struct ttm_buffer_object *bo = entry->bo;
>
> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> dma_resv_add_shared_fence(bo->base.resv, fence);
> else
> dma_resv_add_excl_fence(bo->base.resv, fence);
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> if (ticket)
> ww_acquire_fini(ticket);
> }
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index ed1672a9f332..04f2eef653ab 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> .no_wait_gpu = false,
> .force_alloc = true
> };
> - struct ttm_global *glob = &ttm_glob;
> struct dma_fence *fence;
> int ret;
> unsigned i;
> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> * Can't use standard list traversal since we're unlocking.
> */
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> while (!list_empty(&man->lru[i])) {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> NULL);
> if (ret)
> return ret;
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> }
> }
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> spin_lock(&man->move_lock);
> fence = dma_fence_get(man->move);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index d007feef7676..dbccac957f8f 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> static inline void
> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> {
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> }
>
> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index cda6efb4c34b..bae56d29e8ff 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -56,7 +56,6 @@ extern struct ttm_global {
> */
>
> struct page *dummy_read_page;
> - spinlock_t lru_lock;
>
> /**
> * Protected by ttm_global_mutex.
> @@ -277,8 +276,9 @@ struct ttm_device {
> struct ttm_pool pool;
>
> /*
> - * Protected by the global:lru lock.
> + * Protection for the per manager LRU and ddestroy lists.
> */
> + spinlock_t lru_lock;
> struct list_head ddestroy;
>
> /*
> --
> 2.25.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-16 9:35 ` Daniel Vetter
@ 2021-03-16 12:03 ` Christian König
2021-03-16 12:05 ` Daniel Vetter
0 siblings, 1 reply; 19+ messages in thread
From: Christian König @ 2021-03-16 12:03 UTC (permalink / raw)
To: Daniel Vetter; +Cc: ray.huang, dri-devel
Am 16.03.21 um 10:35 schrieb Daniel Vetter:
> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
>> Instead of having a global lock.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> I guess per zone lru lock is a lot more work since then we need to handle
> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
> lingo.
Making the LRU per resource manager is the long term goal, yes.
My key idea so far is that we make bo->mem a pointer and then move the
LRU handling into the resource object instead of the BO.
The resource object then just references the BO and so that we can
figure out which BO to evict or which fence to wait for to free up a
resource.
Regards,
Christian.
> -Daniel
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
>> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
>> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
>> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
>> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
>> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
>> include/drm/ttm/ttm_bo_driver.h | 4 +--
>> include/drm/ttm/ttm_device.h | 4 +--
>> 8 files changed, 43 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9d19078246c8..ae18c0e32347 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>> struct amdgpu_vm_bo_base *bo_base;
>>
>> if (vm->bulk_moveable) {
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&adev->mman.bdev.lru_lock);
>> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&adev->mman.bdev.lru_lock);
>> return;
>> }
>>
>> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&adev->mman.bdev.lru_lock);
>> list_for_each_entry(bo_base, &vm->idle, vm_status) {
>> struct amdgpu_bo *bo = bo_base->bo;
>>
>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>> &bo->shadow->tbo.mem,
>> &vm->lru_bulk_move);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>
>> vm->bulk_moveable = true;
>> }
>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>> index f5845c96d414..b19f2f00b215 100644
>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>> release->id | 0xf0000000, release->base.seqno);
>> trace_dma_fence_emit(&release->base);
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> -
>> list_for_each_entry(entry, &release->bos, head) {
>> bo = entry->bo;
>>
>> dma_resv_add_shared_fence(bo->base.resv, &release->base);
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> ww_acquire_fini(&release->ticket);
>> }
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a1be88be357b..a8103c8718a3 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>> * reference it any more. The only tricky case is the trylock on
>> * the resv object while holding the lru_lock.
>> */
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> bo->base.resv = &bo->base._resv;
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> }
>>
>> return r;
>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>
>> if (unlock_resv)
>> dma_resv_unlock(bo->base.resv);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>> 30 * HZ);
>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> else if (lret == 0)
>> return -EBUSY;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>> /*
>> * We raced, and lost, someone else holds the reservation now,
>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> * delayed destruction would succeed, so just return success
>> * here.
>> */
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> return 0;
>> }
>> ret = 0;
>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> if (ret || unlikely(list_empty(&bo->ddestroy))) {
>> if (unlock_resv)
>> dma_resv_unlock(bo->base.resv);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> return ret;
>> }
>>
>> ttm_bo_del_from_lru(bo);
>> list_del_init(&bo->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> ttm_bo_cleanup_memtype_use(bo);
>>
>> if (unlock_resv)
>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> */
>> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct list_head removed;
>> bool empty;
>>
>> INIT_LIST_HEAD(&removed);
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> while (!list_empty(&bdev->ddestroy)) {
>> struct ttm_buffer_object *bo;
>>
>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>> continue;
>>
>> if (remove_all || bo->base.resv != &bo->base._resv) {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> dma_resv_lock(bo->base.resv, NULL);
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>
>> } else if (dma_resv_trylock(bo->base.resv)) {
>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>> } else {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> }
>>
>> ttm_bo_put(bo);
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> }
>> list_splice_tail(&removed, &bdev->ddestroy);
>> empty = list_empty(&bdev->ddestroy);
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> return empty;
>> }
>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>> ttm_bo_flush_all_fences(bo);
>> bo->deleted = true;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>>
>> /*
>> * Make pinned bos immediately available to
>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>>
>> kref_init(&bo->kref);
>> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> schedule_delayed_work(&bdev->wq,
>> ((HZ / 100) < 1) ? 1 : HZ / 100);
>> return;
>> }
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_del_from_lru(bo);
>> list_del(&bo->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> ttm_bo_cleanup_memtype_use(bo);
>> dma_resv_unlock(bo->base.resv);
>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> unsigned i;
>> int ret;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> list_for_each_entry(bo, &man->lru[i], lru) {
>> bool busy;
>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> if (!bo) {
>> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>> busy_bo = NULL;
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>> if (busy_bo)
>> ttm_bo_put(busy_bo);
>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> return ret;
>> }
>>
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> ret = ttm_bo_evict(bo, ctx);
>> if (locked)
>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>> mem->mem_type = place->mem_type;
>> mem->placement = place->flags;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_move_to_lru_tail(bo, mem, NULL);
>> - spin_unlock(&ttm_glob.lru_lock);
>> -
>> + spin_unlock(&bo->bdev->lru_lock);
>> return 0;
>> }
>>
>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>> gfp_t gfp_flags)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> bool locked;
>> int ret;
>>
>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>
>> ttm_bo_del_from_lru(bo);
>> /* TODO: Cleanup the locking */
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> /*
>> * Move to system cached
>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>> index 2c280fb1e992..924d892109e8 100644
>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>> ttm_pool_mgr_init(num_pages * 50 / 100);
>> ttm_tt_mgr_init();
>>
>> - spin_lock_init(&glob->lru_lock);
>> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>
>> if (unlikely(glob->dummy_read_page == NULL)) {
>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>> gfp_t gfp_flags)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct ttm_resource_manager *man;
>> struct ttm_buffer_object *bo;
>> unsigned i, j;
>> int ret;
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>> man = ttm_manager_type(bdev, i);
>> if (!man || !man->use_tt)
>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>> }
>> }
>> }
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> return 0;
>> }
>> EXPORT_SYMBOL(ttm_device_swapout);
>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>
>> bdev->vma_manager = vma_manager;
>> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>> + spin_lock_init(&bdev->lru_lock);
>> INIT_LIST_HEAD(&bdev->ddestroy);
>> bdev->dev_mapping = mapping;
>> mutex_lock(&ttm_global_mutex);
>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>
>> void ttm_device_fini(struct ttm_device *bdev)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct ttm_resource_manager *man;
>> unsigned i;
>>
>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>> if (ttm_bo_delayed_delete(bdev, true))
>> pr_debug("Delayed destroy list was clean\n");
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>> if (list_empty(&man->lru[0]))
>> pr_debug("Swap list %d was clean\n", i);
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> ttm_pool_fini(&bdev->pool);
>> ttm_global_release();
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 690ab97d52b7..071c48d672c6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>> if (list_empty(list))
>> return;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> list_for_each_entry(entry, list, head) {
>> struct ttm_buffer_object *bo = entry->bo;
>>
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>>
>> if (ticket)
>> ww_acquire_fini(ticket);
>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>> if (list_empty(list))
>> return;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> list_for_each_entry(entry, list, head) {
>> struct ttm_buffer_object *bo = entry->bo;
>>
>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>> dma_resv_add_shared_fence(bo->base.resv, fence);
>> else
>> dma_resv_add_excl_fence(bo->base.resv, fence);
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> if (ticket)
>> ww_acquire_fini(ticket);
>> }
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index ed1672a9f332..04f2eef653ab 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>> .no_wait_gpu = false,
>> .force_alloc = true
>> };
>> - struct ttm_global *glob = &ttm_glob;
>> struct dma_fence *fence;
>> int ret;
>> unsigned i;
>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>> * Can't use standard list traversal since we're unlocking.
>> */
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> while (!list_empty(&man->lru[i])) {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>> NULL);
>> if (ret)
>> return ret;
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> }
>> }
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> spin_lock(&man->move_lock);
>> fence = dma_fence_get(man->move);
>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> index d007feef7676..dbccac957f8f 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>> static inline void
>> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>> {
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> }
>>
>> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index cda6efb4c34b..bae56d29e8ff 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>> */
>>
>> struct page *dummy_read_page;
>> - spinlock_t lru_lock;
>>
>> /**
>> * Protected by ttm_global_mutex.
>> @@ -277,8 +276,9 @@ struct ttm_device {
>> struct ttm_pool pool;
>>
>> /*
>> - * Protected by the global:lru lock.
>> + * Protection for the per manager LRU and ddestroy lists.
>> */
>> + spinlock_t lru_lock;
>> struct list_head ddestroy;
>>
>> /*
>> --
>> 2.25.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-16 12:03 ` Christian König
@ 2021-03-16 12:05 ` Daniel Vetter
2021-03-16 15:13 ` Christian König
0 siblings, 1 reply; 19+ messages in thread
From: Daniel Vetter @ 2021-03-16 12:05 UTC (permalink / raw)
To: Christian König; +Cc: Huang Rui, dri-devel
On Tue, Mar 16, 2021 at 1:03 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 16.03.21 um 10:35 schrieb Daniel Vetter:
> > On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
> >> Instead of having a global lock.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > I guess per zone lru lock is a lot more work since then we need to handle
> > ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
> > lingo.
>
> Making the LRU per resource manager is the long term goal, yes.
>
> My key idea so far is that we make bo->mem a pointer and then move the
> LRU handling into the resource object instead of the BO.
>
> The resource object then just references the BO and so that we can
> figure out which BO to evict or which fence to wait for to free up a
> resource.
Hm yeah that could work out fairly nicely. Both from locking but also
refcounting pov. And maybe we could then use entirely free-standing
mem objects instead of ghost objects? Since that's a part of ttm I
don't grok and it always looks a bit like a hack to me. So for these
ghost mem objects you'd only need the lru + dma_fence_wait (can grab a
fence ref under the lru and then drop lru lock for that) for eviction,
no dma_resv_lock.
-Daniel
>
> Regards,
> Christian.
>
> > -Daniel
> >
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
> >> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
> >> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
> >> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
> >> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
> >> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
> >> include/drm/ttm/ttm_bo_driver.h | 4 +--
> >> include/drm/ttm/ttm_device.h | 4 +--
> >> 8 files changed, 43 insertions(+), 56 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9d19078246c8..ae18c0e32347 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >> struct amdgpu_vm_bo_base *bo_base;
> >>
> >> if (vm->bulk_moveable) {
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&adev->mman.bdev.lru_lock);
> >> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&adev->mman.bdev.lru_lock);
> >> return;
> >> }
> >>
> >> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&adev->mman.bdev.lru_lock);
> >> list_for_each_entry(bo_base, &vm->idle, vm_status) {
> >> struct amdgpu_bo *bo = bo_base->bo;
> >>
> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >> &bo->shadow->tbo.mem,
> >> &vm->lru_bulk_move);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&adev->mman.bdev.lru_lock);
> >>
> >> vm->bulk_moveable = true;
> >> }
> >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> >> index f5845c96d414..b19f2f00b215 100644
> >> --- a/drivers/gpu/drm/qxl/qxl_release.c
> >> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> >> release->id | 0xf0000000, release->base.seqno);
> >> trace_dma_fence_emit(&release->base);
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> -
> >> list_for_each_entry(entry, &release->bos, head) {
> >> bo = entry->bo;
> >>
> >> dma_resv_add_shared_fence(bo->base.resv, &release->base);
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> ww_acquire_fini(&release->ticket);
> >> }
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index a1be88be357b..a8103c8718a3 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> >> * reference it any more. The only tricky case is the trylock on
> >> * the resv object while holding the lru_lock.
> >> */
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> bo->base.resv = &bo->base._resv;
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> }
> >>
> >> return r;
> >> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>
> >> if (unlock_resv)
> >> dma_resv_unlock(bo->base.resv);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> >> 30 * HZ);
> >> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> else if (lret == 0)
> >> return -EBUSY;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> >> /*
> >> * We raced, and lost, someone else holds the reservation now,
> >> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> * delayed destruction would succeed, so just return success
> >> * here.
> >> */
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return 0;
> >> }
> >> ret = 0;
> >> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> if (ret || unlikely(list_empty(&bo->ddestroy))) {
> >> if (unlock_resv)
> >> dma_resv_unlock(bo->base.resv);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return ret;
> >> }
> >>
> >> ttm_bo_del_from_lru(bo);
> >> list_del_init(&bo->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> ttm_bo_cleanup_memtype_use(bo);
> >>
> >> if (unlock_resv)
> >> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> */
> >> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct list_head removed;
> >> bool empty;
> >>
> >> INIT_LIST_HEAD(&removed);
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> while (!list_empty(&bdev->ddestroy)) {
> >> struct ttm_buffer_object *bo;
> >>
> >> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >> continue;
> >>
> >> if (remove_all || bo->base.resv != &bo->base._resv) {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> dma_resv_lock(bo->base.resv, NULL);
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>
> >> } else if (dma_resv_trylock(bo->base.resv)) {
> >> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >> } else {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> }
> >>
> >> ttm_bo_put(bo);
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> }
> >> list_splice_tail(&removed, &bdev->ddestroy);
> >> empty = list_empty(&bdev->ddestroy);
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> return empty;
> >> }
> >> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
> >> ttm_bo_flush_all_fences(bo);
> >> bo->deleted = true;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >>
> >> /*
> >> * Make pinned bos immediately available to
> >> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
> >>
> >> kref_init(&bo->kref);
> >> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> schedule_delayed_work(&bdev->wq,
> >> ((HZ / 100) < 1) ? 1 : HZ / 100);
> >> return;
> >> }
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_del_from_lru(bo);
> >> list_del(&bo->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> ttm_bo_cleanup_memtype_use(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> unsigned i;
> >> int ret;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> list_for_each_entry(bo, &man->lru[i], lru) {
> >> bool busy;
> >> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> if (!bo) {
> >> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> >> busy_bo = NULL;
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> >> if (busy_bo)
> >> ttm_bo_put(busy_bo);
> >> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> return ret;
> >> }
> >>
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> ret = ttm_bo_evict(bo, ctx);
> >> if (locked)
> >> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> >> mem->mem_type = place->mem_type;
> >> mem->placement = place->flags;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_move_to_lru_tail(bo, mem, NULL);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> -
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return 0;
> >> }
> >>
> >> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> >> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >> gfp_t gfp_flags)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> bool locked;
> >> int ret;
> >>
> >> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>
> >> ttm_bo_del_from_lru(bo);
> >> /* TODO: Cleanup the locking */
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> /*
> >> * Move to system cached
> >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> >> index 2c280fb1e992..924d892109e8 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_device.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> >> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> >> ttm_pool_mgr_init(num_pages * 50 / 100);
> >> ttm_tt_mgr_init();
> >>
> >> - spin_lock_init(&glob->lru_lock);
> >> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
> >>
> >> if (unlikely(glob->dummy_read_page == NULL)) {
> >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> >> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >> gfp_t gfp_flags)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct ttm_resource_manager *man;
> >> struct ttm_buffer_object *bo;
> >> unsigned i, j;
> >> int ret;
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> >> man = ttm_manager_type(bdev, i);
> >> if (!man || !man->use_tt)
> >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >> }
> >> }
> >> }
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> return 0;
> >> }
> >> EXPORT_SYMBOL(ttm_device_swapout);
> >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> >>
> >> bdev->vma_manager = vma_manager;
> >> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> >> + spin_lock_init(&bdev->lru_lock);
> >> INIT_LIST_HEAD(&bdev->ddestroy);
> >> bdev->dev_mapping = mapping;
> >> mutex_lock(&ttm_global_mutex);
> >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
> >>
> >> void ttm_device_fini(struct ttm_device *bdev)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct ttm_resource_manager *man;
> >> unsigned i;
> >>
> >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> >> if (ttm_bo_delayed_delete(bdev, true))
> >> pr_debug("Delayed destroy list was clean\n");
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> >> if (list_empty(&man->lru[0]))
> >> pr_debug("Swap list %d was clean\n", i);
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> ttm_pool_fini(&bdev->pool);
> >> ttm_global_release();
> >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> index 690ab97d52b7..071c48d672c6 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> >> if (list_empty(list))
> >> return;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> list_for_each_entry(entry, list, head) {
> >> struct ttm_buffer_object *bo = entry->bo;
> >>
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >>
> >> if (ticket)
> >> ww_acquire_fini(ticket);
> >> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >> if (list_empty(list))
> >> return;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> list_for_each_entry(entry, list, head) {
> >> struct ttm_buffer_object *bo = entry->bo;
> >>
> >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >> dma_resv_add_shared_fence(bo->base.resv, fence);
> >> else
> >> dma_resv_add_excl_fence(bo->base.resv, fence);
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> if (ticket)
> >> ww_acquire_fini(ticket);
> >> }
> >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> >> index ed1672a9f332..04f2eef653ab 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >> .no_wait_gpu = false,
> >> .force_alloc = true
> >> };
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct dma_fence *fence;
> >> int ret;
> >> unsigned i;
> >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >> * Can't use standard list traversal since we're unlocking.
> >> */
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> while (!list_empty(&man->lru[i])) {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> >> NULL);
> >> if (ret)
> >> return ret;
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> }
> >> }
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> spin_lock(&man->move_lock);
> >> fence = dma_fence_get(man->move);
> >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> >> index d007feef7676..dbccac957f8f 100644
> >> --- a/include/drm/ttm/ttm_bo_driver.h
> >> +++ b/include/drm/ttm/ttm_bo_driver.h
> >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> >> static inline void
> >> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> >> {
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> }
> >>
> >> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> >> index cda6efb4c34b..bae56d29e8ff 100644
> >> --- a/include/drm/ttm/ttm_device.h
> >> +++ b/include/drm/ttm/ttm_device.h
> >> @@ -56,7 +56,6 @@ extern struct ttm_global {
> >> */
> >>
> >> struct page *dummy_read_page;
> >> - spinlock_t lru_lock;
> >>
> >> /**
> >> * Protected by ttm_global_mutex.
> >> @@ -277,8 +276,9 @@ struct ttm_device {
> >> struct ttm_pool pool;
> >>
> >> /*
> >> - * Protected by the global:lru lock.
> >> + * Protection for the per manager LRU and ddestroy lists.
> >> */
> >> + spinlock_t lru_lock;
> >> struct list_head ddestroy;
> >>
> >> /*
> >> --
> >> 2.25.1
> >>
> >> _______________________________________________
> >> dri-devel mailing list
> >> dri-devel@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-16 12:05 ` Daniel Vetter
@ 2021-03-16 15:13 ` Christian König
0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-03-16 15:13 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Huang Rui, dri-devel
Am 16.03.21 um 13:05 schrieb Daniel Vetter:
> On Tue, Mar 16, 2021 at 1:03 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 16.03.21 um 10:35 schrieb Daniel Vetter:
>>> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
>>>> Instead of having a global lock.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> I guess per zone lru lock is a lot more work since then we need to handle
>>> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
>>> lingo.
>> Making the LRU per resource manager is the long term goal, yes.
>>
>> My key idea so far is that we make bo->mem a pointer and then move the
>> LRU handling into the resource object instead of the BO.
>>
>> The resource object then just references the BO and so that we can
>> figure out which BO to evict or which fence to wait for to free up a
>> resource.
> Hm yeah that could work out fairly nicely. Both from locking but also
> refcounting pov. And maybe we could then use entirely free-standing
> mem objects instead of ghost objects? Since that's a part of ttm I
> don't grok and it always looks a bit like a hack to me. So for these
> ghost mem objects you'd only need the lru + dma_fence_wait (can grab a
> fence ref under the lru and then drop lru lock for that) for eviction,
> no dma_resv_lock.
Exactly that's the background here, yes.
Those ghost objects are more than just a bit of a hack and result in
tons of checks in the driver to determine whether a BO is really a BO or a ghost.
Moving all that handling into the resource objects not only allows us to
remove that, but also makes things like delayed delete work out pretty
nicely.
Christian.
> -Daniel
>> Regards,
>> Christian.
>>
>>> -Daniel
>>>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
>>>> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
>>>> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
>>>> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
>>>> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
>>>> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
>>>> include/drm/ttm/ttm_bo_driver.h | 4 +--
>>>> include/drm/ttm/ttm_device.h | 4 +--
>>>> 8 files changed, 43 insertions(+), 56 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 9d19078246c8..ae18c0e32347 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>>> struct amdgpu_vm_bo_base *bo_base;
>>>>
>>>> if (vm->bulk_moveable) {
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&adev->mman.bdev.lru_lock);
>>>> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>>> return;
>>>> }
>>>>
>>>> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&adev->mman.bdev.lru_lock);
>>>> list_for_each_entry(bo_base, &vm->idle, vm_status) {
>>>> struct amdgpu_bo *bo = bo_base->bo;
>>>>
>>>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>>> &bo->shadow->tbo.mem,
>>>> &vm->lru_bulk_move);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>>>
>>>> vm->bulk_moveable = true;
>>>> }
>>>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>>>> index f5845c96d414..b19f2f00b215 100644
>>>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>>>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>>>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>>>> release->id | 0xf0000000, release->base.seqno);
>>>> trace_dma_fence_emit(&release->base);
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> -
>>>> list_for_each_entry(entry, &release->bos, head) {
>>>> bo = entry->bo;
>>>>
>>>> dma_resv_add_shared_fence(bo->base.resv, &release->base);
>>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> ww_acquire_fini(&release->ticket);
>>>> }
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index a1be88be357b..a8103c8718a3 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>>>> * reference it any more. The only tricky case is the trylock on
>>>> * the resv object while holding the lru_lock.
>>>> */
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> bo->base.resv = &bo->base._resv;
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> }
>>>>
>>>> return r;
>>>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>
>>>> if (unlock_resv)
>>>> dma_resv_unlock(bo->base.resv);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>>>> 30 * HZ);
>>>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> else if (lret == 0)
>>>> return -EBUSY;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>>>> /*
>>>> * We raced, and lost, someone else holds the reservation now,
>>>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> * delayed destruction would succeed, so just return success
>>>> * here.
>>>> */
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> return 0;
>>>> }
>>>> ret = 0;
>>>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> if (ret || unlikely(list_empty(&bo->ddestroy))) {
>>>> if (unlock_resv)
>>>> dma_resv_unlock(bo->base.resv);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> return ret;
>>>> }
>>>>
>>>> ttm_bo_del_from_lru(bo);
>>>> list_del_init(&bo->ddestroy);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> ttm_bo_cleanup_memtype_use(bo);
>>>>
>>>> if (unlock_resv)
>>>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> */
>>>> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct list_head removed;
>>>> bool empty;
>>>>
>>>> INIT_LIST_HEAD(&removed);
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> while (!list_empty(&bdev->ddestroy)) {
>>>> struct ttm_buffer_object *bo;
>>>>
>>>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>>> continue;
>>>>
>>>> if (remove_all || bo->base.resv != &bo->base._resv) {
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> dma_resv_lock(bo->base.resv, NULL);
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>>>
>>>> } else if (dma_resv_trylock(bo->base.resv)) {
>>>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>>> } else {
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> }
>>>>
>>>> ttm_bo_put(bo);
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> }
>>>> list_splice_tail(&removed, &bdev->ddestroy);
>>>> empty = list_empty(&bdev->ddestroy);
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> return empty;
>>>> }
>>>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>>>> ttm_bo_flush_all_fences(bo);
>>>> bo->deleted = true;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>>
>>>> /*
>>>> * Make pinned bos immediately available to
>>>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>>>>
>>>> kref_init(&bo->kref);
>>>> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> schedule_delayed_work(&bdev->wq,
>>>> ((HZ / 100) < 1) ? 1 : HZ / 100);
>>>> return;
>>>> }
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> ttm_bo_del_from_lru(bo);
>>>> list_del(&bo->ddestroy);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> ttm_bo_cleanup_memtype_use(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>> unsigned i;
>>>> int ret;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>> list_for_each_entry(bo, &man->lru[i], lru) {
>>>> bool busy;
>>>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>> if (!bo) {
>>>> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>>>> busy_bo = NULL;
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>>>> if (busy_bo)
>>>> ttm_bo_put(busy_bo);
>>>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>> return ret;
>>>> }
>>>>
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> ret = ttm_bo_evict(bo, ctx);
>>>> if (locked)
>>>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>>>> mem->mem_type = place->mem_type;
>>>> mem->placement = place->flags;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> ttm_bo_move_to_lru_tail(bo, mem, NULL);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> -
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> return 0;
>>>> }
>>>>
>>>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>>>> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>>> gfp_t gfp_flags)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> bool locked;
>>>> int ret;
>>>>
>>>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>>>
>>>> ttm_bo_del_from_lru(bo);
>>>> /* TODO: Cleanup the locking */
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> /*
>>>> * Move to system cached
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>>>> index 2c280fb1e992..924d892109e8 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>>>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>>>> ttm_pool_mgr_init(num_pages * 50 / 100);
>>>> ttm_tt_mgr_init();
>>>>
>>>> - spin_lock_init(&glob->lru_lock);
>>>> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>>>
>>>> if (unlikely(glob->dummy_read_page == NULL)) {
>>>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>>>> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>>> gfp_t gfp_flags)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct ttm_resource_manager *man;
>>>> struct ttm_buffer_object *bo;
>>>> unsigned i, j;
>>>> int ret;
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>>>> man = ttm_manager_type(bdev, i);
>>>> if (!man || !man->use_tt)
>>>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>>> }
>>>> }
>>>> }
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> return 0;
>>>> }
>>>> EXPORT_SYMBOL(ttm_device_swapout);
>>>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>>>
>>>> bdev->vma_manager = vma_manager;
>>>> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>>>> + spin_lock_init(&bdev->lru_lock);
>>>> INIT_LIST_HEAD(&bdev->ddestroy);
>>>> bdev->dev_mapping = mapping;
>>>> mutex_lock(&ttm_global_mutex);
>>>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>>>
>>>> void ttm_device_fini(struct ttm_device *bdev)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct ttm_resource_manager *man;
>>>> unsigned i;
>>>>
>>>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>>>> if (ttm_bo_delayed_delete(bdev, true))
>>>> pr_debug("Delayed destroy list was clean\n");
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>>> if (list_empty(&man->lru[0]))
>>>> pr_debug("Swap list %d was clean\n", i);
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> ttm_pool_fini(&bdev->pool);
>>>> ttm_global_release();
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> index 690ab97d52b7..071c48d672c6 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>>>> if (list_empty(list))
>>>> return;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> list_for_each_entry(entry, list, head) {
>>>> struct ttm_buffer_object *bo = entry->bo;
>>>>
>>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>>
>>>> if (ticket)
>>>> ww_acquire_fini(ticket);
>>>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>>> if (list_empty(list))
>>>> return;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> list_for_each_entry(entry, list, head) {
>>>> struct ttm_buffer_object *bo = entry->bo;
>>>>
>>>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>>> dma_resv_add_shared_fence(bo->base.resv, fence);
>>>> else
>>>> dma_resv_add_excl_fence(bo->base.resv, fence);
>>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> if (ticket)
>>>> ww_acquire_fini(ticket);
>>>> }
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> index ed1672a9f332..04f2eef653ab 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>>> .no_wait_gpu = false,
>>>> .force_alloc = true
>>>> };
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct dma_fence *fence;
>>>> int ret;
>>>> unsigned i;
>>>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>>> * Can't use standard list traversal since we're unlocking.
>>>> */
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>> while (!list_empty(&man->lru[i])) {
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>>>> NULL);
>>>> if (ret)
>>>> return ret;
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> }
>>>> }
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> spin_lock(&man->move_lock);
>>>> fence = dma_fence_get(man->move);
>>>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>>>> index d007feef7676..dbccac957f8f 100644
>>>> --- a/include/drm/ttm/ttm_bo_driver.h
>>>> +++ b/include/drm/ttm/ttm_bo_driver.h
>>>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>>>> static inline void
>>>> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>>>> {
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> }
>>>>
>>>> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>>>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>>>> index cda6efb4c34b..bae56d29e8ff 100644
>>>> --- a/include/drm/ttm/ttm_device.h
>>>> +++ b/include/drm/ttm/ttm_device.h
>>>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>>>> */
>>>>
>>>> struct page *dummy_read_page;
>>>> - spinlock_t lru_lock;
>>>>
>>>> /**
>>>> * Protected by ttm_global_mutex.
>>>> @@ -277,8 +276,9 @@ struct ttm_device {
>>>> struct ttm_pool pool;
>>>>
>>>> /*
>>>> - * Protected by the global:lru lock.
>>>> + * Protection for the per manager LRU and ddestroy lists.
>>>> */
>>>> + spinlock_t lru_lock;
>>>> struct list_head ddestroy;
>>>>
>>>> /*
>>>> --
>>>> 2.25.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-19 9:41 ` kernel test robot
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
` (2 subsequent siblings)
3 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-19 9:41 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: clang-built-linux, ray.huang, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3513 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on next-20210319]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead
vim +110 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> 110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36289 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-19 9:41 ` kernel test robot
0 siblings, 0 replies; 19+ messages in thread
From: kernel test robot @ 2021-03-19 9:41 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3590 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on next-20210319]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project fcc1ce00931751ac02498986feb37744e9ace8de)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/824dca26fe395899b41d9790944ddea345f7a6fd
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
git checkout 824dca26fe395899b41d9790944ddea345f7a6fd
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
drivers/gpu/drm/ttm/ttm_device.c:42: warning: Function parameter or member 'ttm_global_mutex' not described in 'DEFINE_MUTEX'
drivers/gpu/drm/ttm/ttm_device.c:42: warning: expecting prototype for ttm_global_mutex(). Prototype was for DEFINE_MUTEX() instead
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'ctx' not described in 'ttm_global_swapout'
drivers/gpu/drm/ttm/ttm_device.c:110: warning: Function parameter or member 'gfp_flags' not described in 'ttm_global_swapout'
>> drivers/gpu/drm/ttm/ttm_device.c:110: warning: expecting prototype for A buffer object shrink method that tries to swap out the first(). Prototype was for ttm_global_swapout() instead
vim +110 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
109 long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> 110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36289 bytes --]
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-02-11 13:29 Christian König
0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-02-11 13:29 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e38102282fd5..d33578a112b4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4efed3bf0ef9..01da355b86f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-02-10 15:21 Christian König
0 siblings, 0 replies; 19+ messages in thread
From: Christian König @ 2021-02-10 15:21 UTC (permalink / raw)
To: dri-devel, ray.huang
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e38102282fd5..d33578a112b4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4efed3bf0ef9..01da355b86f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
end of thread, other threads:[~2021-03-19 9:41 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
2021-03-15 16:04 ` [PATCH 2/3] drm/ttm: remove swap LRU v2 Christian König
2021-03-15 18:54 ` kernel test robot
2021-03-15 18:54 ` kernel test robot
2021-03-15 18:54 ` Matthew Auld
2021-03-15 19:27 ` Christian König
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-15 20:17 ` kernel test robot
2021-03-15 20:17 ` kernel test robot
2021-03-16 9:35 ` Daniel Vetter
2021-03-16 12:03 ` Christian König
2021-03-16 12:05 ` Daniel Vetter
2021-03-16 15:13 ` Christian König
2021-03-15 18:47 ` [PATCH 1/3] drm/ttm: move swapout logic around kernel test robot
2021-03-15 18:47 ` kernel test robot
2021-03-19 9:41 ` kernel test robot
2021-03-19 9:41 ` kernel test robot
-- strict thread matches above, loose matches on Subject: below --
2021-02-11 13:29 Christian König
2021-02-10 15:21 Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.