* [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-18 12:47 Christian König
2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
` (6 more replies)
0 siblings, 7 replies; 28+ messages in thread
From: Christian König @ 2021-03-18 12:47 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang, matthew.william.auld
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
v2: consistently return int
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3c23e863a3f0..66e00b404ec3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..b1424189fdfb 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6910111099c8..b991422e156c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 2/3] drm/ttm: remove swap LRU v3
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
@ 2021-03-18 12:47 ` Christian König
2021-03-19 4:30 ` Huang Rui
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
` (5 subsequent siblings)
6 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-03-18 12:47 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang, matthew.william.auld
Instead, evict round-robin from each device's SYSTEM and TT domains.
v2: reorder num_pages access reported by Dan's script
v3: fix rebase fallout, num_pages should be 32bit
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 29 --------------
drivers/gpu/drm/ttm/ttm_bo_util.c | 1 -
drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++--------
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 1 -
include/drm/ttm/ttm_bo_driver.h | 1 -
include/drm/ttm/ttm_device.h | 7 +---
7 files changed, 48 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 66e00b404ec3..3673157527ff 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
{
struct ttm_device *bdev = bo->bdev;
- list_del_init(&bo->swap);
list_del_init(&bo->lru);
if (bdev->funcs->del_from_lru_notify)
@@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
man = ttm_manager_type(bdev, mem->mem_type);
list_move_tail(&bo->lru, &man->lru[bo->priority]);
- if (man->use_tt && bo->ttm &&
- !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
- TTM_PAGE_FLAG_SWAPPED))) {
- struct list_head *swap;
-
- swap = &ttm_glob.swap_lru[bo->priority];
- list_move_tail(&bo->swap, swap);
- } else {
- list_del_init(&bo->swap);
- }
if (bdev->funcs->del_from_lru_notify)
bdev->funcs->del_from_lru_notify(bo);
@@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
break;
}
- if (bo->ttm && !(bo->ttm->page_flags &
- (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
- ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
}
}
EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
@@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
list_bulk_move_tail(&man->lru[i], &pos->first->lru,
&pos->last->lru);
}
-
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
- struct list_head *lru;
-
- if (!pos->first)
- continue;
-
- dma_resv_assert_held(pos->first->base.resv);
- dma_resv_assert_held(pos->last->base.resv);
-
- lru = &ttm_glob.swap_lru[i];
- list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
- }
}
EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
@@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
kref_init(&bo->kref);
INIT_LIST_HEAD(&bo->lru);
INIT_LIST_HEAD(&bo->ddestroy);
- INIT_LIST_HEAD(&bo->swap);
bo->bdev = bdev;
bo->type = type;
bo->mem.mem_type = TTM_PL_SYSTEM;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 031e5819fec4..a2a17c84ceb3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
atomic_inc(&ttm_glob.bo_count);
INIT_LIST_HEAD(&fbo->base.ddestroy);
INIT_LIST_HEAD(&fbo->base.lru);
- INIT_LIST_HEAD(&fbo->base.swap);
fbo->base.moving = NULL;
drm_vma_node_reset(&fbo->base.base.vma_node);
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index b1424189fdfb..2096a0fd9c35 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -67,7 +67,6 @@ static int ttm_global_init(void)
unsigned long num_pages;
struct sysinfo si;
int ret = 0;
- unsigned i;
mutex_lock(&ttm_global_mutex);
if (++ttm_glob_use_count > 1)
@@ -90,8 +89,6 @@ static int ttm_global_init(void)
goto out;
}
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
- INIT_LIST_HEAD(&glob->swap_lru[i]);
INIT_LIST_HEAD(&glob->device_list);
atomic_set(&glob->bo_count, 0);
@@ -109,27 +106,60 @@ static int ttm_global_init(void)
int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
+ struct ttm_device *bdev;
+ int ret = -EBUSY;
+
+ mutex_lock(&ttm_global_mutex);
+ list_for_each_entry(bdev, &glob->device_list, device_list) {
+ ret = ttm_device_swapout(bdev, ctx, gfp_flags);
+ if (ret > 0) {
+ list_move_tail(&bdev->device_list, &glob->device_list);
+ break;
+ }
+ }
+ mutex_unlock(&ttm_global_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
- unsigned i;
+ unsigned i, j;
int ret;
spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- uint32_t num_pages = bo->ttm->num_pages;
-
- ret = ttm_bo_swapout(bo, ctx, gfp_flags);
- /* ttm_bo_swapout has dropped the lru_lock */
- if (!ret)
- return num_pages;
- if (ret != -EBUSY)
- return ret;
+ for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
+ man = ttm_manager_type(bdev, i);
+ if (!man || !man->use_tt)
+ continue;
+
+ for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
+ list_for_each_entry(bo, &man->lru[j], lru) {
+ uint32_t num_pages;
+
+ if (!bo->ttm ||
+ bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+ bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+ continue;
+
+ num_pages = bo->ttm->num_pages;
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
}
}
spin_unlock(&glob->lru_lock);
return 0;
}
-EXPORT_SYMBOL(ttm_global_swapout);
+EXPORT_SYMBOL(ttm_device_swapout);
static void ttm_init_sysman(struct ttm_device *bdev)
{
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index b991422e156c..4e41d8221f06 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 5044ac330858..3587f660e8f4 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -144,7 +144,6 @@ struct ttm_buffer_object {
struct list_head lru;
struct list_head ddestroy;
- struct list_head swap;
/**
* Members protected by a bo reservation.
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8959c0075cfd..d007feef7676 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
struct ttm_lru_bulk_move {
struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
- struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
};
/*
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 6a0b267d4fe6..cda6efb4c34b 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -63,11 +63,6 @@ extern struct ttm_global {
*/
struct list_head device_list;
- /**
- * Protected by the lru_lock.
- */
- struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
-
/**
* Internal protection.
*/
@@ -298,6 +293,8 @@ struct ttm_device {
};
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 2/3] drm/ttm: remove swap LRU v3
2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
@ 2021-03-19 4:30 ` Huang Rui
0 siblings, 0 replies; 28+ messages in thread
From: Huang Rui @ 2021-03-19 4:30 UTC (permalink / raw)
To: Christian König; +Cc: matthew.william.auld, dri-devel
On Thu, Mar 18, 2021 at 08:47:18PM +0800, Christian König wrote:
> Instead, evict round-robin from each device's SYSTEM and TT domains.
>
> v2: reorder num_pages access reported by Dan's script
> v3: fix rebase fallout, num_pages should be 32bit
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 29 --------------
> drivers/gpu/drm/ttm/ttm_bo_util.c | 1 -
> drivers/gpu/drm/ttm/ttm_device.c | 60 +++++++++++++++++++++--------
> drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
> include/drm/ttm/ttm_bo_api.h | 1 -
> include/drm/ttm/ttm_bo_driver.h | 1 -
> include/drm/ttm/ttm_device.h | 7 +---
> 7 files changed, 48 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 66e00b404ec3..3673157527ff 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
> {
> struct ttm_device *bdev = bo->bdev;
>
> - list_del_init(&bo->swap);
> list_del_init(&bo->lru);
>
> if (bdev->funcs->del_from_lru_notify)
> @@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>
> man = ttm_manager_type(bdev, mem->mem_type);
> list_move_tail(&bo->lru, &man->lru[bo->priority]);
> - if (man->use_tt && bo->ttm &&
> - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> - TTM_PAGE_FLAG_SWAPPED))) {
> - struct list_head *swap;
> -
> - swap = &ttm_glob.swap_lru[bo->priority];
> - list_move_tail(&bo->swap, swap);
> - } else {
> - list_del_init(&bo->swap);
> - }
>
> if (bdev->funcs->del_from_lru_notify)
> bdev->funcs->del_from_lru_notify(bo);
> @@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> ttm_bo_bulk_move_set_pos(&bulk->vram[bo->priority], bo);
> break;
> }
> - if (bo->ttm && !(bo->ttm->page_flags &
> - (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
> - ttm_bo_bulk_move_set_pos(&bulk->swap[bo->priority], bo);
> }
> }
> EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> @@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> &pos->last->lru);
> }
> -
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - struct ttm_lru_bulk_move_pos *pos = &bulk->swap[i];
> - struct list_head *lru;
> -
> - if (!pos->first)
> - continue;
> -
> - dma_resv_assert_held(pos->first->base.resv);
> - dma_resv_assert_held(pos->last->base.resv);
> -
> - lru = &ttm_glob.swap_lru[i];
> - list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
> - }
> }
> EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>
> @@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
> kref_init(&bo->kref);
> INIT_LIST_HEAD(&bo->lru);
> INIT_LIST_HEAD(&bo->ddestroy);
> - INIT_LIST_HEAD(&bo->swap);
> bo->bdev = bdev;
> bo->type = type;
> bo->mem.mem_type = TTM_PL_SYSTEM;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 031e5819fec4..a2a17c84ceb3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> atomic_inc(&ttm_glob.bo_count);
> INIT_LIST_HEAD(&fbo->base.ddestroy);
> INIT_LIST_HEAD(&fbo->base.lru);
> - INIT_LIST_HEAD(&fbo->base.swap);
> fbo->base.moving = NULL;
> drm_vma_node_reset(&fbo->base.base.vma_node);
>
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index b1424189fdfb..2096a0fd9c35 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -67,7 +67,6 @@ static int ttm_global_init(void)
> unsigned long num_pages;
> struct sysinfo si;
> int ret = 0;
> - unsigned i;
>
> mutex_lock(&ttm_global_mutex);
> if (++ttm_glob_use_count > 1)
> @@ -90,8 +89,6 @@ static int ttm_global_init(void)
> goto out;
> }
>
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> - INIT_LIST_HEAD(&glob->swap_lru[i]);
> INIT_LIST_HEAD(&glob->device_list);
> atomic_set(&glob->bo_count, 0);
>
> @@ -109,27 +106,60 @@ static int ttm_global_init(void)
> int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> {
> struct ttm_global *glob = &ttm_glob;
> + struct ttm_device *bdev;
> + int ret = -EBUSY;
> +
> + mutex_lock(&ttm_global_mutex);
> + list_for_each_entry(bdev, &glob->device_list, device_list) {
> + ret = ttm_device_swapout(bdev, ctx, gfp_flags);
> + if (ret > 0) {
> + list_move_tail(&bdev->device_list, &glob->device_list);
> + break;
> + }
> + }
> + mutex_unlock(&ttm_global_mutex);
> + return ret;
> +}
> +EXPORT_SYMBOL(ttm_global_swapout);
> +
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> + gfp_t gfp_flags)
> +{
> + struct ttm_global *glob = &ttm_glob;
> + struct ttm_resource_manager *man;
> struct ttm_buffer_object *bo;
> - unsigned i;
> + unsigned i, j;
> int ret;
>
> spin_lock(&glob->lru_lock);
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> - uint32_t num_pages = bo->ttm->num_pages;
> -
> - ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> - /* ttm_bo_swapout has dropped the lru_lock */
> - if (!ret)
> - return num_pages;
> - if (ret != -EBUSY)
> - return ret;
> + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> + man = ttm_manager_type(bdev, i);
> + if (!man || !man->use_tt)
> + continue;
> +
> + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> + list_for_each_entry(bo, &man->lru[j], lru) {
> + uint32_t num_pages;
> +
> + if (!bo->ttm ||
> + bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> + bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> + continue;
> +
> + num_pages = bo->ttm->num_pages;
> + ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> + /* ttm_bo_swapout has dropped the lru_lock */
> + if (!ret)
> + return num_pages;
> + if (ret != -EBUSY)
> + return ret;
> + }
> }
> }
> spin_unlock(&glob->lru_lock);
> return 0;
> }
> -EXPORT_SYMBOL(ttm_global_swapout);
> +EXPORT_SYMBOL(ttm_device_swapout);
>
> static void ttm_init_sysman(struct ttm_device *bdev)
> {
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index b991422e156c..4e41d8221f06 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
> vmw_execbuf_release_pinned_bo(dev_priv);
> vmw_resource_evict_all(dev_priv);
> vmw_release_device_early(dev_priv);
> - while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> + while (ttm_device_swapout(&dev_priv->bdev, &ctx, GFP_KERNEL) > 0);
> if (dev_priv->enable_fb)
> vmw_fifo_resource_dec(dev_priv);
> if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 5044ac330858..3587f660e8f4 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -144,7 +144,6 @@ struct ttm_buffer_object {
>
> struct list_head lru;
> struct list_head ddestroy;
> - struct list_head swap;
>
> /**
> * Members protected by a bo reservation.
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 8959c0075cfd..d007feef7676 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -69,7 +69,6 @@ struct ttm_lru_bulk_move_pos {
> struct ttm_lru_bulk_move {
> struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY];
> struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY];
> - struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY];
> };
>
> /*
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 6a0b267d4fe6..cda6efb4c34b 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -63,11 +63,6 @@ extern struct ttm_global {
> */
> struct list_head device_list;
>
> - /**
> - * Protected by the lru_lock.
> - */
> - struct list_head swap_lru[TTM_MAX_BO_PRIORITY];
> -
> /**
> * Internal protection.
> */
> @@ -298,6 +293,8 @@ struct ttm_device {
> };
>
> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> + gfp_t gfp_flags);
>
> static inline struct ttm_resource_manager *
> ttm_manager_type(struct ttm_device *bdev, int mem_type)
> --
> 2.25.1
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
@ 2021-03-18 12:47 ` Christian König
2021-03-19 4:32 ` Huang Rui
2021-03-18 14:43 ` [PATCH 1/3] drm/ttm: move swapout logic around v2 Nirmoy
` (4 subsequent siblings)
6 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-03-18 12:47 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang, matthew.william.auld
Instead of having a global lock, use a per-device LRU lock.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
drivers/gpu/drm/qxl/qxl_release.c | 5 +--
drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
include/drm/ttm/ttm_bo_driver.h | 4 +--
include/drm/ttm/ttm_device.h | 4 +--
8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
return;
}
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
&bo->shadow->tbo.mem,
&vm->lru_bulk_move);
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c96d414..b19f2f00b215 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
release->id | 0xf0000000, release->base.seqno);
trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock);
-
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3673157527ff..2d2ac532987e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
* reference it any more. The only tricky case is the trylock on
* the resv object while holding the lru_lock.
*/
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
bo->base.resv = &bo->base._resv;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
else if (lret == 0)
return -EBUSY;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
/*
* We raced, and lost, someone else holds the reservation now,
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
* delayed destruction would succeed, so just return success
* here.
*/
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
ret = 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (ret || unlikely(list_empty(&bo->ddestroy))) {
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return ret;
}
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
*/
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
{
- struct ttm_global *glob = &ttm_glob;
struct list_head removed;
bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
while (!list_empty(&bdev->ddestroy)) {
struct ttm_buffer_object *bo;
@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
empty = list_empty(&bdev->ddestroy);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return empty;
}
@@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_bo_flush_all_fences(bo);
bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
/*
* Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
return;
}
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_del_from_lru(bo);
list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
unsigned i;
int ret;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
busy_bo = NULL;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
ttm_bo_put(busy_bo);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
return ret;
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx);
if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
mem->mem_type = place->mem_type;
mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
-
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
bool locked;
int ret;
@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo);
/* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
/*
* Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2096a0fd9c35..85f6975d9872 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
ttm_pool_mgr_init(num_pages * 50 / 100);
ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock);
glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
unsigned i, j;
int ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
}
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return 0;
}
EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager;
INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+ spin_lock_init(&bdev->lru_lock);
INIT_LIST_HEAD(&bdev->ddestroy);
bdev->dev_mapping = mapping;
mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
if (ttm_bo_delayed_delete(bdev, true))
pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
if (list_empty(&man->lru[0]))
pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool);
ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
dma_resv_add_shared_fence(bo->base.resv, fence);
else
dma_resv_add_excl_fence(bo->base.resv, fence);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
.force_alloc = true
};
- struct ttm_global *glob = &ttm_glob;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
* Can't use standard list traversal since we're unlocking.
*/
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
NULL);
if (ret)
return ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
static inline void
ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
{
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
*/
struct page *dummy_read_page;
- spinlock_t lru_lock;
/**
* Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
struct ttm_pool pool;
/*
- * Protected by the global:lru lock.
+ * Protection for the per manager LRU and ddestroy lists.
*/
+ spinlock_t lru_lock;
struct list_head ddestroy;
/*
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-19 4:32 ` Huang Rui
2021-03-19 12:10 ` Christian König
0 siblings, 1 reply; 28+ messages in thread
From: Huang Rui @ 2021-03-19 4:32 UTC (permalink / raw)
To: Christian König; +Cc: matthew.william.auld, dri-devel
On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> Instead of having a global lock.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
> include/drm/ttm/ttm_bo_driver.h | 4 +--
> include/drm/ttm/ttm_device.h | 4 +--
> 8 files changed, 43 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> struct amdgpu_vm_bo_base *bo_base;
>
> if (vm->bulk_moveable) {
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&adev->mman.bdev.lru_lock);
Could you please explain why you want to use a per-device lock instead of the global lock?
Thanks,
Ray
> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&adev->mman.bdev.lru_lock);
> return;
> }
>
> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&adev->mman.bdev.lru_lock);
> list_for_each_entry(bo_base, &vm->idle, vm_status) {
> struct amdgpu_bo *bo = bo_base->bo;
>
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> &bo->shadow->tbo.mem,
> &vm->lru_bulk_move);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&adev->mman.bdev.lru_lock);
>
> vm->bulk_moveable = true;
> }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> release->id | 0xf0000000, release->base.seqno);
> trace_dma_fence_emit(&release->base);
>
> - spin_lock(&ttm_glob.lru_lock);
> -
> list_for_each_entry(entry, &release->bos, head) {
> bo = entry->bo;
>
> dma_resv_add_shared_fence(bo->base.resv, &release->base);
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> ww_acquire_fini(&release->ticket);
> }
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3673157527ff..2d2ac532987e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> * reference it any more. The only tricky case is the trylock on
> * the resv object while holding the lru_lock.
> */
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> bo->base.resv = &bo->base._resv;
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> }
>
> return r;
> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> 30 * HZ);
> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> else if (lret == 0)
> return -EBUSY;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> /*
> * We raced, and lost, someone else holds the reservation now,
> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> * delayed destruction would succeed, so just return success
> * here.
> */
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> return 0;
> }
> ret = 0;
> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> if (ret || unlikely(list_empty(&bo->ddestroy))) {
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> return ret;
> }
>
> ttm_bo_del_from_lru(bo);
> list_del_init(&bo->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> ttm_bo_cleanup_memtype_use(bo);
>
> if (unlock_resv)
> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> */
> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct list_head removed;
> bool empty;
>
> INIT_LIST_HEAD(&removed);
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> while (!list_empty(&bdev->ddestroy)) {
> struct ttm_buffer_object *bo;
>
> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> continue;
>
> if (remove_all || bo->base.resv != &bo->base._resv) {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> dma_resv_lock(bo->base.resv, NULL);
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>
> } else if (dma_resv_trylock(bo->base.resv)) {
> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> } else {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> }
>
> ttm_bo_put(bo);
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> }
> list_splice_tail(&removed, &bdev->ddestroy);
> empty = list_empty(&bdev->ddestroy);
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> return empty;
> }
> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
> ttm_bo_flush_all_fences(bo);
> bo->deleted = true;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
>
> /*
> * Make pinned bos immediately available to
> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
>
> kref_init(&bo->kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> schedule_delayed_work(&bdev->wq,
> ((HZ / 100) < 1) ? 1 : HZ / 100);
> return;
> }
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_del_from_lru(bo);
> list_del(&bo->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> ttm_bo_cleanup_memtype_use(bo);
> dma_resv_unlock(bo->base.resv);
> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> unsigned i;
> int ret;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> list_for_each_entry(bo, &man->lru[i], lru) {
> bool busy;
> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> if (!bo) {
> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> busy_bo = NULL;
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> if (busy_bo)
> ttm_bo_put(busy_bo);
> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> return ret;
> }
>
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> ret = ttm_bo_evict(bo, ctx);
> if (locked)
> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> mem->mem_type = place->mem_type;
> mem->placement = place->flags;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_move_to_lru_tail(bo, mem, NULL);
> - spin_unlock(&ttm_glob.lru_lock);
> -
> + spin_unlock(&bo->bdev->lru_lock);
> return 0;
> }
>
> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> gfp_t gfp_flags)
> {
> - struct ttm_global *glob = &ttm_glob;
> bool locked;
> int ret;
>
> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>
> ttm_bo_del_from_lru(bo);
> /* TODO: Cleanup the locking */
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> /*
> * Move to system cached
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 2096a0fd9c35..85f6975d9872 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> ttm_pool_mgr_init(num_pages * 50 / 100);
> ttm_tt_mgr_init();
>
> - spin_lock_init(&glob->lru_lock);
> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>
> if (unlikely(glob->dummy_read_page == NULL)) {
> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> gfp_t gfp_flags)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct ttm_resource_manager *man;
> struct ttm_buffer_object *bo;
> unsigned i, j;
> int ret;
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> man = ttm_manager_type(bdev, i);
> if (!man || !man->use_tt)
> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> }
> }
> }
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> return 0;
> }
> EXPORT_SYMBOL(ttm_device_swapout);
> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>
> bdev->vma_manager = vma_manager;
> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> + spin_lock_init(&bdev->lru_lock);
> INIT_LIST_HEAD(&bdev->ddestroy);
> bdev->dev_mapping = mapping;
> mutex_lock(&ttm_global_mutex);
> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>
> void ttm_device_fini(struct ttm_device *bdev)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct ttm_resource_manager *man;
> unsigned i;
>
> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> if (ttm_bo_delayed_delete(bdev, true))
> pr_debug("Delayed destroy list was clean\n");
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> if (list_empty(&man->lru[0]))
> pr_debug("Swap list %d was clean\n", i);
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> ttm_pool_fini(&bdev->pool);
> ttm_global_release();
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 690ab97d52b7..071c48d672c6 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> if (list_empty(list))
> return;
>
> - spin_lock(&ttm_glob.lru_lock);
> list_for_each_entry(entry, list, head) {
> struct ttm_buffer_object *bo = entry->bo;
>
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
>
> if (ticket)
> ww_acquire_fini(ticket);
> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> if (list_empty(list))
> return;
>
> - spin_lock(&ttm_glob.lru_lock);
> list_for_each_entry(entry, list, head) {
> struct ttm_buffer_object *bo = entry->bo;
>
> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> dma_resv_add_shared_fence(bo->base.resv, fence);
> else
> dma_resv_add_excl_fence(bo->base.resv, fence);
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> if (ticket)
> ww_acquire_fini(ticket);
> }
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index ed1672a9f332..04f2eef653ab 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> .no_wait_gpu = false,
> .force_alloc = true
> };
> - struct ttm_global *glob = &ttm_glob;
> struct dma_fence *fence;
> int ret;
> unsigned i;
> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> * Can't use standard list traversal since we're unlocking.
> */
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> while (!list_empty(&man->lru[i])) {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> NULL);
> if (ret)
> return ret;
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> }
> }
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> spin_lock(&man->move_lock);
> fence = dma_fence_get(man->move);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index d007feef7676..dbccac957f8f 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> static inline void
> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> {
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> }
>
> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index cda6efb4c34b..bae56d29e8ff 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -56,7 +56,6 @@ extern struct ttm_global {
> */
>
> struct page *dummy_read_page;
> - spinlock_t lru_lock;
>
> /**
> * Protected by ttm_global_mutex.
> @@ -277,8 +276,9 @@ struct ttm_device {
> struct ttm_pool pool;
>
> /*
> - * Protected by the global:lru lock.
> + * Protection for the per manager LRU and ddestroy lists.
> */
> + spinlock_t lru_lock;
> struct list_head ddestroy;
>
> /*
> --
> 2.25.1
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-19 4:32 ` Huang Rui
@ 2021-03-19 12:10 ` Christian König
2021-03-22 7:13 ` Huang Rui
0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-03-19 12:10 UTC (permalink / raw)
To: Huang Rui; +Cc: matthew.william.auld, dri-devel
Am 19.03.21 um 05:32 schrieb Huang Rui:
> On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
>> Instead of having a global lock.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
>> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
>> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
>> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
>> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
>> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
>> include/drm/ttm/ttm_bo_driver.h | 4 +--
>> include/drm/ttm/ttm_device.h | 4 +--
>> 8 files changed, 43 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9d19078246c8..ae18c0e32347 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>> struct amdgpu_vm_bo_base *bo_base;
>>
>> if (vm->bulk_moveable) {
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&adev->mman.bdev.lru_lock);
> Could you please explain why do you want to instead of the global lock?
Potentially less contention.
But you are right mentioning this in the commit message is a good idea.
Regards,
Christian.
>
> Thanks,
> Ray
>
>> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&adev->mman.bdev.lru_lock);
>> return;
>> }
>>
>> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&adev->mman.bdev.lru_lock);
>> list_for_each_entry(bo_base, &vm->idle, vm_status) {
>> struct amdgpu_bo *bo = bo_base->bo;
>>
>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>> &bo->shadow->tbo.mem,
>> &vm->lru_bulk_move);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>
>> vm->bulk_moveable = true;
>> }
>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>> index f5845c96d414..b19f2f00b215 100644
>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>> release->id | 0xf0000000, release->base.seqno);
>> trace_dma_fence_emit(&release->base);
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> -
>> list_for_each_entry(entry, &release->bos, head) {
>> bo = entry->bo;
>>
>> dma_resv_add_shared_fence(bo->base.resv, &release->base);
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> ww_acquire_fini(&release->ticket);
>> }
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 3673157527ff..2d2ac532987e 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>> * reference it any more. The only tricky case is the trylock on
>> * the resv object while holding the lru_lock.
>> */
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> bo->base.resv = &bo->base._resv;
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> }
>>
>> return r;
>> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>
>> if (unlock_resv)
>> dma_resv_unlock(bo->base.resv);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>> 30 * HZ);
>> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> else if (lret == 0)
>> return -EBUSY;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>> /*
>> * We raced, and lost, someone else holds the reservation now,
>> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> * delayed destruction would succeed, so just return success
>> * here.
>> */
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> return 0;
>> }
>> ret = 0;
>> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> if (ret || unlikely(list_empty(&bo->ddestroy))) {
>> if (unlock_resv)
>> dma_resv_unlock(bo->base.resv);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> return ret;
>> }
>>
>> ttm_bo_del_from_lru(bo);
>> list_del_init(&bo->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> ttm_bo_cleanup_memtype_use(bo);
>>
>> if (unlock_resv)
>> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> */
>> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct list_head removed;
>> bool empty;
>>
>> INIT_LIST_HEAD(&removed);
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> while (!list_empty(&bdev->ddestroy)) {
>> struct ttm_buffer_object *bo;
>>
>> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>> continue;
>>
>> if (remove_all || bo->base.resv != &bo->base._resv) {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> dma_resv_lock(bo->base.resv, NULL);
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>
>> } else if (dma_resv_trylock(bo->base.resv)) {
>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>> } else {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> }
>>
>> ttm_bo_put(bo);
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> }
>> list_splice_tail(&removed, &bdev->ddestroy);
>> empty = list_empty(&bdev->ddestroy);
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> return empty;
>> }
>> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
>> ttm_bo_flush_all_fences(bo);
>> bo->deleted = true;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>>
>> /*
>> * Make pinned bos immediately available to
>> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
>>
>> kref_init(&bo->kref);
>> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> schedule_delayed_work(&bdev->wq,
>> ((HZ / 100) < 1) ? 1 : HZ / 100);
>> return;
>> }
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_del_from_lru(bo);
>> list_del(&bo->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> ttm_bo_cleanup_memtype_use(bo);
>> dma_resv_unlock(bo->base.resv);
>> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> unsigned i;
>> int ret;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> list_for_each_entry(bo, &man->lru[i], lru) {
>> bool busy;
>> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> if (!bo) {
>> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>> busy_bo = NULL;
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>> if (busy_bo)
>> ttm_bo_put(busy_bo);
>> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> return ret;
>> }
>>
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> ret = ttm_bo_evict(bo, ctx);
>> if (locked)
>> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>> mem->mem_type = place->mem_type;
>> mem->placement = place->flags;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_move_to_lru_tail(bo, mem, NULL);
>> - spin_unlock(&ttm_glob.lru_lock);
>> -
>> + spin_unlock(&bo->bdev->lru_lock);
>> return 0;
>> }
>>
>> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>> gfp_t gfp_flags)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> bool locked;
>> int ret;
>>
>> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>
>> ttm_bo_del_from_lru(bo);
>> /* TODO: Cleanup the locking */
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> /*
>> * Move to system cached
>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>> index 2096a0fd9c35..85f6975d9872 100644
>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>> ttm_pool_mgr_init(num_pages * 50 / 100);
>> ttm_tt_mgr_init();
>>
>> - spin_lock_init(&glob->lru_lock);
>> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>
>> if (unlikely(glob->dummy_read_page == NULL)) {
>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>> gfp_t gfp_flags)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct ttm_resource_manager *man;
>> struct ttm_buffer_object *bo;
>> unsigned i, j;
>> int ret;
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>> man = ttm_manager_type(bdev, i);
>> if (!man || !man->use_tt)
>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>> }
>> }
>> }
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> return 0;
>> }
>> EXPORT_SYMBOL(ttm_device_swapout);
>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>
>> bdev->vma_manager = vma_manager;
>> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>> + spin_lock_init(&bdev->lru_lock);
>> INIT_LIST_HEAD(&bdev->ddestroy);
>> bdev->dev_mapping = mapping;
>> mutex_lock(&ttm_global_mutex);
>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>
>> void ttm_device_fini(struct ttm_device *bdev)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct ttm_resource_manager *man;
>> unsigned i;
>>
>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>> if (ttm_bo_delayed_delete(bdev, true))
>> pr_debug("Delayed destroy list was clean\n");
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>> if (list_empty(&man->lru[0]))
>> pr_debug("Swap list %d was clean\n", i);
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> ttm_pool_fini(&bdev->pool);
>> ttm_global_release();
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 690ab97d52b7..071c48d672c6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>> if (list_empty(list))
>> return;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> list_for_each_entry(entry, list, head) {
>> struct ttm_buffer_object *bo = entry->bo;
>>
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>>
>> if (ticket)
>> ww_acquire_fini(ticket);
>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>> if (list_empty(list))
>> return;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> list_for_each_entry(entry, list, head) {
>> struct ttm_buffer_object *bo = entry->bo;
>>
>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>> dma_resv_add_shared_fence(bo->base.resv, fence);
>> else
>> dma_resv_add_excl_fence(bo->base.resv, fence);
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> if (ticket)
>> ww_acquire_fini(ticket);
>> }
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index ed1672a9f332..04f2eef653ab 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>> .no_wait_gpu = false,
>> .force_alloc = true
>> };
>> - struct ttm_global *glob = &ttm_glob;
>> struct dma_fence *fence;
>> int ret;
>> unsigned i;
>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>> * Can't use standard list traversal since we're unlocking.
>> */
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> while (!list_empty(&man->lru[i])) {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>> NULL);
>> if (ret)
>> return ret;
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> }
>> }
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> spin_lock(&man->move_lock);
>> fence = dma_fence_get(man->move);
>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> index d007feef7676..dbccac957f8f 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>> static inline void
>> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>> {
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> }
>>
>> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index cda6efb4c34b..bae56d29e8ff 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>> */
>>
>> struct page *dummy_read_page;
>> - spinlock_t lru_lock;
>>
>> /**
>> * Protected by ttm_global_mutex.
>> @@ -277,8 +276,9 @@ struct ttm_device {
>> struct ttm_pool pool;
>>
>> /*
>> - * Protected by the global:lru lock.
>> + * Protection for the per manager LRU and ddestroy lists.
>> */
>> + spinlock_t lru_lock;
>> struct list_head ddestroy;
>>
>> /*
>> --
>> 2.25.1
>>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-19 12:10 ` Christian König
@ 2021-03-22 7:13 ` Huang Rui
0 siblings, 0 replies; 28+ messages in thread
From: Huang Rui @ 2021-03-22 7:13 UTC (permalink / raw)
To: Christian König; +Cc: matthew.william.auld, dri-devel
On Fri, Mar 19, 2021 at 08:10:21PM +0800, Christian König wrote:
> Am 19.03.21 um 05:32 schrieb Huang Rui:
> > On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> >> Instead of having a global lock.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
> >> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
> >> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
> >> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
> >> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
> >> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
> >> include/drm/ttm/ttm_bo_driver.h | 4 +--
> >> include/drm/ttm/ttm_device.h | 4 +--
> >> 8 files changed, 43 insertions(+), 56 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9d19078246c8..ae18c0e32347 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >> struct amdgpu_vm_bo_base *bo_base;
> >>
> >> if (vm->bulk_moveable) {
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&adev->mman.bdev.lru_lock);
> > Could you please explain why you want to use this instead of the global lock?
>
> Potentially less contention.
>
> But you are right, mentioning this in the commit message is a good idea.
>
Thanks.
Patch is Acked-by: Huang Rui <ray.huang@amd.com>
> Regards,
> Christian.
>
> >
> > Thanks,
> > Ray
> >
> >> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&adev->mman.bdev.lru_lock);
> >> return;
> >> }
> >>
> >> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&adev->mman.bdev.lru_lock);
> >> list_for_each_entry(bo_base, &vm->idle, vm_status) {
> >> struct amdgpu_bo *bo = bo_base->bo;
> >>
> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >> &bo->shadow->tbo.mem,
> >> &vm->lru_bulk_move);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&adev->mman.bdev.lru_lock);
> >>
> >> vm->bulk_moveable = true;
> >> }
> >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> >> index f5845c96d414..b19f2f00b215 100644
> >> --- a/drivers/gpu/drm/qxl/qxl_release.c
> >> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> >> release->id | 0xf0000000, release->base.seqno);
> >> trace_dma_fence_emit(&release->base);
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> -
> >> list_for_each_entry(entry, &release->bos, head) {
> >> bo = entry->bo;
> >>
> >> dma_resv_add_shared_fence(bo->base.resv, &release->base);
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> ww_acquire_fini(&release->ticket);
> >> }
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index 3673157527ff..2d2ac532987e 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> >> * reference it any more. The only tricky case is the trylock on
> >> * the resv object while holding the lru_lock.
> >> */
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> bo->base.resv = &bo->base._resv;
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> }
> >>
> >> return r;
> >> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>
> >> if (unlock_resv)
> >> dma_resv_unlock(bo->base.resv);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> >> 30 * HZ);
> >> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> else if (lret == 0)
> >> return -EBUSY;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> >> /*
> >> * We raced, and lost, someone else holds the reservation now,
> >> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> * delayed destruction would succeed, so just return success
> >> * here.
> >> */
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return 0;
> >> }
> >> ret = 0;
> >> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> if (ret || unlikely(list_empty(&bo->ddestroy))) {
> >> if (unlock_resv)
> >> dma_resv_unlock(bo->base.resv);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return ret;
> >> }
> >>
> >> ttm_bo_del_from_lru(bo);
> >> list_del_init(&bo->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> ttm_bo_cleanup_memtype_use(bo);
> >>
> >> if (unlock_resv)
> >> @@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> */
> >> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct list_head removed;
> >> bool empty;
> >>
> >> INIT_LIST_HEAD(&removed);
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> while (!list_empty(&bdev->ddestroy)) {
> >> struct ttm_buffer_object *bo;
> >>
> >> @@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >> continue;
> >>
> >> if (remove_all || bo->base.resv != &bo->base._resv) {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> dma_resv_lock(bo->base.resv, NULL);
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>
> >> } else if (dma_resv_trylock(bo->base.resv)) {
> >> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >> } else {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> }
> >>
> >> ttm_bo_put(bo);
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> }
> >> list_splice_tail(&removed, &bdev->ddestroy);
> >> empty = list_empty(&bdev->ddestroy);
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> return empty;
> >> }
> >> @@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
> >> ttm_bo_flush_all_fences(bo);
> >> bo->deleted = true;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >>
> >> /*
> >> * Make pinned bos immediately available to
> >> @@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)
> >>
> >> kref_init(&bo->kref);
> >> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> schedule_delayed_work(&bdev->wq,
> >> ((HZ / 100) < 1) ? 1 : HZ / 100);
> >> return;
> >> }
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_del_from_lru(bo);
> >> list_del(&bo->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> ttm_bo_cleanup_memtype_use(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> @@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> unsigned i;
> >> int ret;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> list_for_each_entry(bo, &man->lru[i], lru) {
> >> bool busy;
> >> @@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> if (!bo) {
> >> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> >> busy_bo = NULL;
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> >> if (busy_bo)
> >> ttm_bo_put(busy_bo);
> >> @@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> return ret;
> >> }
> >>
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> ret = ttm_bo_evict(bo, ctx);
> >> if (locked)
> >> @@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> >> mem->mem_type = place->mem_type;
> >> mem->placement = place->flags;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_move_to_lru_tail(bo, mem, NULL);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> -
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return 0;
> >> }
> >>
> >> @@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> >> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >> gfp_t gfp_flags)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> bool locked;
> >> int ret;
> >>
> >> @@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>
> >> ttm_bo_del_from_lru(bo);
> >> /* TODO: Cleanup the locking */
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> /*
> >> * Move to system cached
> >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> >> index 2096a0fd9c35..85f6975d9872 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_device.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> >> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> >> ttm_pool_mgr_init(num_pages * 50 / 100);
> >> ttm_tt_mgr_init();
> >>
> >> - spin_lock_init(&glob->lru_lock);
> >> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
> >>
> >> if (unlikely(glob->dummy_read_page == NULL)) {
> >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> >> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >> gfp_t gfp_flags)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct ttm_resource_manager *man;
> >> struct ttm_buffer_object *bo;
> >> unsigned i, j;
> >> int ret;
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> >> man = ttm_manager_type(bdev, i);
> >> if (!man || !man->use_tt)
> >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >> }
> >> }
> >> }
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> return 0;
> >> }
> >> EXPORT_SYMBOL(ttm_device_swapout);
> >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> >>
> >> bdev->vma_manager = vma_manager;
> >> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> >> + spin_lock_init(&bdev->lru_lock);
> >> INIT_LIST_HEAD(&bdev->ddestroy);
> >> bdev->dev_mapping = mapping;
> >> mutex_lock(&ttm_global_mutex);
> >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
> >>
> >> void ttm_device_fini(struct ttm_device *bdev)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct ttm_resource_manager *man;
> >> unsigned i;
> >>
> >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> >> if (ttm_bo_delayed_delete(bdev, true))
> >> pr_debug("Delayed destroy list was clean\n");
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> >> if (list_empty(&man->lru[0]))
> >> pr_debug("Swap list %d was clean\n", i);
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> ttm_pool_fini(&bdev->pool);
> >> ttm_global_release();
> >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> index 690ab97d52b7..071c48d672c6 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> >> if (list_empty(list))
> >> return;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> list_for_each_entry(entry, list, head) {
> >> struct ttm_buffer_object *bo = entry->bo;
> >>
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >>
> >> if (ticket)
> >> ww_acquire_fini(ticket);
> >> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >> if (list_empty(list))
> >> return;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> list_for_each_entry(entry, list, head) {
> >> struct ttm_buffer_object *bo = entry->bo;
> >>
> >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >> dma_resv_add_shared_fence(bo->base.resv, fence);
> >> else
> >> dma_resv_add_excl_fence(bo->base.resv, fence);
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> if (ticket)
> >> ww_acquire_fini(ticket);
> >> }
> >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> >> index ed1672a9f332..04f2eef653ab 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >> .no_wait_gpu = false,
> >> .force_alloc = true
> >> };
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct dma_fence *fence;
> >> int ret;
> >> unsigned i;
> >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >> * Can't use standard list traversal since we're unlocking.
> >> */
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> while (!list_empty(&man->lru[i])) {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> >> NULL);
> >> if (ret)
> >> return ret;
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> }
> >> }
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> spin_lock(&man->move_lock);
> >> fence = dma_fence_get(man->move);
> >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> >> index d007feef7676..dbccac957f8f 100644
> >> --- a/include/drm/ttm/ttm_bo_driver.h
> >> +++ b/include/drm/ttm/ttm_bo_driver.h
> >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> >> static inline void
> >> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> >> {
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> }
> >>
> >> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> >> index cda6efb4c34b..bae56d29e8ff 100644
> >> --- a/include/drm/ttm/ttm_device.h
> >> +++ b/include/drm/ttm/ttm_device.h
> >> @@ -56,7 +56,6 @@ extern struct ttm_global {
> >> */
> >>
> >> struct page *dummy_read_page;
> >> - spinlock_t lru_lock;
> >>
> >> /**
> >> * Protected by ttm_global_mutex.
> >> @@ -277,8 +276,9 @@ struct ttm_device {
> >> struct ttm_pool pool;
> >>
> >> /*
> >> - * Protected by the global:lru lock.
> >> + * Protection for the per manager LRU and ddestroy lists.
> >> */
> >> + spinlock_t lru_lock;
> >> struct list_head ddestroy;
> >>
> >> /*
> >> --
> >> 2.25.1
> >>
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-18 14:43 ` Nirmoy
2021-03-18 15:26 ` Christian König
2021-03-18 15:08 ` kernel test robot
` (3 subsequent siblings)
6 siblings, 1 reply; 28+ messages in thread
From: Nirmoy @ 2021-03-18 14:43 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: ray.huang, matthew.william.auld
Hi Christian,
On 3/18/21 1:47 PM, Christian König wrote:
<snip>
> /**
> * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 035bbc044a3b..6a0b267d4fe6 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -297,6 +297,8 @@ struct ttm_device {
> struct delayed_work wq;
> };
>
> +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
There is a typo here, long -> int.
Thanks,
Nirmoy
> +
> static inline struct ttm_resource_manager *
> ttm_manager_type(struct ttm_device *bdev, int mem_type)
> {
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 14:43 ` [PATCH 1/3] drm/ttm: move swapout logic around v2 Nirmoy
@ 2021-03-18 15:26 ` Christian König
2021-03-18 19:46 ` Nirmoy
0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-03-18 15:26 UTC (permalink / raw)
To: Nirmoy, dri-devel; +Cc: ray.huang, matthew.william.auld
Am 18.03.21 um 15:43 schrieb Nirmoy:
> Hi Christian,
>
> On 3/18/21 1:47 PM, Christian König wrote:
> <snip>
>> /**
>> * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index 035bbc044a3b..6a0b267d4fe6 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -297,6 +297,8 @@ struct ttm_device {
>> struct delayed_work wq;
>> };
>> +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t
>> gfp_flags);
>
>
> There is a typo here, long -> int.
Ah, yes. I forgot to add the change to the header file.
Thanks for pointing that out,
Christian.
>
>
> Thanks,
>
> Nirmoy
>
>
>> +
>> static inline struct ttm_resource_manager *
>> ttm_manager_type(struct ttm_device *bdev, int mem_type)
>> {
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 15:26 ` Christian König
@ 2021-03-18 19:46 ` Nirmoy
0 siblings, 0 replies; 28+ messages in thread
From: Nirmoy @ 2021-03-18 19:46 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: ray.huang, matthew.william.auld
On 3/18/21 4:26 PM, Christian König wrote:
> Am 18.03.21 um 15:43 schrieb Nirmoy:
>> Hi Christian,
>>
>> On 3/18/21 1:47 PM, Christian König wrote:
>> <snip>
>>> /**
>>> * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
>>> diff --git a/include/drm/ttm/ttm_device.h
>>> b/include/drm/ttm/ttm_device.h
>>> index 035bbc044a3b..6a0b267d4fe6 100644
>>> --- a/include/drm/ttm/ttm_device.h
>>> +++ b/include/drm/ttm/ttm_device.h
>>> @@ -297,6 +297,8 @@ struct ttm_device {
>>> struct delayed_work wq;
>>> };
>>> +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t
>>> gfp_flags);
>>
>>
>> There is a typo here, long -> int.
>
> Ah, yes. I missed to add the change to the header file.
I have tested the patch series on qxl and vmwgfx.
With that typo fixed, the series is Tested-by: Nirmoy Das
<nirmoy.das@amd.com>
>
> Thanks for pointing that out,
> Christian.
>
>>
>>
>> Thanks,
>>
>> Nirmoy
>>
>>
>>> +
>>> static inline struct ttm_resource_manager *
>>> ttm_manager_type(struct ttm_device *bdev, int mem_type)
>>> {
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
@ 2021-03-18 15:08 ` kernel test robot
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
` (5 subsequent siblings)
6 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-03-18 15:08 UTC (permalink / raw)
To: Christian König, dri-devel
Cc: matthew.william.auld, ray.huang, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4552 bytes --]
Hi Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-m021-20210318 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
# save the attached .config to linux build tree
make W=1 ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
109 | int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
| ^~~~~~~~~~~~~~~~~~
In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
| ^~~~~~~~~~~~~~~~~~
In file included from include/linux/linkage.h:7,
from include/linux/kernel.h:7,
from include/asm-generic/bug.h:20,
from arch/x86/include/asm/bug.h:93,
from include/linux/bug.h:5,
from include/linux/mmdebug.h:5,
from include/linux/mm.h:9,
from drivers/gpu/drm/ttm/ttm_device.c:30:
drivers/gpu/drm/ttm/ttm_device.c:132:15: error: conflicting types for 'ttm_global_swapout'
132 | EXPORT_SYMBOL(ttm_global_swapout);
| ^~~~~~~~~~~~~~~~~~
include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
98 | extern typeof(sym) sym; \
| ^~~
include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
| ^~~~~~~~~~~~~~~
include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
158 | #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "")
| ^~~~~~~~~~~~~~
drivers/gpu/drm/ttm/ttm_device.c:132:1: note: in expansion of macro 'EXPORT_SYMBOL'
132 | EXPORT_SYMBOL(ttm_global_swapout);
| ^~~~~~~~~~~~~
In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
| ^~~~~~~~~~~~~~~~~~
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
> 109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 37450 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-18 15:08 ` kernel test robot
0 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-03-18 15:08 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4654 bytes --]
Hi Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: i386-randconfig-m021-20210318 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
# save the attached .config to linux build tree
make W=1 ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
109 | int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
| ^~~~~~~~~~~~~~~~~~
In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
| ^~~~~~~~~~~~~~~~~~
In file included from include/linux/linkage.h:7,
from include/linux/kernel.h:7,
from include/asm-generic/bug.h:20,
from arch/x86/include/asm/bug.h:93,
from include/linux/bug.h:5,
from include/linux/mmdebug.h:5,
from include/linux/mm.h:9,
from drivers/gpu/drm/ttm/ttm_device.c:30:
drivers/gpu/drm/ttm/ttm_device.c:132:15: error: conflicting types for 'ttm_global_swapout'
132 | EXPORT_SYMBOL(ttm_global_swapout);
| ^~~~~~~~~~~~~~~~~~
include/linux/export.h:98:21: note: in definition of macro '___EXPORT_SYMBOL'
98 | extern typeof(sym) sym; \
| ^~~
include/linux/export.h:155:34: note: in expansion of macro '__EXPORT_SYMBOL'
155 | #define _EXPORT_SYMBOL(sym, sec) __EXPORT_SYMBOL(sym, sec, "")
| ^~~~~~~~~~~~~~~
include/linux/export.h:158:29: note: in expansion of macro '_EXPORT_SYMBOL'
158 | #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "")
| ^~~~~~~~~~~~~~
drivers/gpu/drm/ttm/ttm_device.c:132:1: note: in expansion of macro 'EXPORT_SYMBOL'
132 | EXPORT_SYMBOL(ttm_global_swapout);
| ^~~~~~~~~~~~~
In file included from drivers/gpu/drm/ttm/ttm_device.c:32:
include/drm/ttm/ttm_device.h:300:6: note: previous declaration of 'ttm_global_swapout' was here
300 | long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
| ^~~~~~~~~~~~~~~~~~
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
> 109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 37450 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
@ 2021-03-18 18:13 ` kernel test robot
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
` (5 subsequent siblings)
6 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-03-18 18:13 UTC (permalink / raw)
To: Christian König, dri-devel
Cc: clang-built-linux, ray.huang, kbuild-all, matthew.william.auld
[-- Attachment #1: Type: text/plain, Size: 3143 bytes --]
Hi "Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
^
include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
^
1 error generated.
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
> 109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36290 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-18 18:13 ` kernel test robot
0 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-03-18 18:13 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 3222 bytes --]
Hi "Christian,
I love your patch! Yet something to improve:
[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on next-20210318]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a005-20210318 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
^
include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
^
1 error generated.
vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
104
105 /**
106 * A buffer object shrink method that tries to swap out the first
107 * buffer object on the global::swap_lru list.
108 */
> 109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
110 {
111 struct ttm_global *glob = &ttm_glob;
112 struct ttm_buffer_object *bo;
113 unsigned i;
114 int ret;
115
116 spin_lock(&glob->lru_lock);
117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
119 uint32_t num_pages = bo->ttm->num_pages;
120
121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
122 /* ttm_bo_swapout has dropped the lru_lock */
123 if (!ret)
124 return num_pages;
125 if (ret != -EBUSY)
126 return ret;
127 }
128 }
129 spin_unlock(&glob->lru_lock);
130 return 0;
131 }
132 EXPORT_SYMBOL(ttm_global_swapout);
133
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36290 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 18:13 ` kernel test robot
@ 2021-03-19 12:07 ` Christian König
-1 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-03-19 12:07 UTC (permalink / raw)
To: dri-devel, lkml
General question for the audience: Is there any way to silence the build
bot here?
This is a well known false positive.
Regards,
Christian.
Am 18.03.21 um 19:13 schrieb kernel test robot:
> Hi "Christian,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on drm-tip/drm-tip]
> [also build test ERROR on next-20210318]
> [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
> base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
> config: x86_64-randconfig-a005-20210318 (attached as .config)
> compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install x86_64 cross compiling tool for clang build
> # apt-get install binutils-x86-64-linux-gnu
> # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
> git remote add linux-review https://github.com/0day-ci/linux
> git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
> git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
> int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> ^
> include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> ^
> 1 error generated.
>
>
> vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
>
> 104
> 105 /**
> 106 * A buffer object shrink method that tries to swap out the first
> 107 * buffer object on the global::swap_lru list.
> 108 */
> > 109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> 110 {
> 111 struct ttm_global *glob = &ttm_glob;
> 112 struct ttm_buffer_object *bo;
> 113 unsigned i;
> 114 int ret;
> 115
> 116 spin_lock(&glob->lru_lock);
> 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> 119 uint32_t num_pages = bo->ttm->num_pages;
> 120
> 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> 122 /* ttm_bo_swapout has dropped the lru_lock */
> 123 if (!ret)
> 124 return num_pages;
> 125 if (ret != -EBUSY)
> 126 return ret;
> 127 }
> 128 }
> 129 spin_unlock(&glob->lru_lock);
> 130 return 0;
> 131 }
> 132 EXPORT_SYMBOL(ttm_global_swapout);
> 133
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
@ 2021-03-19 12:07 ` Christian König
0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-03-19 12:07 UTC (permalink / raw)
To: dri-devel, lkml
General question for the audience: Is there any way to silence the build
bot here?
This is a well known false positive.
Regards,
Christian.
Am 18.03.21 um 19:13 schrieb kernel test robot:
> Hi "Christian,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on drm-tip/drm-tip]
> [also build test ERROR on next-20210318]
> [cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next linus/master v5.12-rc3]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
> base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
> config: x86_64-randconfig-a005-20210318 (attached as .config)
> compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 6db3ab2903f42712f44000afb5aa467efbd25f35)
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install x86_64 cross compiling tool for clang build
> # apt-get install binutils-x86-64-linux-gnu
> # https://github.com/0day-ci/linux/commit/a454d56ea061b53d24a62a700743e4508dd6c9b1
> git remote add linux-review https://github.com/0day-ci/linux
> git fetch --no-tags linux-review Christian-K-nig/drm-ttm-move-swapout-logic-around-v2/20210318-204848
> git checkout a454d56ea061b53d24a62a700743e4508dd6c9b1
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>>> drivers/gpu/drm/ttm/ttm_device.c:109:5: error: conflicting types for 'ttm_global_swapout'
> int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> ^
> include/drm/ttm/ttm_device.h:300:6: note: previous declaration is here
> long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> ^
> 1 error generated.
>
>
> vim +/ttm_global_swapout +109 drivers/gpu/drm/ttm/ttm_device.c
>
> 104
> 105 /**
> 106 * A buffer object shrink method that tries to swap out the first
> 107 * buffer object on the global::swap_lru list.
> 108 */
> > 109 int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> 110 {
> 111 struct ttm_global *glob = &ttm_glob;
> 112 struct ttm_buffer_object *bo;
> 113 unsigned i;
> 114 int ret;
> 115
> 116 spin_lock(&glob->lru_lock);
> 117 for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> 118 list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> 119 uint32_t num_pages = bo->ttm->num_pages;
> 120
> 121 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> 122 /* ttm_bo_swapout has dropped the lru_lock */
> 123 if (!ret)
> 124 return num_pages;
> 125 if (ret != -EBUSY)
> 126 return ret;
> 127 }
> 128 }
> 129 spin_unlock(&glob->lru_lock);
> 130 return 0;
> 131 }
> 132 EXPORT_SYMBOL(ttm_global_swapout);
> 133
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
` (4 preceding siblings ...)
2021-03-18 18:13 ` kernel test robot
@ 2021-03-19 4:28 ` Huang Rui
2021-03-19 9:41 ` Matthew Auld
6 siblings, 0 replies; 28+ messages in thread
From: Huang Rui @ 2021-03-19 4:28 UTC (permalink / raw)
To: Christian König; +Cc: matthew.william.auld, dri-devel
On Thu, Mar 18, 2021 at 08:47:17PM +0800, Christian König wrote:
> Move the iteration of the global lru into the new function
> ttm_global_swapout() and use that instead in drivers.
>
> v2: consistently return int
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
> drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
> drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
> drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
> drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
> include/drm/ttm/ttm_bo_api.h | 3 +-
> include/drm/ttm/ttm_device.h | 2 +
> 7 files changed, 53 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3c23e863a3f0..66e00b404ec3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
> }
> EXPORT_SYMBOL(ttm_bo_wait);
>
> -/*
> - * A buffer object shrink method that tries to swap out the first
> - * buffer object on the bo_global::swap_lru list.
> - */
> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> + gfp_t gfp_flags)
> {
> struct ttm_global *glob = &ttm_glob;
> - struct ttm_buffer_object *bo;
> - int ret = -EBUSY;
> bool locked;
> - unsigned i;
> -
> - spin_lock(&glob->lru_lock);
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
> - NULL))
> - continue;
> -
> - if (!ttm_bo_get_unless_zero(bo)) {
> - if (locked)
> - dma_resv_unlock(bo->base.resv);
> - continue;
> - }
> + int ret;
>
> - ret = 0;
> - break;
> - }
> - if (!ret)
> - break;
> - }
> + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
> + return -EBUSY;
>
> - if (ret) {
> - spin_unlock(&glob->lru_lock);
> - return ret;
> + if (!ttm_bo_get_unless_zero(bo)) {
> + if (locked)
> + dma_resv_unlock(bo->base.resv);
> + return -EBUSY;
> }
>
> if (bo->deleted) {
> - ret = ttm_bo_cleanup_refs(bo, false, false, locked);
> + ttm_bo_cleanup_refs(bo, false, false, locked);
> ttm_bo_put(bo);
> - return ret;
> + return 0;
> }
>
> ttm_bo_del_from_lru(bo);
> + /* TODO: Cleanup the locking */
> spin_unlock(&glob->lru_lock);
>
> - /**
> + /*
> * Move to system cached
> */
> -
> if (bo->mem.mem_type != TTM_PL_SYSTEM) {
> struct ttm_operation_ctx ctx = { false, false };
> struct ttm_resource evict_mem;
> @@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> }
> }
>
> - /**
> + /*
> * Make sure BO is idle.
> */
> -
> ret = ttm_bo_wait(bo, false, false);
> if (unlikely(ret != 0))
> goto out;
>
> ttm_bo_unmap_virtual(bo);
>
> - /**
> + /*
> * Swap out. Buffer will be swapped in again as soon as
> * anyone tries to access a ttm page.
> */
> -
> if (bo->bdev->funcs->swap_notify)
> bo->bdev->funcs->swap_notify(bo);
>
> ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> out:
>
> - /**
> - *
> + /*
> * Unreserve without putting on LRU to avoid swapping out an
> * already swapped buffer.
> */
> @@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> ttm_bo_put(bo);
> return ret;
> }
> -EXPORT_SYMBOL(ttm_bo_swapout);
>
> void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
> {
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 95e1b7b1f2e6..b1424189fdfb 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -102,6 +102,35 @@ static int ttm_global_init(void)
> return ret;
> }
>
> +/**
> + * A buffer object shrink method that tries to swap out the first
> + * buffer object on the global::swap_lru list.
> + */
> +int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> +{
> + struct ttm_global *glob = &ttm_glob;
> + struct ttm_buffer_object *bo;
> + unsigned i;
> + int ret;
> +
> + spin_lock(&glob->lru_lock);
> + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> + list_for_each_entry(bo, &glob->swap_lru[i], swap) {
> + uint32_t num_pages = bo->ttm->num_pages;
> +
> + ret = ttm_bo_swapout(bo, ctx, gfp_flags);
> + /* ttm_bo_swapout has dropped the lru_lock */
> + if (!ret)
> + return num_pages;
> + if (ret != -EBUSY)
> + return ret;
> + }
> + }
> + spin_unlock(&glob->lru_lock);
> + return 0;
> +}
> +EXPORT_SYMBOL(ttm_global_swapout);
> +
> static void ttm_init_sysman(struct ttm_device *bdev)
> {
> struct ttm_resource_manager *man = &bdev->sysman;
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 2f0833c98d2c..95b5cff25f4c 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
> };
> int ret;
>
> - ret = ttm_bo_swapout(&ctx, GFP_NOFS);
> + ret = ttm_global_swapout(&ctx, GFP_NOFS);
> return ret < 0 ? SHRINK_EMPTY : ret;
> }
>
> diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
> index e972af07d029..104b95a8c7a2 100644
> --- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
> +++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
> @@ -38,6 +38,7 @@
>
> #include <drm/drm_device.h>
> #include <drm/drm_file.h>
> +#include <drm/ttm/ttm_device.h>
>
> #include "ttm_memory.h"
>
> @@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
>
> while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
> spin_unlock(&glob->lock);
> - ret = ttm_bo_swapout(ctx, GFP_KERNEL);
> + ret = ttm_global_swapout(ctx, GFP_KERNEL);
> spin_lock(&glob->lock);
> if (unlikely(ret < 0))
> break;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 6910111099c8..b991422e156c 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
> vmw_execbuf_release_pinned_bo(dev_priv);
> vmw_resource_evict_all(dev_priv);
> vmw_release_device_early(dev_priv);
> - while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
> + while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
> if (dev_priv->enable_fb)
> vmw_fifo_resource_dec(dev_priv);
> if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 4fb523dfab32..5044ac330858 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
> const char __user *wbuf, char __user *rbuf,
> size_t count, loff_t *f_pos, bool write);
>
> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> + gfp_t gfp_flags);
>
> /**
> * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 035bbc044a3b..6a0b267d4fe6 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -297,6 +297,8 @@ struct ttm_device {
> struct delayed_work wq;
> };
>
> +long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
> +
> static inline struct ttm_resource_manager *
> ttm_manager_type(struct ttm_device *bdev, int mem_type)
> {
> --
> 2.25.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Fdri-devel&data=04%7C01%7Cray.huang%40amd.com%7C5e884dc7218341f5405a08d8ea0bfab3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637516684478741238%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=brmyK0mL95w5aG52hDFdMDw1CymMDnuIRTQK64roKKk%3D&reserved=0
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 1/3] drm/ttm: move swapout logic around v2
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
` (5 preceding siblings ...)
2021-03-19 4:28 ` Huang Rui
@ 2021-03-19 9:41 ` Matthew Auld
6 siblings, 0 replies; 28+ messages in thread
From: Matthew Auld @ 2021-03-19 9:41 UTC (permalink / raw)
To: Christian König; +Cc: ray.huang, ML dri-devel
On Thu, 18 Mar 2021 at 12:47, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Move the iteration of the global lru into the new function
> ttm_global_swapout() and use that instead in drivers.
>
> v2: consistently return int
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
For the series,
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-03-15 16:04 Christian König
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a08dec7281fc..56d2e38af273 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1186,56 +1186,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1255,29 +1234,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1286,7 +1262,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6910111099c8..b991422e156c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1371,7 +1371,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-03-15 16:04 ` Christian König
2021-03-15 20:17 ` kernel test robot
2021-03-16 9:35 ` Daniel Vetter
0 siblings, 2 replies; 28+ messages in thread
From: Christian König @ 2021-03-15 16:04 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Instead of having a global lock.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
drivers/gpu/drm/qxl/qxl_release.c | 5 +--
drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
include/drm/ttm/ttm_bo_driver.h | 4 +--
include/drm/ttm/ttm_device.h | 4 +--
8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
return;
}
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
&bo->shadow->tbo.mem,
&vm->lru_bulk_move);
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index f5845c96d414..b19f2f00b215 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
release->id | 0xf0000000, release->base.seqno);
trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock);
-
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a1be88be357b..a8103c8718a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
* reference it any more. The only tricky case is the trylock on
* the resv object while holding the lru_lock.
*/
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
bo->base.resv = &bo->base._resv;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
else if (lret == 0)
return -EBUSY;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
/*
* We raced, and lost, someone else holds the reservation now,
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
* delayed destruction would succeed, so just return success
* here.
*/
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
ret = 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (ret || unlikely(list_empty(&bo->ddestroy))) {
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return ret;
}
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
*/
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
{
- struct ttm_global *glob = &ttm_glob;
struct list_head removed;
bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
while (!list_empty(&bdev->ddestroy)) {
struct ttm_buffer_object *bo;
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
empty = list_empty(&bdev->ddestroy);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return empty;
}
@@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_bo_flush_all_fences(bo);
bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
/*
* Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
return;
}
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_del_from_lru(bo);
list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
unsigned i;
int ret;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
busy_bo = NULL;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
ttm_bo_put(busy_bo);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
return ret;
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx);
if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
mem->mem_type = place->mem_type;
mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
-
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
bool locked;
int ret;
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo);
/* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
/*
* Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2c280fb1e992..924d892109e8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
ttm_pool_mgr_init(num_pages * 50 / 100);
ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock);
glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
unsigned i, j;
int ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
}
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return 0;
}
EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager;
INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+ spin_lock_init(&bdev->lru_lock);
INIT_LIST_HEAD(&bdev->ddestroy);
bdev->dev_mapping = mapping;
mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
if (ttm_bo_delayed_delete(bdev, true))
pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
if (list_empty(&man->lru[0]))
pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool);
ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
dma_resv_add_shared_fence(bo->base.resv, fence);
else
dma_resv_add_excl_fence(bo->base.resv, fence);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
.force_alloc = true
};
- struct ttm_global *glob = &ttm_glob;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
* Can't use standard list traversal since we're unlocking.
*/
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
NULL);
if (ret)
return ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
static inline void
ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
{
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
*/
struct page *dummy_read_page;
- spinlock_t lru_lock;
/**
* Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
struct ttm_pool pool;
/*
- * Protected by the global:lru lock.
+ * Protection for the per manager LRU and ddestroy lists.
*/
+ spinlock_t lru_lock;
struct list_head ddestroy;
/*
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
@ 2021-03-15 20:17 ` kernel test robot
2021-03-16 9:35 ` Daniel Vetter
1 sibling, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw)
To: Christian König, dri-devel; +Cc: ray.huang, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4014 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)
vim +158 drivers/gpu/drm/ttm/ttm_device.c
70ae63f3a85b97 Christian König 2021-03-15 123
70ae63f3a85b97 Christian König 2021-03-15 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15 125 gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15 126 {
70ae63f3a85b97 Christian König 2021-03-15 127 struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15 128 struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15 129 unsigned i, j;
824dca26fe3958 Christian König 2021-03-15 130 int ret;
824dca26fe3958 Christian König 2021-03-15 131
1ed8d8fc515b90 Christian König 2021-03-15 132 spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15 134 man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15 135 if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15 136 continue;
70ae63f3a85b97 Christian König 2021-03-15 137
70ae63f3a85b97 Christian König 2021-03-15 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15 139 list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15 140 long num_pages;
824dca26fe3958 Christian König 2021-03-15 141
70ae63f3a85b97 Christian König 2021-03-15 142 if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15 145 continue;
70ae63f3a85b97 Christian König 2021-03-15 146
70ae63f3a85b97 Christian König 2021-03-15 147 num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15 148 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15 149 /* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15 150 if (!ret)
824dca26fe3958 Christian König 2021-03-15 151 return num_pages;
824dca26fe3958 Christian König 2021-03-15 152 if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15 153 return ret;
824dca26fe3958 Christian König 2021-03-15 154 }
824dca26fe3958 Christian König 2021-03-15 155 }
70ae63f3a85b97 Christian König 2021-03-15 156 }
1ed8d8fc515b90 Christian König 2021-03-15 157 spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158 return 0;
824dca26fe3958 Christian König 2021-03-15 159 }
70ae63f3a85b97 Christian König 2021-03-15 160 EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15 161
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36557 bytes --]
[-- Attachment #3: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
@ 2021-03-15 20:17 ` kernel test robot
0 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-03-15 20:17 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4122 bytes --]
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next linus/master drm/drm-next v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210316-000551
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-m001-20210315 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:158 ttm_device_swapout() warn: inconsistent returns '&bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:665 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 662)
vim +158 drivers/gpu/drm/ttm/ttm_device.c
70ae63f3a85b97 Christian König 2021-03-15 123
70ae63f3a85b97 Christian König 2021-03-15 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
70ae63f3a85b97 Christian König 2021-03-15 125 gfp_t gfp_flags)
70ae63f3a85b97 Christian König 2021-03-15 126 {
70ae63f3a85b97 Christian König 2021-03-15 127 struct ttm_resource_manager *man;
824dca26fe3958 Christian König 2021-03-15 128 struct ttm_buffer_object *bo;
70ae63f3a85b97 Christian König 2021-03-15 129 unsigned i, j;
824dca26fe3958 Christian König 2021-03-15 130 int ret;
824dca26fe3958 Christian König 2021-03-15 131
1ed8d8fc515b90 Christian König 2021-03-15 132 spin_lock(&bdev->lru_lock);
70ae63f3a85b97 Christian König 2021-03-15 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
70ae63f3a85b97 Christian König 2021-03-15 134 man = ttm_manager_type(bdev, i);
70ae63f3a85b97 Christian König 2021-03-15 135 if (!man || !man->use_tt)
70ae63f3a85b97 Christian König 2021-03-15 136 continue;
70ae63f3a85b97 Christian König 2021-03-15 137
70ae63f3a85b97 Christian König 2021-03-15 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
70ae63f3a85b97 Christian König 2021-03-15 139 list_for_each_entry(bo, &man->lru[j], lru) {
70ae63f3a85b97 Christian König 2021-03-15 140 long num_pages;
824dca26fe3958 Christian König 2021-03-15 141
70ae63f3a85b97 Christian König 2021-03-15 142 if (!bo->ttm ||
70ae63f3a85b97 Christian König 2021-03-15 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
70ae63f3a85b97 Christian König 2021-03-15 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
70ae63f3a85b97 Christian König 2021-03-15 145 continue;
70ae63f3a85b97 Christian König 2021-03-15 146
70ae63f3a85b97 Christian König 2021-03-15 147 num_pages = bo->ttm->num_pages;
824dca26fe3958 Christian König 2021-03-15 148 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
824dca26fe3958 Christian König 2021-03-15 149 /* ttm_bo_swapout has dropped the lru_lock */
824dca26fe3958 Christian König 2021-03-15 150 if (!ret)
824dca26fe3958 Christian König 2021-03-15 151 return num_pages;
824dca26fe3958 Christian König 2021-03-15 152 if (ret != -EBUSY)
824dca26fe3958 Christian König 2021-03-15 153 return ret;
824dca26fe3958 Christian König 2021-03-15 154 }
824dca26fe3958 Christian König 2021-03-15 155 }
70ae63f3a85b97 Christian König 2021-03-15 156 }
1ed8d8fc515b90 Christian König 2021-03-15 157 spin_unlock(&bdev->lru_lock);
824dca26fe3958 Christian König 2021-03-15 @158 return 0;
824dca26fe3958 Christian König 2021-03-15 159 }
70ae63f3a85b97 Christian König 2021-03-15 160 EXPORT_SYMBOL(ttm_device_swapout);
824dca26fe3958 Christian König 2021-03-15 161
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 36557 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-15 20:17 ` kernel test robot
@ 2021-03-16 9:35 ` Daniel Vetter
2021-03-16 12:03 ` Christian König
1 sibling, 1 reply; 28+ messages in thread
From: Daniel Vetter @ 2021-03-16 9:35 UTC (permalink / raw)
To: Christian König; +Cc: ray.huang, dri-devel
On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
> Instead of having a global lock.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
I guess per zone lru lock is a lot more work since then we need to handle
ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
lingo.
-Daniel
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
> include/drm/ttm/ttm_bo_driver.h | 4 +--
> include/drm/ttm/ttm_device.h | 4 +--
> 8 files changed, 43 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> struct amdgpu_vm_bo_base *bo_base;
>
> if (vm->bulk_moveable) {
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&adev->mman.bdev.lru_lock);
> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&adev->mman.bdev.lru_lock);
> return;
> }
>
> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&adev->mman.bdev.lru_lock);
> list_for_each_entry(bo_base, &vm->idle, vm_status) {
> struct amdgpu_bo *bo = bo_base->bo;
>
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> &bo->shadow->tbo.mem,
> &vm->lru_bulk_move);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&adev->mman.bdev.lru_lock);
>
> vm->bulk_moveable = true;
> }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> release->id | 0xf0000000, release->base.seqno);
> trace_dma_fence_emit(&release->base);
>
> - spin_lock(&ttm_glob.lru_lock);
> -
> list_for_each_entry(entry, &release->bos, head) {
> bo = entry->bo;
>
> dma_resv_add_shared_fence(bo->base.resv, &release->base);
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> ww_acquire_fini(&release->ticket);
> }
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index a1be88be357b..a8103c8718a3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> * reference it any more. The only tricky case is the trylock on
> * the resv object while holding the lru_lock.
> */
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> bo->base.resv = &bo->base._resv;
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> }
>
> return r;
> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> 30 * HZ);
> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> else if (lret == 0)
> return -EBUSY;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> /*
> * We raced, and lost, someone else holds the reservation now,
> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> * delayed destruction would succeed, so just return success
> * here.
> */
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> return 0;
> }
> ret = 0;
> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> if (ret || unlikely(list_empty(&bo->ddestroy))) {
> if (unlock_resv)
> dma_resv_unlock(bo->base.resv);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> return ret;
> }
>
> ttm_bo_del_from_lru(bo);
> list_del_init(&bo->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> ttm_bo_cleanup_memtype_use(bo);
>
> if (unlock_resv)
> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> */
> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct list_head removed;
> bool empty;
>
> INIT_LIST_HEAD(&removed);
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> while (!list_empty(&bdev->ddestroy)) {
> struct ttm_buffer_object *bo;
>
> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> continue;
>
> if (remove_all || bo->base.resv != &bo->base._resv) {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> dma_resv_lock(bo->base.resv, NULL);
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>
> } else if (dma_resv_trylock(bo->base.resv)) {
> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> } else {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> }
>
> ttm_bo_put(bo);
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> }
> list_splice_tail(&removed, &bdev->ddestroy);
> empty = list_empty(&bdev->ddestroy);
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> return empty;
> }
> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
> ttm_bo_flush_all_fences(bo);
> bo->deleted = true;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
>
> /*
> * Make pinned bos immediately available to
> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>
> kref_init(&bo->kref);
> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> schedule_delayed_work(&bdev->wq,
> ((HZ / 100) < 1) ? 1 : HZ / 100);
> return;
> }
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_del_from_lru(bo);
> list_del(&bo->ddestroy);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> ttm_bo_cleanup_memtype_use(bo);
> dma_resv_unlock(bo->base.resv);
> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> unsigned i;
> int ret;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> list_for_each_entry(bo, &man->lru[i], lru) {
> bool busy;
> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> if (!bo) {
> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> busy_bo = NULL;
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> if (busy_bo)
> ttm_bo_put(busy_bo);
> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> return ret;
> }
>
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> ret = ttm_bo_evict(bo, ctx);
> if (locked)
> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> mem->mem_type = place->mem_type;
> mem->placement = place->flags;
>
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_move_to_lru_tail(bo, mem, NULL);
> - spin_unlock(&ttm_glob.lru_lock);
> -
> + spin_unlock(&bo->bdev->lru_lock);
> return 0;
> }
>
> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> gfp_t gfp_flags)
> {
> - struct ttm_global *glob = &ttm_glob;
> bool locked;
> int ret;
>
> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>
> ttm_bo_del_from_lru(bo);
> /* TODO: Cleanup the locking */
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
>
> /*
> * Move to system cached
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 2c280fb1e992..924d892109e8 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> ttm_pool_mgr_init(num_pages * 50 / 100);
> ttm_tt_mgr_init();
>
> - spin_lock_init(&glob->lru_lock);
> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>
> if (unlikely(glob->dummy_read_page == NULL)) {
> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> gfp_t gfp_flags)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct ttm_resource_manager *man;
> struct ttm_buffer_object *bo;
> unsigned i, j;
> int ret;
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> man = ttm_manager_type(bdev, i);
> if (!man || !man->use_tt)
> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> }
> }
> }
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> return 0;
> }
> EXPORT_SYMBOL(ttm_device_swapout);
> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>
> bdev->vma_manager = vma_manager;
> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> + spin_lock_init(&bdev->lru_lock);
> INIT_LIST_HEAD(&bdev->ddestroy);
> bdev->dev_mapping = mapping;
> mutex_lock(&ttm_global_mutex);
> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>
> void ttm_device_fini(struct ttm_device *bdev)
> {
> - struct ttm_global *glob = &ttm_glob;
> struct ttm_resource_manager *man;
> unsigned i;
>
> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> if (ttm_bo_delayed_delete(bdev, true))
> pr_debug("Delayed destroy list was clean\n");
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> if (list_empty(&man->lru[0]))
> pr_debug("Swap list %d was clean\n", i);
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> ttm_pool_fini(&bdev->pool);
> ttm_global_release();
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 690ab97d52b7..071c48d672c6 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> if (list_empty(list))
> return;
>
> - spin_lock(&ttm_glob.lru_lock);
> list_for_each_entry(entry, list, head) {
> struct ttm_buffer_object *bo = entry->bo;
>
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
>
> if (ticket)
> ww_acquire_fini(ticket);
> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> if (list_empty(list))
> return;
>
> - spin_lock(&ttm_glob.lru_lock);
> list_for_each_entry(entry, list, head) {
> struct ttm_buffer_object *bo = entry->bo;
>
> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> dma_resv_add_shared_fence(bo->base.resv, fence);
> else
> dma_resv_add_excl_fence(bo->base.resv, fence);
> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
> dma_resv_unlock(bo->base.resv);
> }
> - spin_unlock(&ttm_glob.lru_lock);
> if (ticket)
> ww_acquire_fini(ticket);
> }
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index ed1672a9f332..04f2eef653ab 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> .no_wait_gpu = false,
> .force_alloc = true
> };
> - struct ttm_global *glob = &ttm_glob;
> struct dma_fence *fence;
> int ret;
> unsigned i;
> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> * Can't use standard list traversal since we're unlocking.
> */
>
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> while (!list_empty(&man->lru[i])) {
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> NULL);
> if (ret)
> return ret;
> - spin_lock(&glob->lru_lock);
> + spin_lock(&bdev->lru_lock);
> }
> }
> - spin_unlock(&glob->lru_lock);
> + spin_unlock(&bdev->lru_lock);
>
> spin_lock(&man->move_lock);
> fence = dma_fence_get(man->move);
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index d007feef7676..dbccac957f8f 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> static inline void
> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> {
> - spin_lock(&ttm_glob.lru_lock);
> + spin_lock(&bo->bdev->lru_lock);
> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> - spin_unlock(&ttm_glob.lru_lock);
> + spin_unlock(&bo->bdev->lru_lock);
> }
>
> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index cda6efb4c34b..bae56d29e8ff 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -56,7 +56,6 @@ extern struct ttm_global {
> */
>
> struct page *dummy_read_page;
> - spinlock_t lru_lock;
>
> /**
> * Protected by ttm_global_mutex.
> @@ -277,8 +276,9 @@ struct ttm_device {
> struct ttm_pool pool;
>
> /*
> - * Protected by the global:lru lock.
> + * Protection for the per manager LRU and ddestroy lists.
> */
> + spinlock_t lru_lock;
> struct list_head ddestroy;
>
> /*
> --
> 2.25.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-16 9:35 ` Daniel Vetter
@ 2021-03-16 12:03 ` Christian König
2021-03-16 12:05 ` Daniel Vetter
0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-03-16 12:03 UTC (permalink / raw)
To: Daniel Vetter; +Cc: ray.huang, dri-devel
Am 16.03.21 um 10:35 schrieb Daniel Vetter:
> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
>> Instead of having a global lock.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> I guess per zone lru lock is a lot more work since then we need to handle
> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
> lingo.
Making the LRU per resource manager is the long term goal, yes.
My key idea so far is that we make bo->mem a pointer and then move the
LRU handling into the resource object instead of the BO.
The resource object then just references the BO, so that we can
figure out which BO to evict or which fence to wait for to free up a
resource.
Regards,
Christian.
> -Daniel
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
>> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
>> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
>> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
>> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
>> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
>> include/drm/ttm/ttm_bo_driver.h | 4 +--
>> include/drm/ttm/ttm_device.h | 4 +--
>> 8 files changed, 43 insertions(+), 56 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9d19078246c8..ae18c0e32347 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>> struct amdgpu_vm_bo_base *bo_base;
>>
>> if (vm->bulk_moveable) {
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&adev->mman.bdev.lru_lock);
>> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&adev->mman.bdev.lru_lock);
>> return;
>> }
>>
>> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&adev->mman.bdev.lru_lock);
>> list_for_each_entry(bo_base, &vm->idle, vm_status) {
>> struct amdgpu_bo *bo = bo_base->bo;
>>
>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>> &bo->shadow->tbo.mem,
>> &vm->lru_bulk_move);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>
>> vm->bulk_moveable = true;
>> }
>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>> index f5845c96d414..b19f2f00b215 100644
>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>> release->id | 0xf0000000, release->base.seqno);
>> trace_dma_fence_emit(&release->base);
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> -
>> list_for_each_entry(entry, &release->bos, head) {
>> bo = entry->bo;
>>
>> dma_resv_add_shared_fence(bo->base.resv, &release->base);
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> ww_acquire_fini(&release->ticket);
>> }
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index a1be88be357b..a8103c8718a3 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>> * reference it any more. The only tricky case is the trylock on
>> * the resv object while holding the lru_lock.
>> */
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> bo->base.resv = &bo->base._resv;
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> }
>>
>> return r;
>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>
>> if (unlock_resv)
>> dma_resv_unlock(bo->base.resv);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>> 30 * HZ);
>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> else if (lret == 0)
>> return -EBUSY;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>> /*
>> * We raced, and lost, someone else holds the reservation now,
>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> * delayed destruction would succeed, so just return success
>> * here.
>> */
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> return 0;
>> }
>> ret = 0;
>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> if (ret || unlikely(list_empty(&bo->ddestroy))) {
>> if (unlock_resv)
>> dma_resv_unlock(bo->base.resv);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> return ret;
>> }
>>
>> ttm_bo_del_from_lru(bo);
>> list_del_init(&bo->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> ttm_bo_cleanup_memtype_use(bo);
>>
>> if (unlock_resv)
>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>> */
>> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct list_head removed;
>> bool empty;
>>
>> INIT_LIST_HEAD(&removed);
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> while (!list_empty(&bdev->ddestroy)) {
>> struct ttm_buffer_object *bo;
>>
>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>> continue;
>>
>> if (remove_all || bo->base.resv != &bo->base._resv) {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> dma_resv_lock(bo->base.resv, NULL);
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>
>> } else if (dma_resv_trylock(bo->base.resv)) {
>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>> } else {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> }
>>
>> ttm_bo_put(bo);
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> }
>> list_splice_tail(&removed, &bdev->ddestroy);
>> empty = list_empty(&bdev->ddestroy);
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> return empty;
>> }
>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>> ttm_bo_flush_all_fences(bo);
>> bo->deleted = true;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>>
>> /*
>> * Make pinned bos immediately available to
>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>>
>> kref_init(&bo->kref);
>> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> schedule_delayed_work(&bdev->wq,
>> ((HZ / 100) < 1) ? 1 : HZ / 100);
>> return;
>> }
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_del_from_lru(bo);
>> list_del(&bo->ddestroy);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> ttm_bo_cleanup_memtype_use(bo);
>> dma_resv_unlock(bo->base.resv);
>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> unsigned i;
>> int ret;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> list_for_each_entry(bo, &man->lru[i], lru) {
>> bool busy;
>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> if (!bo) {
>> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>> busy_bo = NULL;
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>> if (busy_bo)
>> ttm_bo_put(busy_bo);
>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>> return ret;
>> }
>>
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> ret = ttm_bo_evict(bo, ctx);
>> if (locked)
>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>> mem->mem_type = place->mem_type;
>> mem->placement = place->flags;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_move_to_lru_tail(bo, mem, NULL);
>> - spin_unlock(&ttm_glob.lru_lock);
>> -
>> + spin_unlock(&bo->bdev->lru_lock);
>> return 0;
>> }
>>
>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>> gfp_t gfp_flags)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> bool locked;
>> int ret;
>>
>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>
>> ttm_bo_del_from_lru(bo);
>> /* TODO: Cleanup the locking */
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>>
>> /*
>> * Move to system cached
>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>> index 2c280fb1e992..924d892109e8 100644
>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>> ttm_pool_mgr_init(num_pages * 50 / 100);
>> ttm_tt_mgr_init();
>>
>> - spin_lock_init(&glob->lru_lock);
>> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>
>> if (unlikely(glob->dummy_read_page == NULL)) {
>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>> gfp_t gfp_flags)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct ttm_resource_manager *man;
>> struct ttm_buffer_object *bo;
>> unsigned i, j;
>> int ret;
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>> man = ttm_manager_type(bdev, i);
>> if (!man || !man->use_tt)
>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>> }
>> }
>> }
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> return 0;
>> }
>> EXPORT_SYMBOL(ttm_device_swapout);
>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>
>> bdev->vma_manager = vma_manager;
>> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>> + spin_lock_init(&bdev->lru_lock);
>> INIT_LIST_HEAD(&bdev->ddestroy);
>> bdev->dev_mapping = mapping;
>> mutex_lock(&ttm_global_mutex);
>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>
>> void ttm_device_fini(struct ttm_device *bdev)
>> {
>> - struct ttm_global *glob = &ttm_glob;
>> struct ttm_resource_manager *man;
>> unsigned i;
>>
>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>> if (ttm_bo_delayed_delete(bdev, true))
>> pr_debug("Delayed destroy list was clean\n");
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>> if (list_empty(&man->lru[0]))
>> pr_debug("Swap list %d was clean\n", i);
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> ttm_pool_fini(&bdev->pool);
>> ttm_global_release();
>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> index 690ab97d52b7..071c48d672c6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>> if (list_empty(list))
>> return;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> list_for_each_entry(entry, list, head) {
>> struct ttm_buffer_object *bo = entry->bo;
>>
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>>
>> if (ticket)
>> ww_acquire_fini(ticket);
>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>> if (list_empty(list))
>> return;
>>
>> - spin_lock(&ttm_glob.lru_lock);
>> list_for_each_entry(entry, list, head) {
>> struct ttm_buffer_object *bo = entry->bo;
>>
>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>> dma_resv_add_shared_fence(bo->base.resv, fence);
>> else
>> dma_resv_add_excl_fence(bo->base.resv, fence);
>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>> dma_resv_unlock(bo->base.resv);
>> }
>> - spin_unlock(&ttm_glob.lru_lock);
>> if (ticket)
>> ww_acquire_fini(ticket);
>> }
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index ed1672a9f332..04f2eef653ab 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>> .no_wait_gpu = false,
>> .force_alloc = true
>> };
>> - struct ttm_global *glob = &ttm_glob;
>> struct dma_fence *fence;
>> int ret;
>> unsigned i;
>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>> * Can't use standard list traversal since we're unlocking.
>> */
>>
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> while (!list_empty(&man->lru[i])) {
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>> NULL);
>> if (ret)
>> return ret;
>> - spin_lock(&glob->lru_lock);
>> + spin_lock(&bdev->lru_lock);
>> }
>> }
>> - spin_unlock(&glob->lru_lock);
>> + spin_unlock(&bdev->lru_lock);
>>
>> spin_lock(&man->move_lock);
>> fence = dma_fence_get(man->move);
>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>> index d007feef7676..dbccac957f8f 100644
>> --- a/include/drm/ttm/ttm_bo_driver.h
>> +++ b/include/drm/ttm/ttm_bo_driver.h
>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>> static inline void
>> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>> {
>> - spin_lock(&ttm_glob.lru_lock);
>> + spin_lock(&bo->bdev->lru_lock);
>> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>> - spin_unlock(&ttm_glob.lru_lock);
>> + spin_unlock(&bo->bdev->lru_lock);
>> }
>>
>> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index cda6efb4c34b..bae56d29e8ff 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>> */
>>
>> struct page *dummy_read_page;
>> - spinlock_t lru_lock;
>>
>> /**
>> * Protected by ttm_global_mutex.
>> @@ -277,8 +276,9 @@ struct ttm_device {
>> struct ttm_pool pool;
>>
>> /*
>> - * Protected by the global:lru lock.
>> + * Protection for the per manager LRU and ddestroy lists.
>> */
>> + spinlock_t lru_lock;
>> struct list_head ddestroy;
>>
>> /*
>> --
>> 2.25.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-16 12:03 ` Christian König
@ 2021-03-16 12:05 ` Daniel Vetter
2021-03-16 15:13 ` Christian König
0 siblings, 1 reply; 28+ messages in thread
From: Daniel Vetter @ 2021-03-16 12:05 UTC (permalink / raw)
To: Christian König; +Cc: Huang Rui, dri-devel
On Tue, Mar 16, 2021 at 1:03 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 16.03.21 um 10:35 schrieb Daniel Vetter:
> > On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
> >> Instead of having a global lock.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > I guess per zone lru lock is a lot more work since then we need to handle
> > ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
> > lingo.
>
> Making the LRU per resource manager is the long term goal, yes.
>
> My key idea so far is that we make bo->mem a pointer and then move the
> LRU handling into the resource object instead of the BO.
>
> The resource object then just references the BO and so that we can
> figure out which BO to evict or which fence to wait for to free up a
> resource.
Hm yeah that could work out fairly nicely. Both from locking but also
refcounting pov. And maybe we could then use entirely free-standing
mem objects instead of ghost objects? Since that's a part of ttm I
don't grok and it always looks a bit like a hack to me. So for these
ghost mem objects you'd only need the lru + dma_fence_wait (can grab a
fence ref under the lru and then drop lru lock for that) for eviction,
no dma_resv_lock.
-Daniel
>
> Regards,
> Christian.
>
> > -Daniel
> >
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
> >> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
> >> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
> >> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
> >> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
> >> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
> >> include/drm/ttm/ttm_bo_driver.h | 4 +--
> >> include/drm/ttm/ttm_device.h | 4 +--
> >> 8 files changed, 43 insertions(+), 56 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9d19078246c8..ae18c0e32347 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >> struct amdgpu_vm_bo_base *bo_base;
> >>
> >> if (vm->bulk_moveable) {
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&adev->mman.bdev.lru_lock);
> >> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&adev->mman.bdev.lru_lock);
> >> return;
> >> }
> >>
> >> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&adev->mman.bdev.lru_lock);
> >> list_for_each_entry(bo_base, &vm->idle, vm_status) {
> >> struct amdgpu_bo *bo = bo_base->bo;
> >>
> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
> >> &bo->shadow->tbo.mem,
> >> &vm->lru_bulk_move);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&adev->mman.bdev.lru_lock);
> >>
> >> vm->bulk_moveable = true;
> >> }
> >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> >> index f5845c96d414..b19f2f00b215 100644
> >> --- a/drivers/gpu/drm/qxl/qxl_release.c
> >> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
> >> release->id | 0xf0000000, release->base.seqno);
> >> trace_dma_fence_emit(&release->base);
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> -
> >> list_for_each_entry(entry, &release->bos, head) {
> >> bo = entry->bo;
> >>
> >> dma_resv_add_shared_fence(bo->base.resv, &release->base);
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> ww_acquire_fini(&release->ticket);
> >> }
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index a1be88be357b..a8103c8718a3 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
> >> * reference it any more. The only tricky case is the trylock on
> >> * the resv object while holding the lru_lock.
> >> */
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> bo->base.resv = &bo->base._resv;
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> }
> >>
> >> return r;
> >> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >>
> >> if (unlock_resv)
> >> dma_resv_unlock(bo->base.resv);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
> >> 30 * HZ);
> >> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> else if (lret == 0)
> >> return -EBUSY;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
> >> /*
> >> * We raced, and lost, someone else holds the reservation now,
> >> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> * delayed destruction would succeed, so just return success
> >> * here.
> >> */
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return 0;
> >> }
> >> ret = 0;
> >> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> if (ret || unlikely(list_empty(&bo->ddestroy))) {
> >> if (unlock_resv)
> >> dma_resv_unlock(bo->base.resv);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return ret;
> >> }
> >>
> >> ttm_bo_del_from_lru(bo);
> >> list_del_init(&bo->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> ttm_bo_cleanup_memtype_use(bo);
> >>
> >> if (unlock_resv)
> >> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
> >> */
> >> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct list_head removed;
> >> bool empty;
> >>
> >> INIT_LIST_HEAD(&removed);
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> while (!list_empty(&bdev->ddestroy)) {
> >> struct ttm_buffer_object *bo;
> >>
> >> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
> >> continue;
> >>
> >> if (remove_all || bo->base.resv != &bo->base._resv) {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> dma_resv_lock(bo->base.resv, NULL);
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >>
> >> } else if (dma_resv_trylock(bo->base.resv)) {
> >> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
> >> } else {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> }
> >>
> >> ttm_bo_put(bo);
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> }
> >> list_splice_tail(&removed, &bdev->ddestroy);
> >> empty = list_empty(&bdev->ddestroy);
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> return empty;
> >> }
> >> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
> >> ttm_bo_flush_all_fences(bo);
> >> bo->deleted = true;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >>
> >> /*
> >> * Make pinned bos immediately available to
> >> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
> >>
> >> kref_init(&bo->kref);
> >> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> schedule_delayed_work(&bdev->wq,
> >> ((HZ / 100) < 1) ? 1 : HZ / 100);
> >> return;
> >> }
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_del_from_lru(bo);
> >> list_del(&bo->ddestroy);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> ttm_bo_cleanup_memtype_use(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> unsigned i;
> >> int ret;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> list_for_each_entry(bo, &man->lru[i], lru) {
> >> bool busy;
> >> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> if (!bo) {
> >> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
> >> busy_bo = NULL;
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
> >> if (busy_bo)
> >> ttm_bo_put(busy_bo);
> >> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> >> return ret;
> >> }
> >>
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> ret = ttm_bo_evict(bo, ctx);
> >> if (locked)
> >> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
> >> mem->mem_type = place->mem_type;
> >> mem->placement = place->flags;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_move_to_lru_tail(bo, mem, NULL);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> -
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> return 0;
> >> }
> >>
> >> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
> >> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >> gfp_t gfp_flags)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> bool locked;
> >> int ret;
> >>
> >> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> >>
> >> ttm_bo_del_from_lru(bo);
> >> /* TODO: Cleanup the locking */
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >>
> >> /*
> >> * Move to system cached
> >> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> >> index 2c280fb1e992..924d892109e8 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_device.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> >> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
> >> ttm_pool_mgr_init(num_pages * 50 / 100);
> >> ttm_tt_mgr_init();
> >>
> >> - spin_lock_init(&glob->lru_lock);
> >> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
> >>
> >> if (unlikely(glob->dummy_read_page == NULL)) {
> >> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
> >> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >> gfp_t gfp_flags)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct ttm_resource_manager *man;
> >> struct ttm_buffer_object *bo;
> >> unsigned i, j;
> >> int ret;
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
> >> man = ttm_manager_type(bdev, i);
> >> if (!man || !man->use_tt)
> >> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
> >> }
> >> }
> >> }
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> return 0;
> >> }
> >> EXPORT_SYMBOL(ttm_device_swapout);
> >> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> >>
> >> bdev->vma_manager = vma_manager;
> >> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> >> + spin_lock_init(&bdev->lru_lock);
> >> INIT_LIST_HEAD(&bdev->ddestroy);
> >> bdev->dev_mapping = mapping;
> >> mutex_lock(&ttm_global_mutex);
> >> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
> >>
> >> void ttm_device_fini(struct ttm_device *bdev)
> >> {
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct ttm_resource_manager *man;
> >> unsigned i;
> >>
> >> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
> >> if (ttm_bo_delayed_delete(bdev, true))
> >> pr_debug("Delayed destroy list was clean\n");
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> >> if (list_empty(&man->lru[0]))
> >> pr_debug("Swap list %d was clean\n", i);
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> ttm_pool_fini(&bdev->pool);
> >> ttm_global_release();
> >> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> index 690ab97d52b7..071c48d672c6 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> >> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
> >> if (list_empty(list))
> >> return;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> list_for_each_entry(entry, list, head) {
> >> struct ttm_buffer_object *bo = entry->bo;
> >>
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >>
> >> if (ticket)
> >> ww_acquire_fini(ticket);
> >> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >> if (list_empty(list))
> >> return;
> >>
> >> - spin_lock(&ttm_glob.lru_lock);
> >> list_for_each_entry(entry, list, head) {
> >> struct ttm_buffer_object *bo = entry->bo;
> >>
> >> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
> >> dma_resv_add_shared_fence(bo->base.resv, fence);
> >> else
> >> dma_resv_add_excl_fence(bo->base.resv, fence);
> >> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> + ttm_bo_move_to_lru_tail_unlocked(bo);
> >> dma_resv_unlock(bo->base.resv);
> >> }
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> if (ticket)
> >> ww_acquire_fini(ticket);
> >> }
> >> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> >> index ed1672a9f332..04f2eef653ab 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> >> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >> .no_wait_gpu = false,
> >> .force_alloc = true
> >> };
> >> - struct ttm_global *glob = &ttm_glob;
> >> struct dma_fence *fence;
> >> int ret;
> >> unsigned i;
> >> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
> >> * Can't use standard list traversal since we're unlocking.
> >> */
> >>
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> while (!list_empty(&man->lru[i])) {
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
> >> NULL);
> >> if (ret)
> >> return ret;
> >> - spin_lock(&glob->lru_lock);
> >> + spin_lock(&bdev->lru_lock);
> >> }
> >> }
> >> - spin_unlock(&glob->lru_lock);
> >> + spin_unlock(&bdev->lru_lock);
> >>
> >> spin_lock(&man->move_lock);
> >> fence = dma_fence_get(man->move);
> >> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> >> index d007feef7676..dbccac957f8f 100644
> >> --- a/include/drm/ttm/ttm_bo_driver.h
> >> +++ b/include/drm/ttm/ttm_bo_driver.h
> >> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
> >> static inline void
> >> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
> >> {
> >> - spin_lock(&ttm_glob.lru_lock);
> >> + spin_lock(&bo->bdev->lru_lock);
> >> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
> >> - spin_unlock(&ttm_glob.lru_lock);
> >> + spin_unlock(&bo->bdev->lru_lock);
> >> }
> >>
> >> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
> >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> >> index cda6efb4c34b..bae56d29e8ff 100644
> >> --- a/include/drm/ttm/ttm_device.h
> >> +++ b/include/drm/ttm/ttm_device.h
> >> @@ -56,7 +56,6 @@ extern struct ttm_global {
> >> */
> >>
> >> struct page *dummy_read_page;
> >> - spinlock_t lru_lock;
> >>
> >> /**
> >> * Protected by ttm_global_mutex.
> >> @@ -277,8 +276,9 @@ struct ttm_device {
> >> struct ttm_pool pool;
> >>
> >> /*
> >> - * Protected by the global:lru lock.
> >> + * Protection for the per manager LRU and ddestroy lists.
> >> */
> >> + spinlock_t lru_lock;
> >> struct list_head ddestroy;
> >>
> >> /*
> >> --
> >> 2.25.1
> >>
> >> _______________________________________________
> >> dri-devel mailing list
> >> dri-devel@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-03-16 12:05 ` Daniel Vetter
@ 2021-03-16 15:13 ` Christian König
0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-03-16 15:13 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Huang Rui, dri-devel
Am 16.03.21 um 13:05 schrieb Daniel Vetter:
> On Tue, Mar 16, 2021 at 1:03 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 16.03.21 um 10:35 schrieb Daniel Vetter:
>>> On Mon, Mar 15, 2021 at 05:04:22PM +0100, Christian König wrote:
>>>> Instead of having a global lock.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> I guess per zone lru lock is a lot more work since then we need to handle
>>> ordering and ABBA deadlocks? s/zone/mem region/ I think is the proper ttm
>>> lingo.
>> Making the LRU per resource manager is the long term goal, yes.
>>
>> My key idea so far is that we make bo->mem a pointer and then move the
>> LRU handling into the resource object instead of the BO.
>>
>> The resource object then just references the BO and so that we can
>> figure out which BO to evict or which fence to wait for to free up a
>> resource.
> Hm yeah that could work out fairly nicely. Both from locking but also
> refcounting pov. And maybe we could then use entirely free-standing
> mem objects instead of ghost objects? Since that's a part of ttm I
> don't grok and it always looks a bit like a hack to me. So for these
> ghost mem objects you'd only need the lru + dma_fence_wait (can grab a
> fence ref under the lru and then drop lru lock for that) for eviction,
> no dma_resv_lock.
Exactly that's the background here, yes.
Those ghost objects are more than just a bit of a hack and result in
tons of checks in the driver if a BO is really a BO or a ghost.
Moving all that handling into the resource objects not only allows us to
remove that, but also makes things like delayed delete work out pretty
nicely.
Christian.
> -Daniel
>> Regards,
>> Christian.
>>
>>> -Daniel
>>>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
>>>> drivers/gpu/drm/qxl/qxl_release.c | 5 +--
>>>> drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
>>>> drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
>>>> drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
>>>> drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
>>>> include/drm/ttm/ttm_bo_driver.h | 4 +--
>>>> include/drm/ttm/ttm_device.h | 4 +--
>>>> 8 files changed, 43 insertions(+), 56 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 9d19078246c8..ae18c0e32347 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>>> struct amdgpu_vm_bo_base *bo_base;
>>>>
>>>> if (vm->bulk_moveable) {
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&adev->mman.bdev.lru_lock);
>>>> ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>>> return;
>>>> }
>>>>
>>>> memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&adev->mman.bdev.lru_lock);
>>>> list_for_each_entry(bo_base, &vm->idle, vm_status) {
>>>> struct amdgpu_bo *bo = bo_base->bo;
>>>>
>>>> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
>>>> &bo->shadow->tbo.mem,
>>>> &vm->lru_bulk_move);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&adev->mman.bdev.lru_lock);
>>>>
>>>> vm->bulk_moveable = true;
>>>> }
>>>> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
>>>> index f5845c96d414..b19f2f00b215 100644
>>>> --- a/drivers/gpu/drm/qxl/qxl_release.c
>>>> +++ b/drivers/gpu/drm/qxl/qxl_release.c
>>>> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>>>> release->id | 0xf0000000, release->base.seqno);
>>>> trace_dma_fence_emit(&release->base);
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> -
>>>> list_for_each_entry(entry, &release->bos, head) {
>>>> bo = entry->bo;
>>>>
>>>> dma_resv_add_shared_fence(bo->base.resv, &release->base);
>>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> ww_acquire_fini(&release->ticket);
>>>> }
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index a1be88be357b..a8103c8718a3 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
>>>> * reference it any more. The only tricky case is the trylock on
>>>> * the resv object while holding the lru_lock.
>>>> */
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> bo->base.resv = &bo->base._resv;
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> }
>>>>
>>>> return r;
>>>> @@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>>
>>>> if (unlock_resv)
>>>> dma_resv_unlock(bo->base.resv);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>>>> 30 * HZ);
>>>> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> else if (lret == 0)
>>>> return -EBUSY;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>>>> /*
>>>> * We raced, and lost, someone else holds the reservation now,
>>>> @@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> * delayed destruction would succeed, so just return success
>>>> * here.
>>>> */
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> return 0;
>>>> }
>>>> ret = 0;
>>>> @@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> if (ret || unlikely(list_empty(&bo->ddestroy))) {
>>>> if (unlock_resv)
>>>> dma_resv_unlock(bo->base.resv);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> return ret;
>>>> }
>>>>
>>>> ttm_bo_del_from_lru(bo);
>>>> list_del_init(&bo->ddestroy);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> ttm_bo_cleanup_memtype_use(bo);
>>>>
>>>> if (unlock_resv)
>>>> @@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>>>> */
>>>> bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct list_head removed;
>>>> bool empty;
>>>>
>>>> INIT_LIST_HEAD(&removed);
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> while (!list_empty(&bdev->ddestroy)) {
>>>> struct ttm_buffer_object *bo;
>>>>
>>>> @@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
>>>> continue;
>>>>
>>>> if (remove_all || bo->base.resv != &bo->base._resv) {
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> dma_resv_lock(bo->base.resv, NULL);
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>>>
>>>> } else if (dma_resv_trylock(bo->base.resv)) {
>>>> ttm_bo_cleanup_refs(bo, false, !remove_all, true);
>>>> } else {
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> }
>>>>
>>>> ttm_bo_put(bo);
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> }
>>>> list_splice_tail(&removed, &bdev->ddestroy);
>>>> empty = list_empty(&bdev->ddestroy);
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> return empty;
>>>> }
>>>> @@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
>>>> ttm_bo_flush_all_fences(bo);
>>>> bo->deleted = true;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>>
>>>> /*
>>>> * Make pinned bos immediately available to
>>>> @@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
>>>>
>>>> kref_init(&bo->kref);
>>>> list_add_tail(&bo->ddestroy, &bdev->ddestroy);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> schedule_delayed_work(&bdev->wq,
>>>> ((HZ / 100) < 1) ? 1 : HZ / 100);
>>>> return;
>>>> }
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> ttm_bo_del_from_lru(bo);
>>>> list_del(&bo->ddestroy);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> ttm_bo_cleanup_memtype_use(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> @@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>> unsigned i;
>>>> int ret;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
> >>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>> list_for_each_entry(bo, &man->lru[i], lru) {
>>>> bool busy;
>>>> @@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>> if (!bo) {
>>>> if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
>>>> busy_bo = NULL;
>>>> - spin_unlock(&ttm_glob.lru_lock);
> >>>> + spin_unlock(&bdev->lru_lock);
>>>> ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
>>>> if (busy_bo)
>>>> ttm_bo_put(busy_bo);
>>>> @@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>>>> return ret;
>>>> }
>>>>
>>>> - spin_unlock(&ttm_glob.lru_lock);
> >>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> ret = ttm_bo_evict(bo, ctx);
>>>> if (locked)
>>>> @@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
>>>> mem->mem_type = place->mem_type;
>>>> mem->placement = place->flags;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> ttm_bo_move_to_lru_tail(bo, mem, NULL);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> -
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> return 0;
>>>> }
>>>>
>>>> @@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
>>>> int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>>> gfp_t gfp_flags)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> bool locked;
>>>> int ret;
>>>>
>>>> @@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>>>>
>>>> ttm_bo_del_from_lru(bo);
>>>> /* TODO: Cleanup the locking */
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>>
>>>> /*
>>>> * Move to system cached
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
>>>> index 2c280fb1e992..924d892109e8 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>>>> @@ -81,7 +81,6 @@ static int ttm_global_init(void)
>>>> ttm_pool_mgr_init(num_pages * 50 / 100);
>>>> ttm_tt_mgr_init();
>>>>
>>>> - spin_lock_init(&glob->lru_lock);
>>>> glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
>>>>
>>>> if (unlikely(glob->dummy_read_page == NULL)) {
>>>> @@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
>>>> long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>>> gfp_t gfp_flags)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct ttm_resource_manager *man;
>>>> struct ttm_buffer_object *bo;
>>>> unsigned i, j;
>>>> int ret;
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>>>> man = ttm_manager_type(bdev, i);
>>>> if (!man || !man->use_tt)
>>>> @@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>>>> }
>>>> }
>>>> }
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> return 0;
>>>> }
>>>> EXPORT_SYMBOL(ttm_device_swapout);
>>>> @@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
>>>>
>>>> bdev->vma_manager = vma_manager;
>>>> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
>>>> + spin_lock_init(&bdev->lru_lock);
>>>> INIT_LIST_HEAD(&bdev->ddestroy);
>>>> bdev->dev_mapping = mapping;
>>>> mutex_lock(&ttm_global_mutex);
>>>> @@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
>>>>
>>>> void ttm_device_fini(struct ttm_device *bdev)
>>>> {
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct ttm_resource_manager *man;
>>>> unsigned i;
>>>>
>>>> @@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
>>>> if (ttm_bo_delayed_delete(bdev, true))
>>>> pr_debug("Delayed destroy list was clean\n");
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>>> if (list_empty(&man->lru[0]))
>>>> pr_debug("Swap list %d was clean\n", i);
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> ttm_pool_fini(&bdev->pool);
>>>> ttm_global_release();
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> index 690ab97d52b7..071c48d672c6 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
>>>> @@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
>>>> if (list_empty(list))
>>>> return;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> list_for_each_entry(entry, list, head) {
>>>> struct ttm_buffer_object *bo = entry->bo;
>>>>
>>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>>
>>>> if (ticket)
>>>> ww_acquire_fini(ticket);
>>>> @@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>>> if (list_empty(list))
>>>> return;
>>>>
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> list_for_each_entry(entry, list, head) {
>>>> struct ttm_buffer_object *bo = entry->bo;
>>>>
>>>> @@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>>>> dma_resv_add_shared_fence(bo->base.resv, fence);
>>>> else
>>>> dma_resv_add_excl_fence(bo->base.resv, fence);
>>>> - ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> + ttm_bo_move_to_lru_tail_unlocked(bo);
>>>> dma_resv_unlock(bo->base.resv);
>>>> }
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> if (ticket)
>>>> ww_acquire_fini(ticket);
>>>> }
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> index ed1672a9f332..04f2eef653ab 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>>>> @@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>>> .no_wait_gpu = false,
>>>> .force_alloc = true
>>>> };
>>>> - struct ttm_global *glob = &ttm_glob;
>>>> struct dma_fence *fence;
>>>> int ret;
>>>> unsigned i;
>>>> @@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
>>>> * Can't use standard list traversal since we're unlocking.
>>>> */
>>>>
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>>> while (!list_empty(&man->lru[i])) {
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>> ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
>>>> NULL);
>>>> if (ret)
>>>> return ret;
>>>> - spin_lock(&glob->lru_lock);
>>>> + spin_lock(&bdev->lru_lock);
>>>> }
>>>> }
>>>> - spin_unlock(&glob->lru_lock);
>>>> + spin_unlock(&bdev->lru_lock);
>>>>
>>>> spin_lock(&man->move_lock);
>>>> fence = dma_fence_get(man->move);
>>>> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
>>>> index d007feef7676..dbccac957f8f 100644
>>>> --- a/include/drm/ttm/ttm_bo_driver.h
>>>> +++ b/include/drm/ttm/ttm_bo_driver.h
>>>> @@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
>>>> static inline void
>>>> ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
>>>> {
>>>> - spin_lock(&ttm_glob.lru_lock);
>>>> + spin_lock(&bo->bdev->lru_lock);
>>>> ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
>>>> - spin_unlock(&ttm_glob.lru_lock);
>>>> + spin_unlock(&bo->bdev->lru_lock);
>>>> }
>>>>
>>>> static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
>>>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>>>> index cda6efb4c34b..bae56d29e8ff 100644
>>>> --- a/include/drm/ttm/ttm_device.h
>>>> +++ b/include/drm/ttm/ttm_device.h
>>>> @@ -56,7 +56,6 @@ extern struct ttm_global {
>>>> */
>>>>
>>>> struct page *dummy_read_page;
>>>> - spinlock_t lru_lock;
>>>>
>>>> /**
>>>> * Protected by ttm_global_mutex.
>>>> @@ -277,8 +276,9 @@ struct ttm_device {
>>>> struct ttm_pool pool;
>>>>
>>>> /*
>>>> - * Protected by the global:lru lock.
>>>> + * Protection for the per manager LRU and ddestroy lists.
>>>> */
>>>> + spinlock_t lru_lock;
>>>> struct list_head ddestroy;
>>>>
>>>> /*
>>>> --
>>>> 2.25.1
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-02-11 13:29 Christian König
2021-02-11 13:29 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-02-11 13:29 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e38102282fd5..d33578a112b4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4efed3bf0ef9..01da355b86f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-02-11 13:29 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-02-11 13:29 ` Christian König
0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-02-11 13:29 UTC (permalink / raw)
To: dri-devel; +Cc: ray.huang
Instead of having a global lock.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
drivers/gpu/drm/qxl/qxl_release.c | 5 +--
drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
include/drm/ttm/ttm_bo_driver.h | 4 +--
include/drm/ttm/ttm_device.h | 4 +--
8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
return;
}
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
&bo->shadow->tbo.mem,
&vm->lru_bulk_move);
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 6ed673d75f9f..29ad72cb6b54 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -417,16 +417,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
release->id | 0xf0000000, release->base.seqno);
trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock);
-
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a1be88be357b..a8103c8718a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
* reference it any more. The only tricky case is the trylock on
* the resv object while holding the lru_lock.
*/
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
bo->base.resv = &bo->base._resv;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
else if (lret == 0)
return -EBUSY;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
/*
* We raced, and lost, someone else holds the reservation now,
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
* delayed destruction would succeed, so just return success
* here.
*/
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
ret = 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (ret || unlikely(list_empty(&bo->ddestroy))) {
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return ret;
}
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
*/
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
{
- struct ttm_global *glob = &ttm_glob;
struct list_head removed;
bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
while (!list_empty(&bdev->ddestroy)) {
struct ttm_buffer_object *bo;
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
empty = list_empty(&bdev->ddestroy);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return empty;
}
@@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_bo_flush_all_fences(bo);
bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
/*
* Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
return;
}
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_del_from_lru(bo);
list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
unsigned i;
int ret;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
busy_bo = NULL;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
ttm_bo_put(busy_bo);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
return ret;
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx);
if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
mem->mem_type = place->mem_type;
mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
-
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
bool locked;
int ret;
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo);
/* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
/*
* Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 2c280fb1e992..924d892109e8 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
ttm_pool_mgr_init(num_pages * 50 / 100);
ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock);
glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
unsigned i, j;
int ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
}
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return 0;
}
EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager;
INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+ spin_lock_init(&bdev->lru_lock);
INIT_LIST_HEAD(&bdev->ddestroy);
bdev->dev_mapping = mapping;
mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
unsigned i;
@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
if (ttm_bo_delayed_delete(bdev, true))
pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
if (list_empty(&man->lru[0]))
pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool);
ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
dma_resv_add_shared_fence(bo->base.resv, fence);
else
dma_resv_add_excl_fence(bo->base.resv, fence);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
.force_alloc = true
};
- struct ttm_global *glob = &ttm_glob;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
* Can't use standard list traversal since we're unlocking.
*/
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
NULL);
if (ret)
return ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
static inline void
ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
{
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
*/
struct page *dummy_read_page;
- spinlock_t lru_lock;
/**
* Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
struct ttm_pool pool;
/*
- * Protected by the global:lru lock.
+ * Protection for the per manager LRU and ddestroy lists.
*/
+ spinlock_t lru_lock;
struct list_head ddestroy;
/*
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock
@ 2021-02-10 23:28 kernel test robot
0 siblings, 0 replies; 28+ messages in thread
From: kernel test robot @ 2021-02-10 23:28 UTC (permalink / raw)
To: kbuild
[-- Attachment #1: Type: text/plain, Size: 4606 bytes --]
CC: kbuild-all(a)lists.01.org
In-Reply-To: <20210210152114.1134-3-christian.koenig@amd.com>
References: <20210210152114.1134-3-christian.koenig@amd.com>
TO: "Christian König" <ckoenig.leichtzumerken@gmail.com>
TO: dri-devel(a)lists.freedesktop.org
TO: ray.huang(a)amd.com
Hi Christian,
I love your patch! Perhaps something to improve:
[auto build test WARNING on drm-tip/drm-tip]
[cannot apply to drm-intel/for-linux-next tegra-drm/drm/tegra/for-next linus/master drm-exynos/exynos-drm-next drm/drm-next v5.11-rc7 next-20210125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-move-swapout-logic-around/20210210-232812
base: git://anongit.freedesktop.org/drm/drm-tip drm-tip
:::::: branch date: 8 hours ago
:::::: commit date: 8 hours ago
config: ia64-randconfig-m031-20210209 (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
New smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:157 ttm_device_swapout() warn: inconsistent returns 'bdev->lru_lock'.
drivers/gpu/drm/ttm/ttm_bo.c:661 ttm_mem_evict_first() error: we previously assumed 'bo' could be null (see line 658)
Old smatch warnings:
drivers/gpu/drm/ttm/ttm_device.c:142 ttm_device_swapout() warn: variable dereferenced before check 'bo->ttm' (see line 140)
vim +157 drivers/gpu/drm/ttm/ttm_device.c
d37e3072dc32c7 Christian König 2021-02-10 123
d37e3072dc32c7 Christian König 2021-02-10 124 long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
d37e3072dc32c7 Christian König 2021-02-10 125 gfp_t gfp_flags)
d37e3072dc32c7 Christian König 2021-02-10 126 {
d37e3072dc32c7 Christian König 2021-02-10 127 struct ttm_resource_manager *man;
22b09b650f7486 Christian König 2021-02-10 128 struct ttm_buffer_object *bo;
d37e3072dc32c7 Christian König 2021-02-10 129 unsigned i, j;
22b09b650f7486 Christian König 2021-02-10 130 int ret;
22b09b650f7486 Christian König 2021-02-10 131
12b3da401dc93d Christian König 2021-02-10 132 spin_lock(&bdev->lru_lock);
d37e3072dc32c7 Christian König 2021-02-10 133 for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
d37e3072dc32c7 Christian König 2021-02-10 134 man = ttm_manager_type(bdev, i);
d37e3072dc32c7 Christian König 2021-02-10 135 if (!man || !man->use_tt)
d37e3072dc32c7 Christian König 2021-02-10 136 continue;
d37e3072dc32c7 Christian König 2021-02-10 137
d37e3072dc32c7 Christian König 2021-02-10 138 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
d37e3072dc32c7 Christian König 2021-02-10 139 list_for_each_entry(bo, &man->lru[j], lru) {
d37e3072dc32c7 Christian König 2021-02-10 140 long num_pages = bo->ttm->num_pages;
d37e3072dc32c7 Christian König 2021-02-10 141
d37e3072dc32c7 Christian König 2021-02-10 142 if (!bo->ttm ||
d37e3072dc32c7 Christian König 2021-02-10 143 bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
d37e3072dc32c7 Christian König 2021-02-10 144 bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
d37e3072dc32c7 Christian König 2021-02-10 145 continue;
22b09b650f7486 Christian König 2021-02-10 146
22b09b650f7486 Christian König 2021-02-10 147 ret = ttm_bo_swapout(bo, ctx, gfp_flags);
22b09b650f7486 Christian König 2021-02-10 148 /* ttm_bo_swapout has dropped the lru_lock */
22b09b650f7486 Christian König 2021-02-10 149 if (!ret)
22b09b650f7486 Christian König 2021-02-10 150 return num_pages;
22b09b650f7486 Christian König 2021-02-10 151 if (ret != -EBUSY)
22b09b650f7486 Christian König 2021-02-10 152 return ret;
22b09b650f7486 Christian König 2021-02-10 153 }
22b09b650f7486 Christian König 2021-02-10 154 }
d37e3072dc32c7 Christian König 2021-02-10 155 }
12b3da401dc93d Christian König 2021-02-10 156 spin_unlock(&bdev->lru_lock);
22b09b650f7486 Christian König 2021-02-10 @157 return 0;
22b09b650f7486 Christian König 2021-02-10 158 }
d37e3072dc32c7 Christian König 2021-02-10 159 EXPORT_SYMBOL(ttm_device_swapout);
22b09b650f7486 Christian König 2021-02-10 160
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 31987 bytes --]
^ permalink raw reply [flat|nested] 28+ messages in thread
* [PATCH 1/3] drm/ttm: move swapout logic around
@ 2021-02-10 15:21 Christian König
2021-02-10 15:21 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2021-02-10 15:21 UTC (permalink / raw)
To: dri-devel, ray.huang
Move the iteration of the global lru into the new function
ttm_global_swapout() and use that instead in drivers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/ttm/ttm_bo.c | 57 ++++++++---------------------
drivers/gpu/drm/ttm/ttm_device.c | 29 +++++++++++++++
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
drivers/gpu/drm/vmwgfx/ttm_memory.c | 3 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
include/drm/ttm/ttm_bo_api.h | 3 +-
include/drm/ttm/ttm_device.h | 2 +
7 files changed, 53 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e38102282fd5..d33578a112b4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1184,56 +1184,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
}
EXPORT_SYMBOL(ttm_bo_wait);
-/*
- * A buffer object shrink method that tries to swap out the first
- * buffer object on the bo_global::swap_lru list.
- */
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags)
{
struct ttm_global *glob = &ttm_glob;
- struct ttm_buffer_object *bo;
- int ret = -EBUSY;
bool locked;
- unsigned i;
-
- spin_lock(&glob->lru_lock);
- for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
- list_for_each_entry(bo, &glob->swap_lru[i], swap) {
- if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
- NULL))
- continue;
-
- if (!ttm_bo_get_unless_zero(bo)) {
- if (locked)
- dma_resv_unlock(bo->base.resv);
- continue;
- }
+ int ret;
- ret = 0;
- break;
- }
- if (!ret)
- break;
- }
+ if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
+ return -EBUSY;
- if (ret) {
- spin_unlock(&glob->lru_lock);
- return ret;
+ if (!ttm_bo_get_unless_zero(bo)) {
+ if (locked)
+ dma_resv_unlock(bo->base.resv);
+ return -EBUSY;
}
if (bo->deleted) {
- ret = ttm_bo_cleanup_refs(bo, false, false, locked);
+ ttm_bo_cleanup_refs(bo, false, false, locked);
ttm_bo_put(bo);
- return ret;
+ return 0;
}
ttm_bo_del_from_lru(bo);
+ /* TODO: Cleanup the locking */
spin_unlock(&glob->lru_lock);
- /**
+ /*
* Move to system cached
*/
-
if (bo->mem.mem_type != TTM_PL_SYSTEM) {
struct ttm_operation_ctx ctx = { false, false };
struct ttm_resource evict_mem;
@@ -1253,29 +1232,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
}
}
- /**
+ /*
* Make sure BO is idle.
*/
-
ret = ttm_bo_wait(bo, false, false);
if (unlikely(ret != 0))
goto out;
ttm_bo_unmap_virtual(bo);
- /**
+ /*
* Swap out. Buffer will be swapped in again as soon as
* anyone tries to access a ttm page.
*/
-
if (bo->bdev->funcs->swap_notify)
bo->bdev->funcs->swap_notify(bo);
ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
out:
- /**
- *
+ /*
* Unreserve without putting on LRU to avoid swapping out an
* already swapped buffer.
*/
@@ -1284,7 +1260,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
ttm_bo_put(bo);
return ret;
}
-EXPORT_SYMBOL(ttm_bo_swapout);
void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
{
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 95e1b7b1f2e6..dfc2a7e4e490 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -102,6 +102,35 @@ static int ttm_global_init(void)
return ret;
}
+/**
+ * A buffer object shrink method that tries to swap out the first
+ * buffer object on the global::swap_lru list.
+ */
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
+{
+ struct ttm_global *glob = &ttm_glob;
+ struct ttm_buffer_object *bo;
+ unsigned i;
+ int ret;
+
+ spin_lock(&glob->lru_lock);
+ for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+ list_for_each_entry(bo, &glob->swap_lru[i], swap) {
+ uint32_t num_pages = bo->ttm->num_pages;
+
+ ret = ttm_bo_swapout(bo, ctx, gfp_flags);
+ /* ttm_bo_swapout has dropped the lru_lock */
+ if (!ret)
+ return num_pages;
+ if (ret != -EBUSY)
+ return ret;
+ }
+ }
+ spin_unlock(&glob->lru_lock);
+ return 0;
+}
+EXPORT_SYMBOL(ttm_global_swapout);
+
static void ttm_init_sysman(struct ttm_device *bdev)
{
struct ttm_resource_manager *man = &bdev->sysman;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..95b5cff25f4c 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink,
};
int ret;
- ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+ ret = ttm_global_swapout(&ctx, GFP_NOFS);
return ret < 0 ? SHRINK_EMPTY : ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index e972af07d029..104b95a8c7a2 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -38,6 +38,7 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_device.h>
#include "ttm_memory.h"
@@ -277,7 +278,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq,
while (ttm_zones_above_swap_target(glob, from_wq, extra)) {
spin_unlock(&glob->lock);
- ret = ttm_bo_swapout(ctx, GFP_KERNEL);
+ ret = ttm_global_swapout(ctx, GFP_KERNEL);
spin_lock(&glob->lock);
if (unlikely(ret < 0))
break;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4efed3bf0ef9..01da355b86f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1384,7 +1384,7 @@ static int vmw_pm_freeze(struct device *kdev)
vmw_execbuf_release_pinned_bo(dev_priv);
vmw_resource_evict_all(dev_priv);
vmw_release_device_early(dev_priv);
- while (ttm_bo_swapout(&ctx, GFP_KERNEL) > 0);
+ while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0);
if (dev_priv->enable_fb)
vmw_fifo_resource_dec(dev_priv);
if (atomic_read(&dev_priv->num_fifo_resources) != 0) {
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 4fb523dfab32..5044ac330858 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -560,7 +560,8 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file *filp,
const char __user *wbuf, char __user *rbuf,
size_t count, loff_t *f_pos, bool write);
-int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
+ gfp_t gfp_flags);
/**
* ttm_bo_uses_embedded_gem_object - check if the given bo uses the
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 035bbc044a3b..6a0b267d4fe6 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -297,6 +297,8 @@ struct ttm_device {
struct delayed_work wq;
};
+long ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);
+
static inline struct ttm_resource_manager *
ttm_manager_type(struct ttm_device *bdev, int mem_type)
{
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
* [PATCH 3/3] drm/ttm: switch to per device LRU lock
2021-02-10 15:21 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
@ 2021-02-10 15:21 ` Christian König
0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-02-10 15:21 UTC (permalink / raw)
To: dri-devel, ray.huang
Instead of having a global lock.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++---
drivers/gpu/drm/qxl/qxl_release.c | 5 +--
drivers/gpu/drm/ttm/ttm_bo.c | 49 ++++++++++++--------------
drivers/gpu/drm/ttm/ttm_device.c | 12 +++----
drivers/gpu/drm/ttm/ttm_execbuf_util.c | 8 ++---
drivers/gpu/drm/ttm/ttm_resource.c | 9 +++--
include/drm/ttm/ttm_bo_driver.h | 4 +--
include/drm/ttm/ttm_device.h | 4 +--
8 files changed, 43 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9d19078246c8..ae18c0e32347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm_bo_base *bo_base;
if (vm->bulk_moveable) {
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
return;
}
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&adev->mman.bdev.lru_lock);
list_for_each_entry(bo_base, &vm->idle, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
&bo->shadow->tbo.mem,
&vm->lru_bulk_move);
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&adev->mman.bdev.lru_lock);
vm->bulk_moveable = true;
}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 6ed673d75f9f..29ad72cb6b54 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -417,16 +417,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
release->id | 0xf0000000, release->base.seqno);
trace_dma_fence_emit(&release->base);
- spin_lock(&ttm_glob.lru_lock);
-
list_for_each_entry(entry, &release->bos, head) {
bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
ww_acquire_fini(&release->ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a1be88be357b..a8103c8718a3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -242,9 +242,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
* reference it any more. The only tricky case is the trylock on
* the resv object while holding the lru_lock.
*/
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
bo->base.resv = &bo->base._resv;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
return r;
@@ -303,7 +303,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
30 * HZ);
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
else if (lret == 0)
return -EBUSY;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
/*
* We raced, and lost, someone else holds the reservation now,
@@ -323,7 +323,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
* delayed destruction would succeed, so just return success
* here.
*/
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
ret = 0;
@@ -332,13 +332,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
if (ret || unlikely(list_empty(&bo->ddestroy))) {
if (unlock_resv)
dma_resv_unlock(bo->base.resv);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
return ret;
}
ttm_bo_del_from_lru(bo);
list_del_init(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
if (unlock_resv)
@@ -355,13 +355,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
*/
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
{
- struct ttm_global *glob = &ttm_glob;
struct list_head removed;
bool empty;
INIT_LIST_HEAD(&removed);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
while (!list_empty(&bdev->ddestroy)) {
struct ttm_buffer_object *bo;
@@ -372,24 +371,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
continue;
if (remove_all || bo->base.resv != &bo->base._resv) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
dma_resv_lock(bo->base.resv, NULL);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else if (dma_resv_trylock(bo->base.resv)) {
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
} else {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
}
ttm_bo_put(bo);
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
list_splice_tail(&removed, &bdev->ddestroy);
empty = list_empty(&bdev->ddestroy);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return empty;
}
@@ -424,7 +423,7 @@ static void ttm_bo_release(struct kref *kref)
ttm_bo_flush_all_fences(bo);
bo->deleted = true;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
/*
* Make pinned bos immediately available to
@@ -438,17 +437,17 @@ static void ttm_bo_release(struct kref *kref)
kref_init(&bo->kref);
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
schedule_delayed_work(&bdev->wq,
((HZ / 100) < 1) ? 1 : HZ / 100);
return;
}
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_del_from_lru(bo);
list_del(&bo->ddestroy);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
ttm_bo_cleanup_memtype_use(bo);
dma_resv_unlock(bo->base.resv);
@@ -622,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
unsigned i;
int ret;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
list_for_each_entry(bo, &man->lru[i], lru) {
bool busy;
@@ -659,7 +658,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
if (!bo) {
if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
busy_bo = NULL;
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
ttm_bo_put(busy_bo);
@@ -673,7 +672,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
return ret;
}
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_bo_evict(bo, ctx);
if (locked)
@@ -773,10 +772,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
mem->mem_type = place->mem_type;
mem->placement = place->flags;
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
-
+ spin_unlock(&bo->bdev->lru_lock);
return 0;
}
@@ -1160,7 +1158,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
bool locked;
int ret;
@@ -1185,7 +1182,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
ttm_bo_del_from_lru(bo);
/* TODO: Cleanup the locking */
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
/*
* Move to system cached
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 5064f7db78ab..34b1b5190a6b 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
ttm_pool_mgr_init(num_pages * 50 / 100);
ttm_tt_mgr_init();
- spin_lock_init(&glob->lru_lock);
glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
gfp_t gfp_flags)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
struct ttm_buffer_object *bo;
unsigned i, j;
int ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
man = ttm_manager_type(bdev, i);
if (!man || !man->use_tt)
@@ -155,7 +153,7 @@ long ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
}
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
return 0;
}
EXPORT_SYMBOL(ttm_device_swapout);
@@ -222,6 +220,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
bdev->vma_manager = vma_manager;
INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
+ spin_lock_init(&bdev->lru_lock);
INIT_LIST_HEAD(&bdev->ddestroy);
bdev->dev_mapping = mapping;
mutex_lock(&ttm_global_mutex);
@@ -234,7 +233,6 @@ EXPORT_SYMBOL(ttm_device_init);
void ttm_device_fini(struct ttm_device *bdev)
{
- struct ttm_global *glob = &ttm_glob;
struct ttm_resource_manager *man;
unsigned i;
@@ -251,11 +249,11 @@ void ttm_device_fini(struct ttm_device *bdev)
if (ttm_bo_delayed_delete(bdev, true))
pr_debug("Delayed destroy list was clean\n");
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
if (list_empty(&man->lru[i]))
pr_debug("Swap list %d was clean\n", i);
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ttm_pool_fini(&bdev->pool);
ttm_global_release();
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 690ab97d52b7..071c48d672c6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
if (list_empty(list))
return;
- spin_lock(&ttm_glob.lru_lock);
list_for_each_entry(entry, list, head) {
struct ttm_buffer_object *bo = entry->bo;
@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
dma_resv_add_shared_fence(bo->base.resv, fence);
else
dma_resv_add_excl_fence(bo->base.resv, fence);
- ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
+ ttm_bo_move_to_lru_tail_unlocked(bo);
dma_resv_unlock(bo->base.resv);
}
- spin_unlock(&ttm_glob.lru_lock);
if (ticket)
ww_acquire_fini(ticket);
}
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index ed1672a9f332..04f2eef653ab 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -91,7 +91,6 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
.no_wait_gpu = false,
.force_alloc = true
};
- struct ttm_global *glob = &ttm_glob;
struct dma_fence *fence;
int ret;
unsigned i;
@@ -100,18 +99,18 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev,
* Can't use standard list traversal since we're unlocking.
*/
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
while (!list_empty(&man->lru[i])) {
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
ret = ttm_mem_evict_first(bdev, man, NULL, &ctx,
NULL);
if (ret)
return ret;
- spin_lock(&glob->lru_lock);
+ spin_lock(&bdev->lru_lock);
}
}
- spin_unlock(&glob->lru_lock);
+ spin_unlock(&bdev->lru_lock);
spin_lock(&man->move_lock);
fence = dma_fence_get(man->move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index d007feef7676..dbccac957f8f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -180,9 +180,9 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
static inline void
ttm_bo_move_to_lru_tail_unlocked(struct ttm_buffer_object *bo)
{
- spin_lock(&ttm_glob.lru_lock);
+ spin_lock(&bo->bdev->lru_lock);
ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
- spin_unlock(&ttm_glob.lru_lock);
+ spin_unlock(&bo->bdev->lru_lock);
}
static inline void ttm_bo_assign_mem(struct ttm_buffer_object *bo,
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index cda6efb4c34b..bae56d29e8ff 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -56,7 +56,6 @@ extern struct ttm_global {
*/
struct page *dummy_read_page;
- spinlock_t lru_lock;
/**
* Protected by ttm_global_mutex.
@@ -277,8 +276,9 @@ struct ttm_device {
struct ttm_pool pool;
/*
- * Protected by the global:lru lock.
+ * Protection for the per manager LRU and ddestroy lists.
*/
+ spinlock_t lru_lock;
struct list_head ddestroy;
/*
--
2.25.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 28+ messages in thread
end of thread, other threads:[~2021-03-22 7:13 UTC | newest]
Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-18 12:47 [PATCH 1/3] drm/ttm: move swapout logic around v2 Christian König
2021-03-18 12:47 ` [PATCH 2/3] drm/ttm: remove swap LRU v3 Christian König
2021-03-19 4:30 ` Huang Rui
2021-03-18 12:47 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-19 4:32 ` Huang Rui
2021-03-19 12:10 ` Christian König
2021-03-22 7:13 ` Huang Rui
2021-03-18 14:43 ` [PATCH 1/3] drm/ttm: move swapout logic around v2 Nirmoy
2021-03-18 15:26 ` Christian König
2021-03-18 19:46 ` Nirmoy
2021-03-18 15:08 ` kernel test robot
2021-03-18 15:08 ` kernel test robot
2021-03-18 18:13 ` kernel test robot
2021-03-18 18:13 ` kernel test robot
2021-03-19 12:07 ` Christian König
2021-03-19 12:07 ` Christian König
2021-03-19 4:28 ` Huang Rui
2021-03-19 9:41 ` Matthew Auld
-- strict thread matches above, loose matches on Subject: below --
2021-03-15 16:04 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
2021-03-15 16:04 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-03-15 20:17 ` kernel test robot
2021-03-15 20:17 ` kernel test robot
2021-03-16 9:35 ` Daniel Vetter
2021-03-16 12:03 ` Christian König
2021-03-16 12:05 ` Daniel Vetter
2021-03-16 15:13 ` Christian König
2021-02-11 13:29 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
2021-02-11 13:29 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
2021-02-10 23:28 kernel test robot
2021-02-10 15:21 [PATCH 1/3] drm/ttm: move swapout logic around Christian König
2021-02-10 15:21 ` [PATCH 3/3] drm/ttm: switch to per device LRU lock Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.