intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH] drm/i915/gt: Delay taking the spinlock for grabbing from the buffer pool
@ 2020-07-24 16:37 Chris Wilson
  2020-07-24 16:44 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for " Patchwork
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Chris Wilson @ 2020-07-24 16:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Some very low hanging fruit, but contention on the pool->lock is
noticeable between intel_gt_get_buffer_pool() and pool_retire(), with
the majority of the hold time due to the locked list iteration. If we
make the node itself RCU protected, we can perform the search for an
suitable node just under RCU, reserving taking the lock itself for
claiming the node and manipulating the list.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 57 ++++++++++++-------
 .../drm/i915/gt/intel_gt_buffer_pool_types.h  |  7 ++-
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 418ae184cecf..7c688dee982b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -35,37 +35,46 @@ static void node_free(struct intel_gt_buffer_pool_node *node)
 {
 	i915_gem_object_put(node->obj);
 	i915_active_fini(&node->active);
-	kfree(node);
+	kfree_rcu(node, rcu);
 }
 
 static void pool_free_work(struct work_struct *wrk)
 {
 	struct intel_gt_buffer_pool *pool =
 		container_of(wrk, typeof(*pool), work.work);
-	struct intel_gt_buffer_pool_node *node, *next;
+	struct intel_gt_buffer_pool_node *node, *stale = NULL;
 	unsigned long old = jiffies - HZ;
 	bool active = false;
-	LIST_HEAD(stale);
 	int n;
 
 	/* Free buffers that have not been used in the past second */
-	spin_lock_irq(&pool->lock);
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
 		struct list_head *list = &pool->cache_list[n];
 
-		/* Most recent at head; oldest at tail */
-		list_for_each_entry_safe_reverse(node, next, list, link) {
-			if (time_before(node->age, old))
-				break;
+		if (list_empty(list))
+			continue;
 
-			list_move(&node->link, &stale);
+		/* Most recent at head; oldest at tail */
+		if (spin_trylock_irq(&pool->lock)) {
+			list_for_each_entry_reverse(node, list, link) {
+				if (time_before(node->age, old))
+					break;
+
+				node->age = 0;
+				list_del_rcu(&node->link);
+				node->free = stale;
+				stale = node;
+			}
+			spin_unlock_irq(&pool->lock);
 		}
+
 		active |= !list_empty(list);
 	}
-	spin_unlock_irq(&pool->lock);
 
-	list_for_each_entry_safe(node, next, &stale, link)
+	while ((node = stale)) {
+		stale = stale->free;
 		node_free(node);
+	}
 
 	if (active)
 		schedule_delayed_work(&pool->work,
@@ -110,7 +119,7 @@ static void pool_retire(struct i915_active *ref)
 
 	spin_lock_irqsave(&pool->lock, flags);
 	node->age = jiffies;
-	list_add(&node->link, list);
+	list_add_rcu(&node->link, list);
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	schedule_delayed_work(&pool->work,
@@ -151,22 +160,32 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
 	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
 	struct intel_gt_buffer_pool_node *node;
 	struct list_head *list;
-	unsigned long flags;
+	bool found = false;
 	int ret;
 
 	size = PAGE_ALIGN(size);
 	list = bucket_for_size(pool, size);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	list_for_each_entry(node, list, link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(node, list, link) {
+		unsigned long flags;
+
 		if (node->obj->base.size < size)
 			continue;
-		list_del(&node->link);
-		break;
+
+		spin_lock_irqsave(&pool->lock, flags);
+		if (node->age) {
+			list_del_rcu(&node->link);
+			node->age = 0;
+			found = true;
+		}
+		spin_unlock_irqrestore(&pool->lock, flags);
+		if (found)
+			break;
 	}
-	spin_unlock_irqrestore(&pool->lock, flags);
+	rcu_read_unlock();
 
-	if (&node->link == list) {
+	if (!found) {
 		node = node_create(pool, size);
 		if (IS_ERR(node))
 			return node;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e28bdda771ed..d30dc6d32f10 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -25,8 +25,13 @@ struct intel_gt_buffer_pool_node {
 	struct i915_active active;
 	struct drm_i915_gem_object *obj;
 	struct list_head link;
-	struct intel_gt_buffer_pool *pool;
+	union {
+		struct intel_gt_buffer_pool *pool;
+		struct intel_gt_buffer_pool_node *free;
+		struct rcu_head rcu;
+	};
 	unsigned long age;
+
 };
 
 #endif /* INTEL_GT_BUFFER_POOL_TYPES_H */
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread
* [Intel-gfx] [PATCH] drm/i915/gt: Delay taking the spinlock for grabbing from the buffer pool
@ 2020-07-28 15:59 Chris Wilson
  2020-07-28 18:08 ` Matthew Auld
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Chris Wilson @ 2020-07-28 15:59 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson

Some very low hanging fruit, but contention on the pool->lock is
noticeable between intel_gt_get_buffer_pool() and pool_retire(), with
the majority of the hold time due to the locked list iteration. If we
make the node itself RCU protected, we can perform the search for an
suitable node just under RCU, reserving taking the lock itself for
claiming the node and manipulating the list.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_gt_buffer_pool.c    | 64 +++++++++++++------
 .../drm/i915/gt/intel_gt_buffer_pool_types.h  |  6 +-
 2 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 418ae184cecf..0af30238215f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -35,37 +35,51 @@ static void node_free(struct intel_gt_buffer_pool_node *node)
 {
 	i915_gem_object_put(node->obj);
 	i915_active_fini(&node->active);
-	kfree(node);
+	kfree_rcu(node, rcu);
 }
 
 static void pool_free_work(struct work_struct *wrk)
 {
 	struct intel_gt_buffer_pool *pool =
 		container_of(wrk, typeof(*pool), work.work);
-	struct intel_gt_buffer_pool_node *node, *next;
+	struct intel_gt_buffer_pool_node *node, *stale = NULL;
 	unsigned long old = jiffies - HZ;
 	bool active = false;
-	LIST_HEAD(stale);
 	int n;
 
 	/* Free buffers that have not been used in the past second */
-	spin_lock_irq(&pool->lock);
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
 		struct list_head *list = &pool->cache_list[n];
 
-		/* Most recent at head; oldest at tail */
-		list_for_each_entry_safe_reverse(node, next, list, link) {
-			if (time_before(node->age, old))
-				break;
+		if (list_empty(list))
+			continue;
+
+		if (spin_trylock_irq(&pool->lock)) {
+			struct list_head *pos;
+
+			/* Most recent at head; oldest at tail */
+			list_for_each_prev(pos, list) {
+				node = list_entry(pos, typeof(*node), link);
+				if (time_before(node->age, old))
+					break;
 
-			list_move(&node->link, &stale);
+				node->age = 0;
+				node->free = stale;
+				stale = node;
+			}
+			if (!list_is_last(pos, list))
+				__list_del_many(pos, list);
+
+			spin_unlock_irq(&pool->lock);
 		}
+
 		active |= !list_empty(list);
 	}
-	spin_unlock_irq(&pool->lock);
 
-	list_for_each_entry_safe(node, next, &stale, link)
+	while ((node = stale)) {
+		stale = stale->free;
 		node_free(node);
+	}
 
 	if (active)
 		schedule_delayed_work(&pool->work,
@@ -108,9 +122,9 @@ static void pool_retire(struct i915_active *ref)
 	/* Return this object to the shrinker pool */
 	i915_gem_object_make_purgeable(node->obj);
 
+	node->age = jiffies ?: 1;
 	spin_lock_irqsave(&pool->lock, flags);
-	node->age = jiffies;
-	list_add(&node->link, list);
+	list_add_rcu(&node->link, list);
 	spin_unlock_irqrestore(&pool->lock, flags);
 
 	schedule_delayed_work(&pool->work,
@@ -151,22 +165,32 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
 	struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
 	struct intel_gt_buffer_pool_node *node;
 	struct list_head *list;
-	unsigned long flags;
+	bool found = false;
 	int ret;
 
 	size = PAGE_ALIGN(size);
 	list = bucket_for_size(pool, size);
 
-	spin_lock_irqsave(&pool->lock, flags);
-	list_for_each_entry(node, list, link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(node, list, link) {
+		unsigned long flags;
+
 		if (node->obj->base.size < size)
 			continue;
-		list_del(&node->link);
-		break;
+
+		spin_lock_irqsave(&pool->lock, flags);
+		if (node->age) {
+			list_del_rcu(&node->link);
+			node->age = 0;
+			found = true;
+		}
+		spin_unlock_irqrestore(&pool->lock, flags);
+		if (found)
+			break;
 	}
-	spin_unlock_irqrestore(&pool->lock, flags);
+	rcu_read_unlock();
 
-	if (&node->link == list) {
+	if (!found) {
 		node = node_create(pool, size);
 		if (IS_ERR(node))
 			return node;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index e28bdda771ed..bcf1658c9633 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node {
 	struct i915_active active;
 	struct drm_i915_gem_object *obj;
 	struct list_head link;
-	struct intel_gt_buffer_pool *pool;
+	union {
+		struct intel_gt_buffer_pool *pool;
+		struct intel_gt_buffer_pool_node *free;
+		struct rcu_head rcu;
+	};
 	unsigned long age;
 };
 
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-07-28 19:36 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-24 16:37 [Intel-gfx] [PATCH] drm/i915/gt: Delay taking the spinlock for grabbing from the buffer pool Chris Wilson
2020-07-24 16:44 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for " Patchwork
2020-07-24 17:02 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-07-24 17:51 ` [Intel-gfx] [PATCH] " Chris Wilson
2020-07-24 18:05 ` [Intel-gfx] ✗ Fi.CI.SPARSE: warning for drm/i915/gt: Delay taking the spinlock for grabbing from the buffer pool (rev2) Patchwork
2020-07-24 18:31 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-07-28 15:59 [Intel-gfx] [PATCH] drm/i915/gt: Delay taking the spinlock for grabbing from the buffer pool Chris Wilson
2020-07-28 18:08 ` Matthew Auld
2020-07-28 18:36 ` Chris Wilson
2020-07-28 19:36 ` Chris Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).