From: Nhat Pham <nphamcs@gmail.com>
To: akpm@linux-foundation.org
Cc: hannes@cmpxchg.org, cerasuolodomenico@gmail.com,
	yosryahmed@google.com, sjenning@redhat.com, ddstreet@ieee.org,
	vitaly.wool@konsulko.com, mhocko@kernel.org,
	roman.gushchin@linux.dev, shakeelb@google.com,
	muchun.song@linux.dev, linux-mm@kvack.org, kernel-team@meta.com,
	linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org,
	shuah@kernel.org
Subject: [PATCH v3 1/5] mm: list_lru: allow external numa node and cgroup tracking
Date: Tue, 17 Oct 2023 16:21:48 -0700
Message-ID: <20231017232152.2605440-2-nphamcs@gmail.com>
In-Reply-To: <20231017232152.2605440-1-nphamcs@gmail.com>

The list_lru interface assumes that objects are allocated on the node,
and charged to the memcg, under which they should be tracked. This change
adds the ability to explicitly specify the NUMA node and memcg when adding
and removing objects, so that users of list_lru can track the node/memcg
of items outside of the list_lru itself. zswap needs this: there, the
allocations can be made by kswapd for data that's charged to a different
cgroup.
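
For illustration, a minimal sketch of the intended call pattern (the
entry object and the entry_to_nid()/mem_cgroup_from_entry() helpers are
hypothetical placeholders, not part of this patch):

	/*
	 * The list_head may live in memory allocated by kswapd on one
	 * node, while the data it represents is charged to a different
	 * cgroup: derive nid/memcg from the data and pass them in
	 * explicitly, instead of letting list_lru infer them from the
	 * list_head allocation as list_lru_add()/list_lru_del() do.
	 */
	int nid = entry_to_nid(entry);
	struct mem_cgroup *memcg = mem_cgroup_from_entry(entry);

	__list_lru_add(lru, &entry->lru, nid, memcg);
	/* ... and later, when the object is reclaimed or freed ... */
	__list_lru_del(lru, &entry->lru, nid, memcg);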

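Similarly, a sketch of how list_lru_putback() pairs with
list_lru_isolate() in a walk callback. struct shrink_ctx and
reclaim_one() are placeholders for the caller's real state and reclaim
attempt, and @lock is assumed to be the nlru lock of ctx->nid, as in
list_lru_walk_one():

	struct shrink_ctx {		/* hypothetical caller state */
		struct list_lru *lru;
		int nid;
		struct mem_cgroup *memcg;
	};

	static enum lru_status shrink_cb(struct list_head *item,
					 struct list_lru_one *l,
					 spinlock_t *lock, void *arg)
	{
		struct shrink_ctx *ctx = arg;

		/* Take the item off its sublist while holding the lock. */
		list_lru_isolate(l, item);
		spin_unlock(lock);

		if (reclaim_one(item)) {	/* placeholder */
			spin_lock(lock);
			return LRU_REMOVED_RETRY;
		}

		/*
		 * Reclaim failed. The lock was dropped, so @l may be
		 * stale: re-take the lock and let list_lru_putback()
		 * recompute the sublist from nid/memcg before re-adding.
		 */
		spin_lock(lock);
		list_lru_putback(ctx->lru, item, ctx->nid, ctx->memcg);
		return LRU_RETRY;
	}
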
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/list_lru.h | 38 +++++++++++++++++++++++++++++++++++
 mm/list_lru.c            | 43 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index b35968ee9fb5..0f5f39cacbbb 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -89,6 +89,24 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
  */
 bool list_lru_add(struct list_lru *lru, struct list_head *item);
 
+/**
+ * __list_lru_add: add an element to a specific sublist.
+ * @lru: the lru pointer
+ * @item: the item to be added.
+ * @nid: the node id of the sublist to add the item to.
+ * @memcg: the cgroup of the sublist to add the item to.
+ *
+ * This function is similar to list_lru_add(), but it allows the caller to
+ * specify the sublist to which the item should be added. This is useful when
+ * the list_head is not necessarily on the same NUMA node, or in the same
+ * cgroup, as the data it represents; in zswap, for instance, the list_head
+ * can be allocated by kswapd while the data is charged to a different cgroup.
+ *
+ * Return value: true if the list was updated, false otherwise.
+ */
+bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg);
+
 /**
  * list_lru_del: delete an element to the lru list
  * @list_lru: the lru pointer
@@ -102,6 +120,18 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item);
  */
 bool list_lru_del(struct list_lru *lru, struct list_head *item);
 
+/**
+ * __list_lru_del: delete an element from a specific sublist.
+ * @lru: the lru pointer
+ * @item: the item to be deleted.
+ * @nid: the node id of the sublist to delete the item from.
+ * @memcg: the cgroup of the sublist to delete the item from.
+ *
+ * Return value: true if the list was updated, false otherwise.
+ */
+bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg);
+
 /**
  * list_lru_count_one: return the number of objects currently held by @lru
  * @lru: the lru pointer.
@@ -136,6 +166,14 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
 void list_lru_isolate(struct list_lru_one *list, struct list_head *item);
 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
 			   struct list_head *head);
+/*
+ * list_lru_putback: undo list_lru_isolate.
+ *
+ * Since the LRU lock might have been dropped in between, recompute the
+ * list_lru_one from the node id and memcg rather than reusing the old one.
+ */
+void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
+		      struct mem_cgroup *memcg);
 
 typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
 		struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index a05e5bef3b40..63b75163c6ad 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -119,13 +119,22 @@ list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
 bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
+	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
+		mem_cgroup_from_slab_obj(item) : NULL;
+
+	return __list_lru_add(lru, item, nid, memcg);
+}
+EXPORT_SYMBOL_GPL(list_lru_add);
+
+bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg)
+{
 	struct list_lru_node *nlru = &lru->node[nid];
-	struct mem_cgroup *memcg;
 	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
 	if (list_empty(item)) {
-		l = list_lru_from_kmem(lru, nid, item, &memcg);
+		l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
 		list_add_tail(item, &l->list);
 		/* Set shrinker bit if the first element was added */
 		if (!l->nr_items++)
@@ -138,17 +147,27 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 	spin_unlock(&nlru->lock);
 	return false;
 }
-EXPORT_SYMBOL_GPL(list_lru_add);
+EXPORT_SYMBOL_GPL(__list_lru_add);
 
 bool list_lru_del(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
+	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
+		mem_cgroup_from_slab_obj(item) : NULL;
+
+	return __list_lru_del(lru, item, nid, memcg);
+}
+EXPORT_SYMBOL_GPL(list_lru_del);
+
+bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg)
+{
 	struct list_lru_node *nlru = &lru->node[nid];
 	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
 	if (!list_empty(item)) {
-		l = list_lru_from_kmem(lru, nid, item, NULL);
+		l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
 		list_del_init(item);
 		l->nr_items--;
 		nlru->nr_items--;
@@ -158,7 +177,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 	spin_unlock(&nlru->lock);
 	return false;
 }
-EXPORT_SYMBOL_GPL(list_lru_del);
+EXPORT_SYMBOL_GPL(__list_lru_del);
 
 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
 {
@@ -175,6 +194,20 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
 }
 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
 
+void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
+		      struct mem_cgroup *memcg)
+{
+	struct list_lru_one *list =
+		list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
+
+	if (list_empty(item)) {
+		list_add_tail(item, &list->list);
+		if (!list->nr_items++)
+			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
+	}
+}
+EXPORT_SYMBOL_GPL(list_lru_putback);
+
 unsigned long list_lru_count_one(struct list_lru *lru,
 				 int nid, struct mem_cgroup *memcg)
 {
-- 
2.34.1
