All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhongkun He <hezhongkun.hzk@bytedance.com>
To: akpm@linux-foundation.org
Cc: hannes@cmpxchg.org, yosryahmed@google.com, nphamcs@gmail.com,
	sjenning@redhat.com, ddstreet@ieee.org, vitaly.wool@konsulko.com,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Zhongkun He <hezhongkun.hzk@bytedance.com>
Subject: [RFC PATCH] zswap: add writeback_time_threshold interface to shrink zswap pool
Date: Wed, 11 Oct 2023 13:11:17 +0800	[thread overview]
Message-ID: <20231011051117.2289518-1-hezhongkun.hzk@bytedance.com> (raw)

zswap does not have a suitable method to select objects that have not
been accessed for a long time; it just shrinks the pool when the limit
is hit. There is a high probability of wasting memory in zswap if the
limit is too high.

This patch adds a new interface, writeback_time_threshold, to shrink the zswap
pool proactively based on a time threshold in seconds, e.g.::

echo 600 > /sys/module/zswap/parameters/writeback_time_threshold

If zswap entries have not been accessed for more than 600 seconds, they
will be written back to swap. If set to 0, all of them will be written back.

Signed-off-by: Zhongkun He <hezhongkun.hzk@bytedance.com>
---
 Documentation/admin-guide/mm/zswap.rst |  9 +++
 mm/zswap.c                             | 76 ++++++++++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index 45b98390e938..9ffaed26c3c0 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -153,6 +153,15 @@ attribute, e. g.::
 
 Setting this parameter to 100 will disable the hysteresis.
 
+When zswap holds a lot of cold memory, judged by the store time of its entries,
+it can be proactively written back to swap from userspace to save memory. Users
+can write a writeback time threshold, in seconds, to trigger this, e.g.::
+
+  echo 600 > /sys/module/zswap/parameters/writeback_time_threshold
+
+If zswap entries have not been accessed for more than 600 seconds, they will be
+swapped out. If set to 0, all of them will be swapped out.
+
 A debugfs interface is provided for various statistic about pool size, number
 of pages stored, same-value filled pages and various counters for the reasons
 pages are rejected.
diff --git a/mm/zswap.c b/mm/zswap.c
index 083c693602b8..c3a19b56a29b 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -141,6 +141,16 @@ static bool zswap_exclusive_loads_enabled = IS_ENABLED(
 		CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
 module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);
 
+/* zswap writeback time threshold, in seconds (writeback_time_threshold) */
+static unsigned int  zswap_writeback_time_thr;
+static int zswap_writeback_time_thr_param_set(const char *, const struct kernel_param *);
+static const struct kernel_param_ops zswap_writeback_param_ops = {
+	.set =		zswap_writeback_time_thr_param_set,
+	.get =          param_get_uint,
+};
+module_param_cb(writeback_time_threshold, &zswap_writeback_param_ops,
+			&zswap_writeback_time_thr, 0644);
+
 /* Number of zpools in zswap_pool (empirically determined for scalability) */
 #define ZSWAP_NR_ZPOOLS 32
 
@@ -197,6 +207,7 @@ struct zswap_pool {
  * value - value of the same-value filled pages which have same content
  * objcg - the obj_cgroup that the compressed memory is charged to
  * lru - handle to the pool's lru used to evict pages.
+ * sto_time - the store time of zswap_entry.
  */
 struct zswap_entry {
 	struct rb_node rbnode;
@@ -210,6 +221,7 @@ struct zswap_entry {
 	};
 	struct obj_cgroup *objcg;
 	struct list_head lru;
+	ktime_t sto_time;
 };
 
 /*
@@ -288,6 +300,31 @@ static void zswap_update_total_size(void)
 	zswap_pool_total_size = total;
 }
 
+/*
+ * Return true if the LRU tail entry of @pool was stored more than
+ * zswap_writeback_time_thr seconds ago; false if the LRU is empty.
+ */
+static bool zswap_reach_timethr(struct zswap_pool *pool)
+{
+	struct zswap_entry *entry;
+	ktime_t expire_time;
+	bool ret = false;
+
+	spin_lock(&pool->lru_lock);
+
+	if (list_empty(&pool->lru))
+		goto out;
+
+	entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
+	/* ktime_set() scales seconds to ns in s64, so 32-bit cannot overflow. */
+	expire_time = ktime_add(entry->sto_time,
+				ktime_set(zswap_writeback_time_thr, 0));
+	ret = ktime_after(ktime_get_boottime(), expire_time);
+out:
+	spin_unlock(&pool->lru_lock);
+	return ret;
+}
+
 /*********************************
 * zswap entry functions
 **********************************/
@@ -395,6 +432,7 @@ static void zswap_free_entry(struct zswap_entry *entry)
 	else {
 		spin_lock(&entry->pool->lru_lock);
 		list_del(&entry->lru);
+		entry->sto_time = 0;
 		spin_unlock(&entry->pool->lru_lock);
 		zpool_free(zswap_find_zpool(entry), entry->handle);
 		zswap_pool_put(entry->pool);
@@ -709,6 +747,28 @@ static void shrink_worker(struct work_struct *w)
 	zswap_pool_put(pool);
 }
 
+/* Reclaim current-pool entries while the LRU tail exceeds the time threshold */
+static void zswap_reclaim_entry_by_timethr(void)
+{
+	struct zswap_pool *cur = zswap_pool_current_get();
+	int err, tries = 0;
+
+	if (!cur)
+		return;
+
+	while (zswap_reach_timethr(cur)) {
+		err = zswap_reclaim_entry(cur);
+		if (err) {
+			/* Count every failure; only -EAGAIN is retried, boundedly. */
+			zswap_reject_reclaim_fail++;
+			if (err != -EAGAIN || ++tries == MAX_RECLAIM_RETRIES)
+				break;
+		}
+		cond_resched();
+	}
+	zswap_pool_put(cur);
+}
+
 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
 	int i;
@@ -1037,6 +1097,21 @@ static int zswap_enabled_param_set(const char *val,
 	return ret;
 }
 
+/* Store the new threshold and, on success, run time-threshold reclaim. */
+static int zswap_writeback_time_thr_param_set(const char *val,
+				const struct kernel_param *kp)
+{
+	int err;
+
+	/* Load-time (pre-init) settings are rejected. */
+	if (system_state != SYSTEM_RUNNING)
+		return -ENODEV;
+	err = param_set_uint(val, kp);
+	if (!err)
+		zswap_reclaim_entry_by_timethr();
+	return err;
+}
+
 /*********************************
 * writeback code
 **********************************/
@@ -1360,6 +1435,7 @@ bool zswap_store(struct folio *folio)
 	if (entry->length) {
 		spin_lock(&entry->pool->lru_lock);
 		list_add(&entry->lru, &entry->pool->lru);
+		entry->sto_time = ktime_get_boottime();
 		spin_unlock(&entry->pool->lru_lock);
 	}
 	spin_unlock(&tree->lock);
-- 
2.25.1


             reply	other threads:[~2023-10-11  5:11 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-11  5:11 Zhongkun He [this message]
2023-10-11 19:36 ` [RFC PATCH] zswap: add writeback_time_threshold interface to shrink zswap pool Nhat Pham
2023-10-12 14:13   ` [External] " 贺中坤
2023-10-12 14:22     ` Johannes Weiner
2023-10-13 12:59       ` 贺中坤
2023-10-12 18:08     ` Nhat Pham
2023-10-13 13:38       ` 贺中坤
2023-10-13  2:46 ` Yosry Ahmed
2023-10-13 14:02   ` [External] " 贺中坤

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231011051117.2289518-1-hezhongkun.hzk@bytedance.com \
    --to=hezhongkun.hzk@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=ddstreet@ieee.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nphamcs@gmail.com \
    --cc=sjenning@redhat.com \
    --cc=vitaly.wool@konsulko.com \
    --cc=yosryahmed@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.