From: Honggyu Kim <honggyu.kim@sk.com>
To: sj@kernel.org, damon@lists.linux.dev, linux-mm@kvack.org
Cc: akpm@linux-foundation.org, apopple@nvidia.com,
	baolin.wang@linux.alibaba.com, dave.jiang@intel.com,
	honggyu.kim@sk.com, hyeongtak.ji@sk.com, kernel_team@skhynix.com,
	linmiaohe@huawei.com, linux-kernel@vger.kernel.org,
	linux-trace-kernel@vger.kernel.org, lizhijian@cn.fujitsu.com,
	mathieu.desnoyers@efficios.com, mhiramat@kernel.org,
	rakie.kim@sk.com, rostedt@goodmis.org, surenb@google.com,
	yangx.jy@fujitsu.com, ying.huang@intel.com, ziy@nvidia.com,
	42.hyeyoo@gmail.com
Subject: [PATCH v2 3/7] mm/damon: introduce DAMOS_DEMOTE action for demotion
Date: Mon, 26 Feb 2024 23:05:49 +0900
Message-ID: <20240226140555.1615-4-honggyu.kim@sk.com>
In-Reply-To: <20240226140555.1615-1-honggyu.kim@sk.com>

This patch introduces the DAMOS_DEMOTE action, which is similar to
DAMOS_PAGEOUT but demotes folios instead of swapping them out.

Since demotion shares some routines with pageout, many of the functions
added here have logic similar to their pageout counterparts.

damon_pa_migrate_folio_list() is a minimized version of
shrink_folio_list(), stripped down to handle only demotion.
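
As a rough sketch of how the pieces fit together (names taken from this
patch; the migration_mode enum comes from patch 1/7 of this series), a
DAMOS_DEMOTE scheme flows through the following call chain:

  damon_pa_apply_scheme()                /* action == DAMOS_DEMOTE */
    -> damon_pa_migrate(r, scheme, MIG_DEMOTE)
       /* isolate folios of the region onto a local list */
    -> damon_pa_migrate_pages(&folio_list, MIG_DEMOTE)
       /* batch the list per source node, with reclaim disabled */
    -> damon_pa_migrate_folio_list(&node_folio_list, NODE_DATA(nid), mm)
       /* skip recently referenced folios via folio_check_references() */
    -> migrate_folio_list(&migrate_folios, pgdat, mm)
       /* migrate_pages() toward next_demotion_node(pgdat->node_id) */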

Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: Hyeongtak Ji <hyeongtak.ji@sk.com>
---
 include/linux/damon.h    |   2 +
 mm/damon/paddr.c         | 222 ++++++++++++++++++++++++++++++++++++++-
 mm/damon/sysfs-schemes.c |   1 +
 3 files changed, 224 insertions(+), 1 deletion(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index e00ddf1ed39c..86e66772766b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -105,6 +105,7 @@ struct damon_target {
  * @DAMOS_NOHUGEPAGE:	Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
  * @DAMOS_LRU_PRIO:	Prioritize the region on its LRU lists.
  * @DAMOS_LRU_DEPRIO:	Deprioritize the region on its LRU lists.
+ * @DAMOS_DEMOTE:	Demote the given region to a lower memory tier.
  * @DAMOS_STAT:		Do nothing but count the stat.
  * @NR_DAMOS_ACTIONS:	Total number of DAMOS actions
  *
@@ -122,6 +123,7 @@ enum damos_action {
 	DAMOS_NOHUGEPAGE,
 	DAMOS_LRU_PRIO,
 	DAMOS_LRU_DEPRIO,
+	DAMOS_DEMOTE,
 	DAMOS_STAT,		/* Do nothing but only record the stat */
 	NR_DAMOS_ACTIONS,
 };
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 277a1c4d833c..23e37ce57202 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -12,6 +12,9 @@
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
 #include <linux/swap.h>
+#include <linux/memory-tiers.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
 
 #include "../internal.h"
 #include "ops-common.h"
@@ -226,8 +229,214 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
 
 enum migration_mode {
 	MIG_PAGEOUT,
+	MIG_DEMOTE,
 };
 
+/*
+ * XXX: This is a copy of demote_folio_list(), renamed to migrate_folio_list().
+ * Take folios on @migrate_folios and attempt to migrate them to another node.
+ * Folios which are not migrated are left on @migrate_folios.
+ */
+static unsigned int migrate_folio_list(struct list_head *migrate_folios,
+				       struct pglist_data *pgdat,
+				       enum migration_mode mm)
+{
+	int target_nid = next_demotion_node(pgdat->node_id);
+	unsigned int nr_succeeded;
+	nodemask_t allowed_mask;
+
+	struct migration_target_control mtc = {
+		/*
+		 * Allocate from 'node', or fail quickly and quietly.
+		 * When this happens, 'page' will likely just be discarded
+		 * instead of migrated.
+		 */
+		.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | __GFP_NOWARN |
+			__GFP_NOMEMALLOC | GFP_NOWAIT,
+		.nid = target_nid,
+		.nmask = &allowed_mask
+	};
+
+	if (pgdat->node_id == target_nid || target_nid == NUMA_NO_NODE)
+		return 0;
+
+	if (list_empty(migrate_folios))
+		return 0;
+
+	node_get_allowed_targets(pgdat, &allowed_mask);
+
+	/* Migration ignores all cpuset and mempolicy settings */
+	migrate_pages(migrate_folios, alloc_migrate_folio, NULL,
+		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
+		      &nr_succeeded);
+
+	__count_vm_events(PGDEMOTE_DIRECT, nr_succeeded);
+
+	return nr_succeeded;
+}
+
+enum folio_references {
+	FOLIOREF_RECLAIM,
+	FOLIOREF_KEEP,
+	FOLIOREF_ACTIVATE,
+};
+
+/*
+ * XXX: This is just copied and simplified from folio_check_references() in
+ *      mm/vmscan.c, but without scan_control.
+ */
+static enum folio_references folio_check_references(struct folio *folio)
+{
+	int referenced_ptes, referenced_folio;
+	unsigned long vm_flags;
+
+	referenced_ptes = folio_referenced(folio, 1, NULL, &vm_flags);
+	referenced_folio = folio_test_clear_referenced(folio);
+
+	/* rmap lock contention: rotate */
+	if (referenced_ptes == -1)
+		return FOLIOREF_KEEP;
+
+	if (referenced_ptes) {
+		/*
+		 * All mapped folios start out with page table
+		 * references from the instantiating fault, so we need
+		 * to look twice if a mapped file/anon folio is used more
+		 * than once.
+		 *
+		 * Mark it and spare it for another trip around the
+		 * inactive list.  Another page table reference will
+		 * lead to its activation.
+		 *
+		 * Note: the mark is set for activated folios as well
+		 * so that recently deactivated but used folios are
+		 * quickly recovered.
+		 */
+		folio_set_referenced(folio);
+
+		if (referenced_folio || referenced_ptes > 1)
+			return FOLIOREF_ACTIVATE;
+
+		/*
+		 * Activate file-backed executable folios after first usage.
+		 */
+		if ((vm_flags & VM_EXEC) && folio_is_file_lru(folio))
+			return FOLIOREF_ACTIVATE;
+
+		return FOLIOREF_KEEP;
+	}
+
+	return FOLIOREF_RECLAIM;
+}
+
+/*
+ * XXX: This is a minimized implementation based on shrink_folio_list(),
+ *      handling only the demotion path via migrate_folio_list().
+ */
+static unsigned int damon_pa_migrate_folio_list(struct list_head *folio_list,
+						struct pglist_data *pgdat,
+						enum migration_mode mm)
+{
+	unsigned int nr_migrated = 0;
+	struct folio *folio;
+	LIST_HEAD(ret_folios);
+	LIST_HEAD(migrate_folios);
+
+	cond_resched();
+
+	while (!list_empty(folio_list)) {
+		struct folio *folio;
+		enum folio_references references;
+
+		cond_resched();
+
+		folio = lru_to_folio(folio_list);
+		list_del(&folio->lru);
+
+		if (!folio_trylock(folio))
+			goto keep;
+
+		VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
+
+		references = folio_check_references(folio);
+		if (references == FOLIOREF_KEEP)
+			goto keep_locked;
+
+		/* Relocate its contents to another node. */
+		list_add(&folio->lru, &migrate_folios);
+		folio_unlock(folio);
+		continue;
+keep_locked:
+		folio_unlock(folio);
+keep:
+		list_add(&folio->lru, &ret_folios);
+		VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+	}
+	/* 'folio_list' is always empty here */
+
+	/* Migrate folios selected for migration */
+	nr_migrated += migrate_folio_list(&migrate_folios, pgdat, mm);
+	/* Folios that could not be migrated are still in @migrate_folios */
+	if (!list_empty(&migrate_folios)) {
+		/* Folios which weren't migrated go back on @folio_list */
+		list_splice_init(&migrate_folios, folio_list);
+	}
+
+	try_to_unmap_flush();
+
+	list_splice(&ret_folios, folio_list);
+
+	while (!list_empty(folio_list)) {
+		folio = lru_to_folio(folio_list);
+		list_del(&folio->lru);
+		folio_putback_lru(folio);
+	}
+
+	return nr_migrated;
+}
+
+/*
+ * XXX: This is almost identical to reclaim_pages() in mm/vmscan.c, but it
+ *      internally calls damon_pa_migrate_folio_list() instead of
+ *      reclaim_folio_list().  It may be worth considering whether the two
+ *      can share a common function.
+ */
+static unsigned long damon_pa_migrate_pages(struct list_head *folio_list,
+					    enum migration_mode mm)
+{
+	int nid;
+	unsigned int nr_migrated = 0;
+	LIST_HEAD(node_folio_list);
+	unsigned int noreclaim_flag;
+
+	if (list_empty(folio_list))
+		return nr_migrated;
+
+	noreclaim_flag = memalloc_noreclaim_save();
+
+	nid = folio_nid(lru_to_folio(folio_list));
+	do {
+		struct folio *folio = lru_to_folio(folio_list);
+
+		if (nid == folio_nid(folio)) {
+			folio_clear_active(folio);
+			list_move(&folio->lru, &node_folio_list);
+			continue;
+		}
+
+		nr_migrated += damon_pa_migrate_folio_list(&node_folio_list,
+							   NODE_DATA(nid), mm);
+		nid = folio_nid(lru_to_folio(folio_list));
+	} while (!list_empty(folio_list));
+
+	nr_migrated += damon_pa_migrate_folio_list(&node_folio_list,
+						   NODE_DATA(nid), mm);
+
+	memalloc_noreclaim_restore(noreclaim_flag);
+
+	return nr_migrated;
+}
+
 static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
 				      enum migration_mode mm)
 {
@@ -247,7 +456,11 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
 		folio_test_clear_young(folio);
 		if (!folio_isolate_lru(folio))
 			goto put_folio;
-		if (folio_test_unevictable(folio))
+		/*
+		 * Since unevictable folios can still be demoted or promoted,
+		 * the unevictable check is needed only for pageout.
+		 */
+		if (mm == MIG_PAGEOUT && folio_test_unevictable(folio))
 			folio_putback_lru(folio);
 		else
 			list_add(&folio->lru, &folio_list);
@@ -258,6 +471,9 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
 	case MIG_PAGEOUT:
 		applied = reclaim_pages(&folio_list);
 		break;
+	case MIG_DEMOTE:
+		applied = damon_pa_migrate_pages(&folio_list, mm);
+		break;
 	default:
 		/* Unexpected migration mode. */
 		return 0;
@@ -314,6 +530,8 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
 		return damon_pa_mark_accessed(r, scheme);
 	case DAMOS_LRU_DEPRIO:
 		return damon_pa_deactivate_pages(r, scheme);
+	case DAMOS_DEMOTE:
+		return damon_pa_migrate(r, scheme, MIG_DEMOTE);
 	case DAMOS_STAT:
 		break;
 	default:
@@ -334,6 +552,8 @@ static int damon_pa_scheme_score(struct damon_ctx *context,
 		return damon_hot_score(context, r, scheme);
 	case DAMOS_LRU_DEPRIO:
 		return damon_cold_score(context, r, scheme);
+	case DAMOS_DEMOTE:
+		return damon_cold_score(context, r, scheme);
 	default:
 		break;
 	}
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index fe0fe2562000..53e47fad5021 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1186,6 +1186,7 @@ static const char * const damon_sysfs_damos_action_strs[] = {
 	"nohugepage",
 	"lru_prio",
 	"lru_deprio",
+	"demote",
 	"stat",
 };
 
-- 
2.34.1
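
For completeness, here is a minimal userspace sketch of selecting the new
action once this patch is applied.  It assumes a kdamond, context, and
scheme have already been created through the DAMON sysfs interface, so the
path below (kdamond 0, context 0, scheme 0) is illustrative rather than
fixed; the "demote" string matches the entry added to
damon_sysfs_damos_action_strs[] above.

/*
 * Sketch: write "demote" to an existing DAMOS scheme's action file.
 * Adjust the sysfs path to your setup.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *action_path =
		"/sys/kernel/mm/damon/admin/kdamonds/0/contexts/0/schemes/0/action";
	FILE *f = fopen(action_path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	if (fprintf(f, "demote\n") < 0) {
		perror("fprintf");
		fclose(f);
		return EXIT_FAILURE;
	}
	fclose(f);
	return EXIT_SUCCESS;
}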

