From: Chunsheng Luo <luochunsheng@ustc.edu>
To: gregkh@linuxfoundation.org
Cc: rafael@kernel.org, akpm@linux-foundation.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Chunsheng Luo <luochunsheng@ustc.edu>
Subject: [PATCH] meminfo: provide estimated per-node's available memory
Date: Sun, 4 Feb 2024 03:34:14 -0500 [thread overview]
Message-ID: <20240204083414.107799-1-luochunsheng@ustc.edu> (raw)
Provide an estimate of each node's available memory, in addition to the
system-wide available memory estimate provided by /proc/meminfo.
Like commit 34e431b0ae39 ("/proc/meminfo: provide estimated available
memory"), it is more convenient to provide such an estimate in
/sys/bus/node/devices/nodeX/meminfo. If things change in the future,
we only have to change it in one place.
Shown below:
/sys/bus/node/devices/node1/meminfo:
Node 1 MemTotal: 4084480 kB
Node 1 MemFree: 3348820 kB
Node 1 MemAvailable: 3647972 kB
Node 1 MemUsed: 735660 kB
....
Link: https://github.com/numactl/numactl/issues/210
Signed-off-by: Chunsheng Luo <luochunsheng@ustc.edu>
---
drivers/base/node.c | 4 ++++
include/linux/mm.h | 1 +
mm/show_mem.c | 43 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 48 insertions(+)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 1c05640461dd..ba27f25d2b81 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -372,11 +372,13 @@ static ssize_t node_read_meminfo(struct device *dev,
int len = 0;
int nid = dev->id;
struct pglist_data *pgdat = NODE_DATA(nid);
+ long available;
struct sysinfo i;
unsigned long sreclaimable, sunreclaimable;
unsigned long swapcached = 0;
si_meminfo_node(&i, nid);
+ available = si_mem_node_available(nid);
sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
#ifdef CONFIG_SWAP
@@ -385,6 +387,7 @@ static ssize_t node_read_meminfo(struct device *dev,
len = sysfs_emit_at(buf, len,
"Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n"
+ "Node %d MemAvailable: %8lu kB\n"
"Node %d MemUsed: %8lu kB\n"
"Node %d SwapCached: %8lu kB\n"
"Node %d Active: %8lu kB\n"
@@ -397,6 +400,7 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d Mlocked: %8lu kB\n",
nid, K(i.totalram),
nid, K(i.freeram),
+ nid, K(available),
nid, K(i.totalram - i.freeram),
nid, K(swapcached),
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f5a97dec5169..3caef083fe5b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3202,6 +3202,7 @@ static inline void show_mem(void)
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
+extern long si_mem_node_available(int nid);
#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES
extern unsigned long arch_reserved_kernel_pages(void);
#endif
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 8dcfafbd283c..37d4c7212b06 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -86,6 +86,49 @@ void si_meminfo(struct sysinfo *val)
EXPORT_SYMBOL(si_meminfo);
#ifdef CONFIG_NUMA
+long si_mem_node_available(int nid)
+{
+ int zone_type;
+ long available;
+ unsigned long pagecache;
+ unsigned long wmark_low = 0;
+ unsigned long reclaimable;
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ wmark_low += low_wmark_pages((&pgdat->node_zones[zone_type]));
+
+ /*
+ * Estimate the amount of memory available for userspace allocations,
+ * without causing swapping for mbind process.
+ */
+ available = sum_zone_node_page_state(nid, NR_FREE_PAGES) - pgdat->totalreserve_pages;
+
+ /*
+ * Not all the page cache can be freed, otherwise the system will
+ * start swapping or thrashing. Assume at least half of the page
+ * cache, or the low watermark worth of cache, needs to stay.
+ */
+ pagecache = node_page_state(pgdat, NR_ACTIVE_FILE) +
+ node_page_state(pgdat, NR_INACTIVE_FILE);
+ pagecache -= min(pagecache / 2, wmark_low);
+ available += pagecache;
+
+ /*
+ * Part of the reclaimable slab and other kernel memory consists of
+ * items that are in use, and cannot be freed. Cap this estimate at the
+ * low watermark.
+ */
+ reclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) +
+ node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE);
+ reclaimable -= min(reclaimable / 2, wmark_low);
+ available += reclaimable;
+
+ if (available < 0)
+ available = 0;
+ return available;
+}
+
void si_meminfo_node(struct sysinfo *val, int nid)
{
int zone_type; /* needs to be signed */
--
2.43.0
next reply other threads:[~2024-02-04 8:34 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-04 8:34 Chunsheng Luo [this message]
2024-02-09 9:56 ` [PATCH] meminfo: provide estimated per-node's available memory Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240204083414.107799-1-luochunsheng@ustc.edu \
--to=luochunsheng@ustc.edu \
--cc=akpm@linux-foundation.org \
--cc=gregkh@linuxfoundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=rafael@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).