From: Liu Shixin <liushixin2@huawei.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	huang ying <huang.ying.caritas@gmail.com>,
	Aaron Lu <aaron.lu@intel.com>,
	Dave Hansen <dave.hansen@intel.com>,
	"Michal Hocko" <mhocko@suse.com>,
	Jesper Dangaard Brouer <brouer@redhat.com>,
	Vlastimil Babka <vbabka@suse.cz>, Kemi Wang <kemi.wang@intel.com>,
	"Kefeng Wang" <wangkefeng.wang@huawei.com>
Cc: <linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>,
	Liu Shixin <liushixin2@huawei.com>
Subject: [PATCH -next v2] mm, proc: collect percpu free pages into the free pages
Date: Mon, 22 Aug 2022 11:33:54 +0800
Message-ID: <20220822033354.952849-1-liushixin2@huawei.com>
In-Reply-To: <20220822023311.909316-1-liushixin2@huawei.com>

Pages on the pcplists are free pages that can still be allocated, but they
are not counted in the reported free or available memory; the pcp count is
currently only shown by show_mem(). Since commit d8a759b57035 ("mm,
page_alloc: double zone's batchsize"), the reported free memory has
decreased noticeably. With a large number of CPUs and zones, the number of
pages held on the percpu lists can be very large, so it is better to let
the user see the pcp count as well.
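
For reference, the pages being counted here live on each zone's per-CPU
page sets. A simplified sketch of the structure the new helpers walk
(reduced from struct per_cpu_pages in include/linux/mmzone.h; only the
fields relevant here are shown, and details vary between kernel versions):

	struct per_cpu_pages {
		int count;	/* number of pages in the lists */
		int high;	/* high watermark, emptying needed */
		int batch;	/* chunk size for buddy add/remove */
		/* ... free lists, one per migratetype/order bucket ... */
	};

nr_free_zone_pcplist_pages() below simply sums ->count over all online
CPUs for a zone, and nr_free_pcplist_pages() sums that over all zones.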

On a machine with 3 zones and 72 CPUs, the pcp lists could theoretically
hold up to 162MB (3*72*768KB) before commit d8a759b57035; after that
commit they can hold up to 324MB. In practice, about 114MB has been
observed on an idle system after startup, an increase of roughly 80MB.
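
To spell out the arithmetic behind these figures (assuming each zone's pcp
lists can cache roughly 768KB worth of pages per CPU, i.e. the
pre-d8a759b57035 pcp->high):

	3 zones * 72 CPUs * 768 KB = 165888 KB ~= 162 MB

Doubling the batch size roughly doubles pcp->high, which gives the 324MB
theoretical maximum after that commit.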

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 mm/page_alloc.c | 51 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 033f1e26d15b..f89928d3ad4e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5853,6 +5853,26 @@ static unsigned long nr_free_zone_pages(int offset)
 	return sum;
 }
 
+static unsigned long nr_free_zone_pcplist_pages(struct zone *zone)
+{
+	unsigned long sum = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		sum += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+	return sum;
+}
+
+static unsigned long nr_free_pcplist_pages(void)
+{
+	unsigned long sum = 0;
+	struct zone *zone;
+
+	for_each_zone(zone)
+		sum += nr_free_zone_pcplist_pages(zone);
+	return sum;
+}
+
 /**
  * nr_free_buffer_pages - count number of pages beyond high watermark
  *
@@ -5894,7 +5914,8 @@ long si_mem_available(void)
 	 * Estimate the amount of memory available for userspace allocations,
 	 * without causing swapping or OOM.
 	 */
-	available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
+	available = global_zone_page_state(NR_FREE_PAGES) +
+		    nr_free_pcplist_pages() - totalreserve_pages;
 
 	/*
 	 * Not all the page cache can be freed, otherwise the system will
@@ -5924,7 +5945,8 @@ void si_meminfo(struct sysinfo *val)
 {
 	val->totalram = totalram_pages();
 	val->sharedram = global_node_page_state(NR_SHMEM);
-	val->freeram = global_zone_page_state(NR_FREE_PAGES);
+	val->freeram = global_zone_page_state(NR_FREE_PAGES) +
+		       nr_free_pcplist_pages();
 	val->bufferram = nr_blockdev_pages();
 	val->totalhigh = totalhigh_pages();
 	val->freehigh = nr_free_highpages();
@@ -5938,30 +5960,28 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 {
 	int zone_type;		/* needs to be signed */
 	unsigned long managed_pages = 0;
+	unsigned long free_pages = sum_zone_node_page_state(nid, NR_FREE_PAGES);
 	unsigned long managed_highpages = 0;
 	unsigned long free_highpages = 0;
 	pg_data_t *pgdat = NODE_DATA(nid);
 
-	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-		managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
-	val->totalram = managed_pages;
-	val->sharedram = node_page_state(pgdat, NR_SHMEM);
-	val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
-#ifdef CONFIG_HIGHMEM
 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
 
+		managed_pages += zone_managed_pages(zone);
+		free_pages += nr_free_zone_pcplist_pages(zone);
+#ifdef CONFIG_HIGHMEM
 		if (is_highmem(zone)) {
 			managed_highpages += zone_managed_pages(zone);
 			free_highpages += zone_page_state(zone, NR_FREE_PAGES);
 		}
+#endif
 	}
+	val->totalram = managed_pages;
+	val->sharedram = node_page_state(pgdat, NR_SHMEM);
+	val->freeram = free_pages;
 	val->totalhigh = managed_highpages;
 	val->freehigh = free_highpages;
-#else
-	val->totalhigh = managed_highpages;
-	val->freehigh = free_highpages;
-#endif
 	val->mem_unit = PAGE_SIZE;
 }
 #endif
@@ -6035,8 +6055,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
 
-		for_each_online_cpu(cpu)
-			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+		free_pcp += nr_free_zone_pcplist_pages(zone);
 	}
 
 	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
@@ -6128,9 +6147,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
 
-		free_pcp = 0;
-		for_each_online_cpu(cpu)
-			free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+		free_pcp = nr_free_zone_pcplist_pages(zone);
 
 		show_node(zone);
 		printk(KERN_CONT
-- 
2.25.1



Thread overview: 13+ messages
2022-08-22  2:33 [PATCH -next] mm, proc: collect percpu free pages into the free pages Liu Shixin
2022-08-22  3:33 ` Liu Shixin [this message]
2022-08-22 21:12   ` [PATCH -next v2] " Andrew Morton
2022-08-22 21:13     ` Andrew Morton
2022-08-23 13:12       ` Liu Shixin
2022-08-23  7:50     ` Michal Hocko
2022-08-23 12:46       ` Liu Shixin
2022-08-23 13:37         ` Michal Hocko
2022-08-24 10:05           ` Liu Shixin
2022-08-24 10:12             ` Michal Hocko
2023-11-24 17:54           ` Dmytro Maluka
2023-11-25  2:22             ` Kefeng Wang
2023-11-27  8:50             ` Michal Hocko
