From: Kefeng Wang <wangkefeng.wang@huawei.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Ryan Roberts <ryan.roberts@arm.com>,
	David Hildenbrand <david@redhat.com>,
	Barry Song <v-songbaohua@oppo.com>,
	Vlastimil Babka <vbabka@suse.cz>, Zi Yan <ziy@nvidia.com>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Jonathan Corbet <corbet@lwn.net>, Yang Shi <shy828301@gmail.com>,
	Yu Zhao <yuzhao@google.com>, <linux-mm@kvack.org>,
	Kefeng Wang <wangkefeng.wang@huawei.com>
Subject: [PATCH rfc 3/3] mm: pcp: show per-order pages count
Date: Mon, 15 Apr 2024 16:12:20 +0800
Message-ID: <20240415081220.3246839-4-wangkefeng.wang@huawei.com>
In-Reply-To: <20240415081220.3246839-1-wangkefeng.wang@huawei.com>

THIS IS ONLY FOR DEBUG.

Show more detail about the per-order page counts on each CPU in
zoneinfo, and add a new pcp_order_stat in sysfs which shows the total
count of PCP pages for each hugepage size.

  # cat /proc/zoneinfo
    ....
  cpu: 15
            count: 275
            high:  529
            batch: 63
            order0: 59
            order1: 28
            order2: 28
            order3: 6
            order4: 0
            order5: 0
            order6: 0
            order7: 0
            order8: 0
            order9: 0

  # cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/pcp_order_stat
  10
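
Note that the per-order counters track page blocks of the given order,
not base pages, so they reconcile with the count line in the zoneinfo
example above: 59*1 + 28*2 + 28*4 + 6*8 = 275. As a minimal sketch,
that invariant could be written as a hypothetical debug check (not part
of this patch; it assumes the per_order_count[] field and NR_PCP_ORDER
constant added below):

  /*
   * Hypothetical consistency check: each per_order_count[] entry
   * counts page blocks of that order, so the order-weighted sum must
   * equal the base-page total kept in pcp->count.
   */
  static bool pcp_order_stats_consistent(struct per_cpu_pages *pcp)
  {
  	int order, pages = 0;

  	for (order = 0; order < NR_PCP_ORDER; order++)
  		pages += pcp->per_order_count[order] << order;

  	return pages == pcp->count;
  }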

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/mmzone.h |  6 ++++++
 include/linux/vmstat.h | 19 +++++++++++++++++++
 mm/Kconfig.debug       |  8 ++++++++
 mm/huge_memory.c       | 27 +++++++++++++++++++++++++++
 mm/page_alloc.c        |  4 ++++
 mm/vmstat.c            | 16 ++++++++++++++++
 6 files changed, 80 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c745e2f1a0f2..c32c01468a77 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -665,6 +665,9 @@ enum zone_watermarks {
 #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
 #define HIGHORDER_PCP_LIST_INDEX (NR_LOWORDER_PCP_LISTS - (PAGE_ALLOC_COSTLY_ORDER + 1))
 #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
+#ifdef CONFIG_PCP_ORDER_STATS
+#define NR_PCP_ORDER (PAGE_ALLOC_COSTLY_ORDER + NR_PCP_THP + 1)
+#endif
 
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
@@ -702,6 +705,9 @@ struct per_cpu_pages {
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[NR_PCP_LISTS];
+#ifdef CONFIG_PCP_ORDER_STATS
+	int per_order_count[NR_PCP_ORDER]; /* per-order page counts */
+#endif
 } ____cacheline_aligned_in_smp;
 
 struct per_cpu_zonestat {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 735eae6e272c..91843f2d327f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -624,4 +624,23 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
 {
 	lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
 }
+
+static inline void pcp_order_stat_mod(struct per_cpu_pages *pcp, int order,
+				      int val)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	pcp->per_order_count[order] += val;
+#endif
+}
+
+static inline void pcp_order_stat_inc(struct per_cpu_pages *pcp, int order)
+{
+	pcp_order_stat_mod(pcp, order, 1);
+}
+
+static inline void pcp_order_stat_dec(struct per_cpu_pages *pcp, int order)
+{
+	pcp_order_stat_mod(pcp, order, -1);
+}
+
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index afc72fde0f03..57eef0ce809b 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -276,3 +276,11 @@ config PER_VMA_LOCK_STATS
 	  overhead in the page fault path.
 
 	  If in doubt, say N.
+
+config PCP_ORDER_STATS
+	bool "Per-order statistics for PCP (Per-CPU pageset)"
+	help
+	  Say Y to show per-order statistics of the Per-CPU pageset, both
+	  in zoneinfo and via pcp_order_stat in sysfs.
+
+	  If in doubt, say N.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9b8a8aa36526..0c6262bb8fe4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -599,12 +599,39 @@ DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
 DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
 
+#ifdef CONFIG_PCP_ORDER_STATS
+static ssize_t pcp_order_stat_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	int order = to_thpsize(kobj)->order;
+	unsigned int counts = 0;
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pages *pcp;
+		int i;
+
+		for_each_online_cpu(i) {
+			pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
+			counts += pcp->per_order_count[order];
+		}
+	}
+
+	return sysfs_emit(buf, "%u\n", counts);
+}
+
+static struct kobj_attribute pcp_order_stat_attr = __ATTR_RO(pcp_order_stat);
+#endif
+
 static struct attribute *stats_attrs[] = {
 	&anon_alloc_attr.attr,
 	&anon_alloc_fallback_attr.attr,
 	&anon_swpout_attr.attr,
 	&anon_swpout_fallback_attr.attr,
 	&anon_swpin_refault_attr.attr,
+#ifdef CONFIG_PCP_ORDER_STATS
+	&pcp_order_stat_attr.attr,
+#endif
 	NULL,
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 25fd3fe30cb0..f44cdf8dec50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1185,6 +1185,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			list_del(&page->pcp_list);
 			count -= nr_pages;
 			pcp->count -= nr_pages;
+			pcp_order_stat_dec(pcp, order);
 
 			__free_one_page(page, pfn, zone, order, mt, FPI_NONE);
 			trace_mm_page_pcpu_drain(page, order, mt);
@@ -2560,6 +2561,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->pcp_list, &pcp->lists[pindex]);
 	pcp->count += 1 << order;
+	pcp_order_stat_inc(pcp, order);
 
 	batch = READ_ONCE(pcp->batch);
 	/*
@@ -2957,6 +2959,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+			pcp_order_stat_mod(pcp, order, alloced);
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2964,6 +2967,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 		page = list_first_entry(list, struct page, pcp_list);
 		list_del(&page->pcp_list);
 		pcp->count -= 1 << order;
+		pcp_order_stat_dec(pcp, order);
 	} while (check_new_pages(page, order));
 
 	return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..632bb1ed6a53 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1674,6 +1674,19 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 	return false;
 }
 
+static void zoneinfo_show_pcp_order_stat(struct seq_file *m,
+					 struct per_cpu_pages *pcp)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	int j;
+
+	for (j = 0; j < NR_PCP_ORDER; j++)
+		seq_printf(m,
+			   "\n              order%d: %i",
+			   j, pcp->per_order_count[j]);
+#endif
+}
+
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 							struct zone *zone)
 {
@@ -1748,6 +1761,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 			   pcp->count,
 			   pcp->high,
 			   pcp->batch);
+
+		zoneinfo_show_pcp_order_stat(m, pcp);
+
 #ifdef CONFIG_SMP
 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
 		seq_printf(m, "\n  vm stats threshold: %d",
-- 
2.27.0



Thread overview: 17+ messages
2024-04-15  8:12 [PATCH rfc 0/3] mm: allow more high-order pages stored on PCP lists Kefeng Wang
2024-04-15  8:12 ` [PATCH rfc 1/3] mm: prepare more high-order pages to be stored on the per-cpu lists Kefeng Wang
2024-04-15 11:41   ` Baolin Wang
2024-04-15 12:25     ` Kefeng Wang
2024-04-15  8:12 ` [PATCH rfc 2/3] mm: add control to allow specified high-order pages stored on PCP list Kefeng Wang
2024-04-15  8:12 ` Kefeng Wang [this message]
2024-04-15  8:18 ` [PATCH rfc 0/3] mm: allow more high-order pages stored on PCP lists Barry Song
2024-04-15  8:59   ` Kefeng Wang
2024-04-15 10:52     ` David Hildenbrand
2024-04-15 11:14       ` Barry Song
2024-04-15 12:17       ` Kefeng Wang
2024-04-16  0:21         ` Barry Song
2024-04-16  4:50           ` Kefeng Wang
2024-04-16  4:58             ` Kefeng Wang
2024-04-16  5:26               ` Barry Song
2024-04-16  7:03                 ` David Hildenbrand
2024-04-16  8:06                   ` Kefeng Wang
