From: Kefeng Wang <wangkefeng.wang@huawei.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Ryan Roberts <ryan.roberts@arm.com>,
	David Hildenbrand <david@redhat.com>,
	Barry Song <v-songbaohua@oppo.com>,
	Vlastimil Babka <vbabka@suse.cz>, Zi Yan <ziy@nvidia.com>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Jonathan Corbet <corbet@lwn.net>, Yang Shi <shy828301@gmail.com>,
	Yu Zhao <yuzhao@google.com>, <linux-mm@kvack.org>,
	Kefeng Wang <wangkefeng.wang@huawei.com>
Subject: [PATCH rfc 3/3] mm: pcp: show per-order pages count
Date: Mon, 15 Apr 2024 16:12:20 +0800
Message-ID: <20240415081220.3246839-4-wangkefeng.wang@huawei.com>
In-Reply-To: <20240415081220.3246839-1-wangkefeng.wang@huawei.com>

THIS IS ONLY FOR DEBUG.

Show more detail about the per-order page counts on each CPU in
zoneinfo, and add a new pcp_order_stat in sysfs which shows the total
count of PCP pages for each hugepage size.

  # cat /proc/zoneinfo
    ....
  cpu: 15
            count: 275
            high:  529
            batch: 63
            order0: 59
            order1: 28
            order2: 28
            order3: 6
            order4: 0
            order5: 0
            order6: 0
            order7: 0
            order8: 0
            order9: 0

  # cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/pcp_order_stat
  10
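
Note that the per-order counters track page blocks of the given order,
not base pages, so they reconcile with the count line in the zoneinfo
example above: 59*1 + 28*2 + 28*4 + 6*8 = 275. As a minimal sketch,
that invariant could be written as a hypothetical debug check (not part
of this patch; it assumes the per_order_count[] field and NR_PCP_ORDER
constant added below):

  /*
   * Hypothetical consistency check: each per_order_count[] entry
   * counts page blocks of that order, so the order-weighted sum must
   * equal the base-page total kept in pcp->count.
   */
  static bool pcp_order_stats_consistent(struct per_cpu_pages *pcp)
  {
  	int order, pages = 0;

  	for (order = 0; order < NR_PCP_ORDER; order++)
  		pages += pcp->per_order_count[order] << order;

  	return pages == pcp->count;
  }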

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/mmzone.h |  6 ++++++
 include/linux/vmstat.h | 19 +++++++++++++++++++
 mm/Kconfig.debug       |  8 ++++++++
 mm/huge_memory.c       | 27 +++++++++++++++++++++++++++
 mm/page_alloc.c        |  4 ++++
 mm/vmstat.c            | 16 ++++++++++++++++
 6 files changed, 80 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c745e2f1a0f2..c32c01468a77 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -665,6 +665,9 @@ enum zone_watermarks {
 #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))
 #define HIGHORDER_PCP_LIST_INDEX (NR_LOWORDER_PCP_LISTS - (PAGE_ALLOC_COSTLY_ORDER + 1))
 #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP)
+#ifdef CONFIG_PCP_ORDER_STATS
+#define NR_PCP_ORDER (PAGE_ALLOC_COSTLY_ORDER + NR_PCP_THP + 1)
+#endif
 
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
@@ -702,6 +705,9 @@ struct per_cpu_pages {
 
 	/* Lists of pages, one per migrate type stored on the pcp-lists */
 	struct list_head lists[NR_PCP_LISTS];
+#ifdef CONFIG_PCP_ORDER_STATS
+	int per_order_count[NR_PCP_ORDER]; /* per-order page counts */
+#endif
 } ____cacheline_aligned_in_smp;
 
 struct per_cpu_zonestat {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 735eae6e272c..91843f2d327f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -624,4 +624,23 @@ static inline void lruvec_stat_sub_folio(struct folio *folio,
 {
 	lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
 }
+
+static inline void pcp_order_stat_mod(struct per_cpu_pages *pcp, int order,
+				      int val)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	pcp->per_order_count[order] += val;
+#endif
+}
+
+static inline void pcp_order_stat_inc(struct per_cpu_pages *pcp, int order)
+{
+	pcp_order_stat_mod(pcp, order, 1);
+}
+
+static inline void pcp_order_stat_dec(struct per_cpu_pages *pcp, int order)
+{
+	pcp_order_stat_mod(pcp, order, -1);
+}
+
 #endif /* _LINUX_VMSTAT_H */
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index afc72fde0f03..57eef0ce809b 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -276,3 +276,11 @@ config PER_VMA_LOCK_STATS
 	  overhead in the page fault path.
 
 	  If in doubt, say N.
+
+config PCP_ORDER_STATS
+	bool "Per-order statistics for PCP (Per-CPU pageset)"
+	help
+	  Say Y to show per-order statistics of the Per-CPU pageset, both
+	  in zoneinfo and via pcp_order_stat in sysfs.
+
+	  If in doubt, say N.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9b8a8aa36526..0c6262bb8fe4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -599,12 +599,39 @@ DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
 DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_swpin_refault, MTHP_STAT_ANON_SWPIN_REFAULT);
 
+#ifdef CONFIG_PCP_ORDER_STATS
+static ssize_t pcp_order_stat_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	int order = to_thpsize(kobj)->order;
+	unsigned int counts = 0;
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pages *pcp;
+		int i;
+
+		for_each_online_cpu(i) {
+			pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
+			counts += pcp->per_order_count[order];
+		}
+	}
+
+	return sysfs_emit(buf, "%u\n", counts);
+}
+
+static struct kobj_attribute pcp_order_stat_attr = __ATTR_RO(pcp_order_stat);
+#endif
+
 static struct attribute *stats_attrs[] = {
 	&anon_alloc_attr.attr,
 	&anon_alloc_fallback_attr.attr,
 	&anon_swpout_attr.attr,
 	&anon_swpout_fallback_attr.attr,
 	&anon_swpin_refault_attr.attr,
+#ifdef CONFIG_PCP_ORDER_STATS
+	&pcp_order_stat_attr.attr,
+#endif
 	NULL,
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 25fd3fe30cb0..f44cdf8dec50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1185,6 +1185,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			list_del(&page->pcp_list);
 			count -= nr_pages;
 			pcp->count -= nr_pages;
+			pcp_order_stat_dec(pcp, order);
 
 			__free_one_page(page, pfn, zone, order, mt, FPI_NONE);
 			trace_mm_page_pcpu_drain(page, order, mt);
@@ -2560,6 +2561,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	pindex = order_to_pindex(migratetype, order);
 	list_add(&page->pcp_list, &pcp->lists[pindex]);
 	pcp->count += 1 << order;
+	pcp_order_stat_inc(pcp, order);
 
 	batch = READ_ONCE(pcp->batch);
 	/*
@@ -2957,6 +2959,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 					migratetype, alloc_flags);
 
 			pcp->count += alloced << order;
+			pcp_order_stat_mod(pcp, order, alloced);
 			if (unlikely(list_empty(list)))
 				return NULL;
 		}
@@ -2964,6 +2967,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
 		page = list_first_entry(list, struct page, pcp_list);
 		list_del(&page->pcp_list);
 		pcp->count -= 1 << order;
+		pcp_order_stat_dec(pcp, order);
 	} while (check_new_pages(page, order));
 
 	return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..632bb1ed6a53 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1674,6 +1674,19 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 	return false;
 }
 
+static void zoneinfo_show_pcp_order_stat(struct seq_file *m,
+					 struct per_cpu_pages *pcp)
+{
+#ifdef CONFIG_PCP_ORDER_STATS
+	int j;
+
+	for (j = 0; j < NR_PCP_ORDER; j++)
+		seq_printf(m,
+			   "\n              order%d: %i",
+			   j, pcp->per_order_count[j]);
+#endif
+}
+
 static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 							struct zone *zone)
 {
@@ -1748,6 +1761,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 			   pcp->count,
 			   pcp->high,
 			   pcp->batch);
+
+		zoneinfo_show_pcp_order_stat(m, pcp);
+
 #ifdef CONFIG_SMP
 		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
 		seq_printf(m, "\n  vm stats threshold: %d",
-- 
2.27.0



Thread overview: 17+ messages
2024-04-15  8:12 [PATCH rfc 0/3] mm: allow more high-order pages stored on PCP lists Kefeng Wang
2024-04-15  8:12 ` [PATCH rfc 1/3] mm: prepare more high-order pages to be stored on the per-cpu lists Kefeng Wang
2024-04-15 11:41   ` Baolin Wang
2024-04-15 12:25     ` Kefeng Wang
2024-04-15  8:12 ` [PATCH rfc 2/3] mm: add control to allow specified high-order pages stored on PCP list Kefeng Wang
2024-04-15  8:12 ` Kefeng Wang [this message]
2024-04-15  8:18 ` [PATCH rfc 0/3] mm: allow more high-order pages stored on PCP lists Barry Song
2024-04-15  8:59   ` Kefeng Wang
2024-04-15 10:52     ` David Hildenbrand
2024-04-15 11:14       ` Barry Song
2024-04-15 12:17       ` Kefeng Wang
2024-04-16  0:21         ` Barry Song
2024-04-16  4:50           ` Kefeng Wang
2024-04-16  4:58             ` Kefeng Wang
2024-04-16  5:26               ` Barry Song
2024-04-16  7:03                 ` David Hildenbrand
2024-04-16  8:06                   ` Kefeng Wang
