All of lore.kernel.org
 help / color / mirror / Atom feed
From: Muchun Song <songmuchun@bytedance.com>
To: corbet@lwn.net, mike.kravetz@oracle.com,
	akpm@linux-foundation.org, mcgrof@kernel.org,
	keescook@chromium.org, yzaikin@google.com
Cc: linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, duanxiongchun@bytedance.com,
	Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH 3/3] mm: hugetlb: add hugetlb_free_vmemmap sysctl
Date: Mon, 28 Feb 2022 15:10:22 +0800	[thread overview]
Message-ID: <20220228071022.26143-4-songmuchun@bytedance.com> (raw)
In-Reply-To: <20220228071022.26143-1-songmuchun@bytedance.com>

We must add "hugetlb_free_vmemmap=on" to the boot cmdline and reboot the
server to enable the feature of freeing vmemmap pages of HugeTLB
pages. Rebooting usually takes a long time. Add a sysctl to enable
the feature at runtime so that a reboot is not needed.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 Documentation/admin-guide/sysctl/vm.rst | 13 +++++++++++++
 include/linux/hugetlb.h                 |  5 +++++
 include/linux/memory_hotplug.h          |  1 +
 kernel/sysctl.c                         | 11 +++++++++++
 mm/hugetlb_vmemmap.c                    | 23 +++++++++++++++++------
 mm/hugetlb_vmemmap.h                    |  4 +++-
 mm/memory_hotplug.c                     |  2 +-
 7 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index f4804ce37c58..01f18e6cc227 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -561,6 +561,19 @@ Change the minimum size of the hugepage pool.
 See Documentation/admin-guide/mm/hugetlbpage.rst
 
 
+hugetlb_free_vmemmap
+====================
+
+A toggle value indicating if vmemmap pages are allowed to be optimized.
+If it is off (0), then it can be set true (1).  Once true, the vmemmap
+pages associated with each HugeTLB page will be optimized, and the toggle
+cannot be set back to false.  It only optimizes the subsequent allocation
+of HugeTLB pages from the buddy system, while already allocated HugeTLB pages
+will not be optimized.
+
+See Documentation/admin-guide/mm/hugetlbpage.rst
+
+
 nr_hugepages_mempolicy
 ======================
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 53c1b6082a4c..cc4ab21892f5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1080,6 +1080,11 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 }
 #endif	/* CONFIG_HUGETLB_PAGE */
 
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+int hugetlb_vmemmap_sysctl_handler(struct ctl_table *table, int write,
+				   void *buffer, size_t *length, loff_t *ppos);
+#endif
+
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
 					struct mm_struct *mm, pte_t *pte)
 {
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e0b2209ab71c..b30f9fdaed73 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -349,6 +349,7 @@ extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
 				      struct mhp_params *params);
 void arch_remove_linear_mapping(u64 start, u64 size);
 extern bool mhp_supports_memmap_on_memory(unsigned long size);
+extern bool memmap_on_memory;
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ab3e9c937268..77f039849b2a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2223,6 +2223,17 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= hugetlb_sysctl_handler,
 	},
+#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
+	{
+		.procname	= "hugetlb_free_vmemmap",
+		.data		= &hugetlb_free_vmemmap_enabled_key.key,
+		.maxlen		= sizeof(hugetlb_free_vmemmap_enabled_key.key),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= hugetlb_vmemmap_sysctl_handler,
+		.extra1		= SYSCTL_ONE,
+	},
+#endif
 #ifdef CONFIG_NUMA
 	{
 		.procname       = "nr_hugepages_mempolicy",
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 836d1117f08b..3167021055d6 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -10,6 +10,7 @@
 
 #define pr_fmt(fmt)	"HugeTLB: " fmt
 
+#include <linux/memory_hotplug.h>
 #include "hugetlb_vmemmap.h"
 
 /*
@@ -118,17 +119,14 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
 	BUILD_BUG_ON(__NR_USED_SUBPAGE >=
 		     RESERVE_VMEMMAP_SIZE / sizeof(struct page));
 
-	if (!hugetlb_free_vmemmap_enabled())
-		return;
-
-	if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON) &&
-	    !is_power_of_2(sizeof(struct page))) {
+	if (!is_power_of_2(sizeof(struct page))) {
 		/*
 		 * The hugetlb_free_vmemmap_enabled_key can be enabled when
 		 * CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON. It should
 		 * be disabled if "struct page" crosses page boundaries.
 		 */
-		static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
+		if (IS_ENABLED(CONFIG_HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON))
+			static_branch_disable(&hugetlb_free_vmemmap_enabled_key);
 		return;
 	}
 
@@ -147,3 +145,16 @@ void __init hugetlb_vmemmap_init(struct hstate *h)
 	pr_info("can free %d vmemmap pages for %s\n", h->nr_free_vmemmap_pages,
 		h->name);
 }
+
+int hugetlb_vmemmap_sysctl_handler(struct ctl_table *table, int write,
+				   void *buffer, size_t *length, loff_t *ppos)
+{
+	/*
+	 * The vmemmap pages cannot be optimized if a "struct page" crosses page
+	 * boundaries or memory_hotplug.memmap_on_memory is enabled.
+	 */
+	if (write && (!is_power_of_2(sizeof(struct page)) || memmap_on_memory))
+		return -EPERM;
+
+	return proc_do_static_key(table, write, buffer, length, ppos);
+}
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index cb2bef8f9e73..b67a159027f4 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -21,7 +21,9 @@ void hugetlb_vmemmap_init(struct hstate *h);
  */
 static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
 {
-	return h->nr_free_vmemmap_pages;
+	if (hugetlb_free_vmemmap_enabled())
+		return h->nr_free_vmemmap_pages;
+	return 0;
 }
 #else
 static inline int alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c226a337c1ef..b5cc5abde05a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -46,7 +46,7 @@
 /*
  * memory_hotplug.memmap_on_memory parameter
  */
-static bool memmap_on_memory __ro_after_init;
+bool memmap_on_memory __ro_after_init;
 #ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
 module_param(memmap_on_memory, bool, 0444);
 MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
-- 
2.11.0


  parent reply	other threads:[~2022-02-28  7:11 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-28  7:10 [PATCH 0/3] add hugetlb_free_vmemmap sysctl Muchun Song
2022-02-28  7:10 ` [PATCH 1/3] mm: hugetlb: disable freeing vmemmap pages when struct page crosses page boundaries Muchun Song
2022-02-28  7:10 ` [PATCH 2/3] sysctl: allow to set extra1 to SYSCTL_ONE Muchun Song
2022-02-28  7:10 ` Muchun Song [this message]
2022-02-28 14:41   ` [PATCH 3/3] mm: hugetlb: add hugetlb_free_vmemmap sysctl Luis Chamberlain
2022-02-28 15:14     ` Muchun Song
2022-02-28 17:35   ` kernel test robot
2022-02-28 17:37   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220228071022.26143-4-songmuchun@bytedance.com \
    --to=songmuchun@bytedance.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=duanxiongchun@bytedance.com \
    --cc=keescook@chromium.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mcgrof@kernel.org \
    --cc=mike.kravetz@oracle.com \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.