All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Kravetz <mike.kravetz@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Song Liu <songliubraving@fb.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Mel Gorman <mgorman@techsingularity.net>,
	Vlastimil Babka <vbabka@suse.cz>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Kravetz <mike.kravetz@oracle.com>
Subject: [PATCH] mm: always consider THP when adjusting min_free_kbytes
Date: Tue,  4 Feb 2020 11:41:56 -0800	[thread overview]
Message-ID: <20200204194156.61672-1-mike.kravetz@oracle.com> (raw)

At system initialization time, min_free_kbytes is calculated based
on the amount of memory in the system.  If THP is enabled, then
khugepaged is started and min_free_kbytes may be adjusted in an
attempt to reserve some pageblocks for THP allocations.

When memory is offlined or onlined, min_free_kbytes is recalculated
and adjusted based on the amount of memory.  However, the adjustment
for THP is not considered.  Here is an example from a 2 node system
with 8GB of memory.

 # cat /proc/sys/vm/min_free_kbytes
 90112
 # echo 0 > /sys/devices/system/node/node1/memory56/online
 # cat /proc/sys/vm/min_free_kbytes
 11243
 # echo 1 > /sys/devices/system/node/node1/memory56/online
 # cat /proc/sys/vm/min_free_kbytes
 11412

One would expect that min_free_kbytes would return to its original
value after the offline/online operations.

Create a simple interface for THP/khugepaged based adjustment and
call this whenever min_free_kbytes is adjusted.

Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 include/linux/khugepaged.h |  5 +++++
 mm/khugepaged.c            | 35 ++++++++++++++++++++++++++++++-----
 mm/page_alloc.c            |  4 +++-
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index bc45ea1efbf7..8f02d3575829 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,6 +15,7 @@ extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
 extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 				      unsigned long vm_flags);
+extern bool khugepaged_adjust_min_free_kbytes(void);
 #ifdef CONFIG_SHMEM
 extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
 #else
@@ -81,6 +82,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 {
 	return 0;
 }
+static inline bool khugepaged_adjust_min_free_kbytes(void)
+{
+	return false;
+}
 static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
 					   unsigned long addr)
 {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b679908743cb..d8040cf19e98 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2138,7 +2138,7 @@ static int khugepaged(void *none)
 	return 0;
 }
 
-static void set_recommended_min_free_kbytes(void)
+bool __khugepaged_adjust_min_free_kbytes(void)
 {
 	struct zone *zone;
 	int nr_zones = 0;
@@ -2174,17 +2174,26 @@ static void set_recommended_min_free_kbytes(void)
 
 	if (recommended_min > min_free_kbytes) {
 		if (user_min_free_kbytes >= 0)
-			pr_info("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
+			pr_info_once("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
 				min_free_kbytes, recommended_min);
 
 		min_free_kbytes = recommended_min;
+		return true;
 	}
-	setup_per_zone_wmarks();
+
+	return false;
+}
+
+static void set_recommended_min_free_kbytes(void)
+{
+	if (__khugepaged_adjust_min_free_kbytes())
+		setup_per_zone_wmarks();
 }
 
-int start_stop_khugepaged(void)
+static struct task_struct *khugepaged_thread __read_mostly;
+
+int __ref start_stop_khugepaged(void)
 {
-	static struct task_struct *khugepaged_thread __read_mostly;
 	static DEFINE_MUTEX(khugepaged_mutex);
 	int err = 0;
 
@@ -2207,8 +2216,24 @@ int start_stop_khugepaged(void)
 	} else if (khugepaged_thread) {
 		kthread_stop(khugepaged_thread);
 		khugepaged_thread = NULL;
+		init_per_zone_wmark_min();
 	}
 fail:
 	mutex_unlock(&khugepaged_mutex);
 	return err;
 }
+
+bool khugepaged_adjust_min_free_kbytes(void)
+{
+	bool ret = false;
+
+	/*
+	 * This is a bit racy, and we could miss transitions.  However,
+	 * start/stop code above will make additional adjustments at the
+	 * end of transitions.
+	 */
+	if (khugepaged_enabled() && khugepaged_thread)
+		ret = __khugepaged_adjust_min_free_kbytes();
+
+	return ret;
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d047bf7d8fd4..a7b3a6663ba6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,6 +68,7 @@
 #include <linux/lockdep.h>
 #include <linux/nmi.h>
 #include <linux/psi.h>
+#include <linux/khugepaged.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -7827,9 +7828,10 @@ int __meminit init_per_zone_wmark_min(void)
 		if (min_free_kbytes > 65536)
 			min_free_kbytes = 65536;
 	} else {
-		pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
+		pr_warn_once("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
 				new_min_free_kbytes, user_min_free_kbytes);
 	}
+	(void)khugepaged_adjust_min_free_kbytes();
 	setup_per_zone_wmarks();
 	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
-- 
2.24.1


             reply	other threads:[~2020-02-04 19:42 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-04 19:41 Mike Kravetz [this message]
2020-02-04 20:33 ` [PATCH] mm: always consider THP when adjusting min_free_kbytes David Rientjes
2020-02-04 20:33   ` David Rientjes
2020-02-04 21:42   ` Mike Kravetz
2020-02-04 21:53     ` Matthew Wilcox
2020-02-05  0:33       ` Mike Kravetz
2020-02-06  1:36         ` Mike Kravetz
2020-02-06 20:09           ` Khalid Aziz
2020-02-06 20:39           ` Matthew Wilcox
2020-02-06 21:23             ` Mike Kravetz
2020-02-06 21:32               ` Matthew Wilcox
2020-02-10 18:58                 ` Mike Kravetz
2020-02-04 23:37     ` Khalid Aziz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200204194156.61672-1-mike.kravetz@oracle.com \
    --to=mike.kravetz@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@techsingularity.net \
    --cc=songliubraving@fb.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.