From: Mike Kravetz <mike.kravetz@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: Song Liu <songliubraving@fb.com>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
Mel Gorman <mgorman@techsingularity.net>,
Vlastimil Babka <vbabka@suse.cz>,
Andrew Morton <akpm@linux-foundation.org>,
Mike Kravetz <mike.kravetz@oracle.com>
Subject: [PATCH] mm: always consider THP when adjusting min_free_kbytes
Date: Tue, 4 Feb 2020 11:41:56 -0800 [thread overview]
Message-ID: <20200204194156.61672-1-mike.kravetz@oracle.com> (raw)
At system initialization time, min_free_kbytes is calculated based
on the amount of memory in the system. If THP is enabled, then
khugepaged is started and min_free_kbytes may be adjusted in an
attempt to reserve some pageblocks for THP allocations.
When memory is offlined or onlined, min_free_kbytes is recalculated
and adjusted based on the amount of memory. However, the adjustment
for THP is not considered. Here is an example from a 2 node system
with 8GB of memory.
# cat /proc/sys/vm/min_free_kbytes
90112
# echo 0 > /sys/devices/system/node/node1/memory56/online
# cat /proc/sys/vm/min_free_kbytes
11243
# echo 1 > /sys/devices/system/node/node1/memory56/online
# cat /proc/sys/vm/min_free_kbytes
11412
One would expect that min_free_kbytes would return to its original
value after the offline/online operations.
Create a simple interface for THP/khugepaged based adjustment and
call this whenever min_free_kbytes is adjusted.
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
include/linux/khugepaged.h | 5 +++++
mm/khugepaged.c | 35 ++++++++++++++++++++++++++++++-----
mm/page_alloc.c | 4 +++-
3 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index bc45ea1efbf7..8f02d3575829 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,6 +15,7 @@ extern int __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long vm_flags);
+extern bool khugepaged_adjust_min_free_kbytes(void);
#ifdef CONFIG_SHMEM
extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
#else
@@ -81,6 +82,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
{
return 0;
}
+static bool khugepaged_adjust_min_free_kbytes(void)
+{
+ return false;
+}
static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
unsigned long addr)
{
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b679908743cb..d8040cf19e98 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2138,7 +2138,7 @@ static int khugepaged(void *none)
return 0;
}
-static void set_recommended_min_free_kbytes(void)
+bool __khugepaged_adjust_min_free_kbytes(void)
{
struct zone *zone;
int nr_zones = 0;
@@ -2174,17 +2174,26 @@ static void set_recommended_min_free_kbytes(void)
if (recommended_min > min_free_kbytes) {
if (user_min_free_kbytes >= 0)
- pr_info("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
+ pr_info_once("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
min_free_kbytes, recommended_min);
min_free_kbytes = recommended_min;
+ return true;
}
- setup_per_zone_wmarks();
+
+ return false;
+}
+
+static void set_recommended_min_free_kbytes(void)
+{
+ if (__khugepaged_adjust_min_free_kbytes())
+ setup_per_zone_wmarks();
}
-int start_stop_khugepaged(void)
+static struct task_struct *khugepaged_thread __read_mostly;
+
+int __ref start_stop_khugepaged(void)
{
- static struct task_struct *khugepaged_thread __read_mostly;
static DEFINE_MUTEX(khugepaged_mutex);
int err = 0;
@@ -2207,8 +2216,24 @@ int start_stop_khugepaged(void)
} else if (khugepaged_thread) {
kthread_stop(khugepaged_thread);
khugepaged_thread = NULL;
+ init_per_zone_wmark_min();
}
fail:
mutex_unlock(&khugepaged_mutex);
return err;
}
+
+bool khugepaged_adjust_min_free_kbytes(void)
+{
+ bool ret = false;
+
+ /*
+ * This is a bit racy, and we could miss transitions. However,
+ * start/stop code above will make additional adjustments at the
+ * end of transitions.
+ */
+ if (khugepaged_enabled() && khugepaged_thread)
+ ret = __khugepaged_adjust_min_free_kbytes();
+
+ return ret;
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d047bf7d8fd4..a7b3a6663ba6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,6 +68,7 @@
#include <linux/lockdep.h>
#include <linux/nmi.h>
#include <linux/psi.h>
+#include <linux/khugepaged.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -7827,9 +7828,10 @@ int __meminit init_per_zone_wmark_min(void)
if (min_free_kbytes > 65536)
min_free_kbytes = 65536;
} else {
- pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
+ pr_warn_once("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
new_min_free_kbytes, user_min_free_kbytes);
}
+ (void)khugepaged_adjust_min_free_kbytes();
setup_per_zone_wmarks();
refresh_zone_stat_thresholds();
setup_per_zone_lowmem_reserve();
--
2.24.1
next reply other threads:[~2020-02-04 19:42 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-04 19:41 Mike Kravetz [this message]
2020-02-04 20:33 ` [PATCH] mm: always consider THP when adjusting min_free_kbytes David Rientjes
2020-02-04 21:42 ` Mike Kravetz
2020-02-04 21:53 ` Matthew Wilcox
2020-02-05 0:33 ` Mike Kravetz
2020-02-06 1:36 ` Mike Kravetz
2020-02-06 20:09 ` Khalid Aziz
2020-02-06 20:39 ` Matthew Wilcox
2020-02-06 21:23 ` Mike Kravetz
2020-02-06 21:32 ` Matthew Wilcox
2020-02-10 18:58 ` Mike Kravetz
2020-02-04 23:37 ` Khalid Aziz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200204194156.61672-1-mike.kravetz@oracle.com \
--to=mike.kravetz@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@techsingularity.net \
--cc=songliubraving@fb.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).