From: Mike Kravetz <mike.kravetz@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: David Hildenbrand <david@redhat.com>,
Michal Hocko <mhocko@suse.com>,
Oscar Salvador <osalvador@suse.de>, Zi Yan <ziy@nvidia.com>,
David Rientjes <rientjes@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Mike Kravetz <mike.kravetz@oracle.com>
Subject: [RFC PATCH 1/3] hugetlb: add demote hugetlb page sysfs interfaces
Date: Mon, 8 Mar 2021 16:18:53 -0800 [thread overview]
Message-ID: <20210309001855.142453-2-mike.kravetz@oracle.com> (raw)
In-Reply-To: <20210309001855.142453-1-mike.kravetz@oracle.com>
Two new sysfs files are added to demote hugetlb pages. These files are
both per-hugetlb page size and per node. Files are:
demote_size - The size in kB that pages are demoted to.
demote - The number of huge pages to demote.
Writing a value to demote will result in an attempt to demote that
number of hugetlb pages to an appropriate number of demote_size pages.
This patch does not provide full demote functionality. It only provides
the sysfs interfaces and uses existing code to free pages to the buddy
allocator if demote_size == PAGESIZE.
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
include/linux/hugetlb.h | 1 +
mm/hugetlb.c | 117 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cccd1aab69dd..5e9d6c8ab411 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -558,6 +558,7 @@ struct hstate {
int next_nid_to_alloc;
int next_nid_to_free;
unsigned int order;
+ unsigned int demote_order;
unsigned long mask;
unsigned long max_huge_pages;
unsigned long nr_huge_pages;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8fb42c6dd74b..161732ba7aaf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2492,7 +2492,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
static void __init hugetlb_init_hstates(void)
{
- struct hstate *h;
+ struct hstate *h, *h2;
for_each_hstate(h) {
if (minimum_order > huge_page_order(h))
@@ -2501,6 +2501,17 @@ static void __init hugetlb_init_hstates(void)
/* oversize hugepages were init'ed in early boot */
if (!hstate_is_gigantic(h))
hugetlb_hstate_alloc_pages(h);
+
+ /*
+ * Set demote order for each hstate. Note that
+ * h->demote_order is initially 0.
+ */
+ for_each_hstate(h2) {
+ if (h2 == h)
+ continue;
+ if (h2->order < h->order && h2->order > h->demote_order)
+ h->demote_order = h2->order;
+ }
}
VM_BUG_ON(minimum_order == UINT_MAX);
}
@@ -2710,6 +2721,20 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
return 0;
}
+/*
+ * Demote step for hstate @h, restricted to @nodes_allowed.
+ *
+ * When the hstate has no demote order this returns whatever
+ * free_pool_huge_page() returns; for every other hstate it returns 0.
+ */
+static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+{
+	/*
+	 * TODO - demote functionality will be added in a subsequent patch.
+	 * Until then an hstate that has a demote order is left untouched.
+	 */
+	if (h->demote_order)
+		return 0;
+
+	/* No demote order: free to buddy instead. */
+	return free_pool_huge_page(h, nodes_allowed, 0);
+}
+
#define HSTATE_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
@@ -2908,12 +2933,100 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj,
}
HSTATE_ATTR_RO(surplus_hugepages);
+/*
+ * Show handler paired with demote_store(). It consults no state and always
+ * emits the string "0", so reading the file conveys no information.
+ */
+static ssize_t demote_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "0\n");
+}
+
+/*
+ * Parse a page count from @buf and try to demote that many huge pages of the
+ * hstate behind @kobj. For a per-node kobject only that node is considered;
+ * otherwise every node in node_states[N_MEMORY] is.
+ *
+ * Only free pages in excess of h->resv_huge_pages are candidates. Returns the
+ * kstrtoul() error if @buf does not parse, otherwise @len - including when
+ * fewer pages than requested (or none at all) were demoted.
+ */
+static ssize_t demote_store_action(struct kobject *kobj, const char *buf,
+				   size_t len)
+{
+	unsigned long nr_demote;
+	unsigned long nr_free;
+	unsigned long nr_available;
+	nodemask_t nodes_allowed, *n_mask;
+	struct hstate *h;
+	int err;
+	int nid;
+
+	err = kstrtoul(buf, 10, &nr_demote);
+	if (err)
+		return err;
+	h = kobj_to_hstate(kobj, &nid);
+
+	/* The node mask only depends on nid, so build it before locking. */
+	if (nid != NUMA_NO_NODE) {
+		init_nodemask_of_node(&nodes_allowed, nid);
+		n_mask = &nodes_allowed;
+	} else {
+		n_mask = &node_states[N_MEMORY];
+	}
+
+	spin_lock(&hugetlb_lock);
+	while (nr_demote) {
+		/*
+		 * Re-read the counts on every pass. The lock may have been
+		 * dropped by cond_resched_lock() below or in the routines to
+		 * demote/free a page, so the counts could have changed.
+		 */
+		if (nid != NUMA_NO_NODE)
+			nr_free = h->free_huge_pages_node[nid];
+		else
+			nr_free = h->free_huge_pages;
+
+		/*
+		 * This arithmetic is done in unsigned long, so compare before
+		 * subtracting: if the reservations meet or exceed the free
+		 * count, the difference would wrap to a huge value instead of
+		 * going negative and reserved pages could be demoted.
+		 */
+		if (nr_free <= h->resv_huge_pages)
+			break;
+		nr_available = nr_free - h->resv_huge_pages;
+		nr_demote = min(nr_available, nr_demote);
+
+		if (!demote_pool_huge_page(h, n_mask))
+			break;
+		nr_demote--;
+
+		cond_resched_lock(&hugetlb_lock);
+	}
+	spin_unlock(&hugetlb_lock);
+
+	return len;
+}
+
+/*
+ * Store handler for the demote file: passes the written buffer straight to
+ * demote_store_action() and returns its result. @attr is not used.
+ */
+static ssize_t demote_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t len)
+{
+ return demote_store_action(kobj, buf, len);
+}
+/*
+ * NOTE(review): HSTATE_ATTR is defined outside this hunk; presumably it
+ * declares demote_attr from demote_show/demote_store - confirm.
+ */
+HSTATE_ATTR(demote);
+
+/*
+ * Show handler for the read-only demote_size file. Emits
+ * PAGE_SIZE << h->demote_order expressed in kB, followed by "kB"; a
+ * demote_order of 0 therefore reports the base page size.
+ */
+static ssize_t demote_size_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h;
+	unsigned long demote_size;
+	int nid;
+
+	/* kobj_to_hstate() also fills in the node id; it is not used here. */
+	h = kobj_to_hstate(kobj, &nid);
+
+	/* Hold the value that is printed, not the raw order. */
+	demote_size = (unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K;
+
+	return sysfs_emit(buf, "%lukB\n", demote_size);
+}
+HSTATE_ATTR_RO(demote_size);
+
static struct attribute *hstate_attrs[] = {
&nr_hugepages_attr.attr,
&nr_overcommit_hugepages_attr.attr,
&free_hugepages_attr.attr,
&resv_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
+ &demote_size_attr.attr,
+ &demote_attr.attr,
#ifdef CONFIG_NUMA
&nr_hugepages_mempolicy_attr.attr,
#endif
@@ -2983,6 +3096,8 @@ static struct attribute *per_node_hstate_attrs[] = {
&nr_hugepages_attr.attr,
&free_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
+ &demote_size_attr.attr,
+ &demote_attr.attr,
NULL,
};
--
2.29.2
next prev parent reply other threads:[~2021-03-09 0:20 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-09 0:18 [RFC PATCH 0/3] hugetlb: add demote/split page functionality Mike Kravetz
2021-03-09 0:18 ` Mike Kravetz [this message]
2021-03-09 0:18 ` [RFC PATCH 2/3] hugetlb: add HPageCma flag and code to free non-gigantic pages in CMA Mike Kravetz
2021-03-09 0:18 ` [RFC PATCH 3/3] hugetlb: add hugetlb demote page support Mike Kravetz
2021-03-09 9:01 ` [RFC PATCH 0/3] hugetlb: add demote/split page functionality David Hildenbrand
2021-03-09 17:11 ` Mike Kravetz
2021-03-09 17:50 ` David Hildenbrand
2021-03-09 18:21 ` Mike Kravetz
2021-03-09 19:01 ` David Hildenbrand
2021-03-10 15:58 ` Oscar Salvador
2021-03-10 16:23 ` Michal Hocko
2021-03-10 16:46 ` Zi Yan
2021-03-10 17:05 ` Michal Hocko
2021-03-10 17:36 ` Zi Yan
2021-03-10 19:56 ` Mike Kravetz
2021-03-10 19:45 ` Mike Kravetz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210309001855.142453-2-mike.kravetz@oracle.com \
--to=mike.kravetz@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=david@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=osalvador@suse.de \
--cc=rientjes@google.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).