From: Mina Almasry <almasrymina@google.com>
To: unlisted-recipients:; (no To-header on input)
Cc: Andrew Morton <akpm@linux-foundation.org>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Qian Cai <cai@lca.pw>,
mike.kravetz@oracle.com, Mina Almasry <almasrymina@google.com>
Subject: [PATCH v2] mm/hugetlb: Fix file_region entry allocations
Date: Tue, 18 Feb 2020 17:27:37 -0800 [thread overview]
Message-ID: <20200219012736.20363-1-almasrymina@google.com> (raw)
In-Reply-To: <67aa82a8-3c8d-d1eb-7e83-4f722b1eeb2a@oracle.com>
Commit a9e443086489e ("hugetlb: disable region_add file_region
coalescing") introduced a bug with adding file_region entries
that is fixed here:
1. Refactor file_region entry allocation logic into 1 function called
from region_add and region_chg since the code is now identical.
2. region_chg only modifies resv->adds_in_progress after the regions
have been allocated. In the past it used to increment
adds_in_progress and then drop the lock, which would confuse racing
region_add calls into thinking they need to allocate entries when
they are not allowed.
3. In region_add, only try to allocate regions when
actual_regions_needed > in_regions_needed. This is not causing a bug
but is better for cleanliness and reasoning about the code.
Tested using ltp hugemmap0* tests, and libhugetlbfs tests.
Reported-by: Qian Cai <cai@lca.pw>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Fixes: Commit a9e443086489e ("hugetlb: disable region_add file_region
coalescing")
---
Changes in v2:
- Added __must_hold.
- Fixed formatting issues due to clang format adding space after
list_for_each_entry_safe
---
mm/hugetlb.c | 150 +++++++++++++++++++++++++--------------------------
1 file changed, 75 insertions(+), 75 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8171d2211be77..94e27dfec0435 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -439,6 +439,67 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
return add;
}
+/* Must be called with resv->lock acquired. Will drop lock to allocate entries.
+ */
+static int allocate_file_region_entries(struct resv_map *resv,
+ int regions_needed)
+ __must_hold(&resv->lock)
+{
+ struct list_head allocated_regions;
+ int to_allocate = 0, i = 0;
+ struct file_region *trg = NULL, *rg = NULL;
+
+ VM_BUG_ON(regions_needed < 0);
+
+ INIT_LIST_HEAD(&allocated_regions);
+
+ /*
+ * Check for sufficient descriptors in the cache to accommodate
+ * the number of in progress add operations plus regions_needed.
+ *
+ * This is a while loop because when we drop the lock, some other call
+ * to region_add or region_del may have consumed some region_entries,
+ * so we keep looping here until we finally have enough entries for
+ * (adds_in_progress + regions_needed).
+ */
+ while (resv->region_cache_count <
+ (resv->adds_in_progress + regions_needed)) {
+ to_allocate = resv->adds_in_progress + regions_needed -
+ resv->region_cache_count;
+
+ /* At this point, we should have enough entries in the cache
+ * for all the existings adds_in_progress. We should only be
+ * needing to allocate for regions_needed.
+ */
+ VM_BUG_ON(resv->region_cache_count < resv->adds_in_progress);
+
+ spin_unlock(&resv->lock);
+ for (i = 0; i < to_allocate; i++) {
+ trg = kmalloc(sizeof(*trg), GFP_KERNEL);
+ if (!trg)
+ goto out_of_memory;
+ list_add(&trg->link, &allocated_regions);
+ }
+
+ spin_lock(&resv->lock);
+
+ list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
+ list_del(&rg->link);
+ list_add(&rg->link, &resv->region_cache);
+ resv->region_cache_count++;
+ }
+ }
+
+ return 0;
+
+out_of_memory:
+ list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
+ list_del(&rg->link);
+ kfree(rg);
+ }
+ return -ENOMEM;
+}
+
/*
* Add the huge page range represented by [f, t) to the reserve
* map. Regions will be taken from the cache to fill in this range.
@@ -460,11 +521,7 @@ static long region_add(struct resv_map *resv, long f, long t,
long in_regions_needed, struct hstate *h,
struct hugetlb_cgroup *h_cg)
{
- long add = 0, actual_regions_needed = 0, i = 0;
- struct file_region *trg = NULL, *rg = NULL;
- struct list_head allocated_regions;
-
- INIT_LIST_HEAD(&allocated_regions);
+ long add = 0, actual_regions_needed = 0;
spin_lock(&resv->lock);
retry:
@@ -476,34 +533,24 @@ static long region_add(struct resv_map *resv, long f, long t,
/*
* Check for sufficient descriptors in the cache to accommodate
* this add operation. Note that actual_regions_needed may be greater
- * than in_regions_needed. In this case, we need to make sure that we
+ * than in_regions_needed, as the resv_map may have been modified since
+ * the region_chg call. In this case, we need to make sure that we
* allocate extra entries, such that we have enough for all the
* existing adds_in_progress, plus the excess needed for this
* operation.
*/
- if (resv->region_cache_count <
- resv->adds_in_progress +
- (actual_regions_needed - in_regions_needed)) {
+ if (actual_regions_needed > in_regions_needed &&
+ resv->region_cache_count <
+ resv->adds_in_progress +
+ (actual_regions_needed - in_regions_needed)) {
/* region_add operation of range 1 should never need to
* allocate file_region entries.
*/
VM_BUG_ON(t - f <= 1);
- /* Must drop lock to allocate a new descriptor. */
- spin_unlock(&resv->lock);
- for (i = 0; i < (actual_regions_needed - in_regions_needed);
- i++) {
- trg = kmalloc(sizeof(*trg), GFP_KERNEL);
- if (!trg)
- goto out_of_memory;
- list_add(&trg->link, &allocated_regions);
- }
- spin_lock(&resv->lock);
-
- list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
- list_del(&rg->link);
- list_add(&rg->link, &resv->region_cache);
- resv->region_cache_count++;
+ if (allocate_file_region_entries(
+ resv, actual_regions_needed - in_regions_needed)) {
+ return -ENOMEM;
}
goto retry;
@@ -516,13 +563,6 @@ static long region_add(struct resv_map *resv, long f, long t,
spin_unlock(&resv->lock);
VM_BUG_ON(add < 0);
return add;
-
-out_of_memory:
- list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
- list_del(&rg->link);
- kfree(rg);
- }
- return -ENOMEM;
}
/*
@@ -548,11 +588,7 @@ static long region_add(struct resv_map *resv, long f, long t,
static long region_chg(struct resv_map *resv, long f, long t,
long *out_regions_needed)
{
- struct file_region *trg = NULL, *rg = NULL;
- long chg = 0, i = 0, to_allocate = 0;
- struct list_head allocated_regions;
-
- INIT_LIST_HEAD(&allocated_regions);
+ long chg = 0;
spin_lock(&resv->lock);
@@ -563,49 +599,13 @@ static long region_chg(struct resv_map *resv, long f, long t,
if (*out_regions_needed == 0)
*out_regions_needed = 1;
- resv->adds_in_progress += *out_regions_needed;
-
- /*
- * Check for sufficient descriptors in the cache to accommodate
- * the number of in progress add operations.
- */
- while (resv->region_cache_count < resv->adds_in_progress) {
- to_allocate = resv->adds_in_progress - resv->region_cache_count;
-
- /* Must drop lock to allocate a new descriptor. Note that even
- * though we drop the lock here, we do not make another call to
- * add_reservation_in_range after re-acquiring the lock.
- * Essentially this branch makes sure that we have enough
- * descriptors in the cache as suggested by the first call to
- * add_reservation_in_range. If more regions turn out to be
- * required, region_add will deal with it.
- */
- spin_unlock(&resv->lock);
- for (i = 0; i < to_allocate; i++) {
- trg = kmalloc(sizeof(*trg), GFP_KERNEL);
- if (!trg)
- goto out_of_memory;
- list_add(&trg->link, &allocated_regions);
- }
-
- spin_lock(&resv->lock);
+ if (allocate_file_region_entries(resv, *out_regions_needed))
+ return -ENOMEM;
- list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
- list_del(&rg->link);
- list_add(&rg->link, &resv->region_cache);
- resv->region_cache_count++;
- }
- }
+ resv->adds_in_progress += *out_regions_needed;
spin_unlock(&resv->lock);
return chg;
-
-out_of_memory:
- list_for_each_entry_safe(rg, trg, &allocated_regions, link) {
- list_del(&rg->link);
- kfree(rg);
- }
- return -ENOMEM;
}
/*
--
2.25.0.265.gbab2e86ba0-goog
next prev parent reply other threads:[~2020-02-19 1:28 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-18 22:26 [PATCH -next] mm/hugetlb: Fix file_region entry allocations Mina Almasry
2020-02-18 23:43 ` Mike Kravetz
2020-02-19 1:27 ` Mina Almasry [this message]
2020-02-19 14:12 ` [PATCH v2] " Miguel Ojeda
2020-02-19 23:28 ` Mina Almasry
2020-02-20 9:37 ` Miguel Ojeda
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200219012736.20363-1-almasrymina@google.com \
--to=almasrymina@google.com \
--cc=akpm@linux-foundation.org \
--cc=cai@lca.pw \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).