From: Liu Bo <bo.liu@linux.alibaba.com>
To: stable@vger.kernel.org
Cc: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	Jan Kara <jack@suse.cz>, Dan Williams <dan.j.williams@intel.com>,
	Matthew Wilcox <willy@infradead.org>,
	Peng Tao <tao.peng@linux.alibaba.com>
Subject: [PATCH] mm: fix livelock caused by iterating multi order entry
Date: Fri, 19 Jul 2019 08:12:40 +0800
Message-ID: <1563495160-25647-1-git-send-email-bo.liu@linux.alibaba.com>

The livelock can be triggered by the following pattern,

	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
				min(end - index, (pgoff_t)PAGEVEC_SIZE),
				indices)) {
		...
		for (i = 0; i < pagevec_count(&pvec); i++) {
			index = indices[i];
			...
		}
		index++; /* BUG */
	}

Multi-order exceptional entries are not specially handled in
invalidate_inode_pages2_range(), so it can end up in a livelock: lookups
at index 0 and index 1 both find the same PMD entry, but that entry is
bound to index 0, so index is set back to 0 again.
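
For example, with a 2MiB PMD entry (order 9, i.e. 512 4K pages) bound
to index 0, the loop never makes forward progress (a hypothetical
trace):

	/* lookup at index 0 returns the PMD entry, indices[0] == 0 */
	index = indices[0];	/* index == 0 */
	index++;		/* index == 1 */
	/* lookup at index 1 returns the same PMD entry, indices[0] == 0 */
	index = indices[0];	/* back to the entry's start */
	index++;		/* index == 1 again -- livelock */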

This patch introduces a helper that takes the PMD entry's length into
account when deciding the next index.
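
In code terms, the caller advances index by the entry's page count
instead of by one. A minimal sketch of the idea (simplified from the
diff below; the real patch only consults the helper when the
exceptional entry is the last one in the pagevec):

	pgoff_t nr_pages = 1;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i];

		index = indices[i];
		if (radix_tree_exceptional_entry(page) &&
		    i == pagevec_count(&pvec) - 1)
			/* 512 for a PMD entry, 1 otherwise */
			nr_pages = dax_get_multi_order(mapping, index, page);
	}
	index += nr_pages;	/* step past the whole entry */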

Note that there are other users of the above pattern that do not need
this fix:

- dax_layout_busy_page
It has already been fixed by commit d7782145e1ad
("filesystem-dax: Fix dax_layout_busy_page() livelock")

- truncate_inode_pages_range
This won't loop forever since the exceptional entries are removed from
the radix tree immediately after the search, as the sketch below
illustrates.
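
A rough sketch of why deletion gives forward progress there (assuming
the PMD entry at index 0 is removed inside the loop body):

	/* pass 1: lookup at index 0 returns the PMD entry,
	 * the entry is then deleted, and index++ makes index == 1 */
	/* pass 2: lookup at index 1 no longer finds that entry, so
	 * index is never reset back to 0 */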

Fixes: 642261ac995e ("dax: add struct iomap based DAX PMD support")
Cc: <stable@vger.kernel.org> # 4.9 - 4.19
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---

The problem is gone after commit f280bf092d48 ("page cache: Convert
find_get_entries to XArray"), but since the XArray seems too new to
backport to 4.19, I made this fix based on the radix tree
implementation.

 fs/dax.c            | 19 +++++++++++++++++++
 include/linux/dax.h |  8 ++++++++
 mm/truncate.c       | 26 ++++++++++++++++++++++++--
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index ac334bc..cd05337 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -764,6 +764,25 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 	return __dax_invalidate_mapping_entry(mapping, index, false);
 }
 
+pgoff_t dax_get_multi_order(struct address_space *mapping, pgoff_t index,
+			    void *entry)
+{
+	struct radix_tree_root *pages = &mapping->i_pages;
+	pgoff_t nr_pages = 1;
+
+	if (!dax_mapping(mapping))
+		return nr_pages;
+
+	xa_lock_irq(pages);
+	entry = get_unlocked_mapping_entry(mapping, index, NULL);
+	if (entry)
+		nr_pages = 1UL << dax_radix_order(entry);
+	put_unlocked_mapping_entry(mapping, index, entry);
+	xa_unlock_irq(pages);
+
+	return nr_pages;
+}
+
 static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
 		sector_t sector, size_t size, struct page *to,
 		unsigned long vaddr)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a846184..f3c95c6 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -91,6 +91,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 struct page *dax_layout_busy_page(struct address_space *mapping);
 bool dax_lock_mapping_entry(struct page *page);
 void dax_unlock_mapping_entry(struct page *page);
+pgoff_t dax_get_multi_order(struct address_space *mapping, pgoff_t index,
+			    void *entry);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -134,6 +136,12 @@ static inline bool dax_lock_mapping_entry(struct page *page)
 static inline void dax_unlock_mapping_entry(struct page *page)
 {
 }
+
+static inline pgoff_t dax_get_multi_order(struct address_space *mapping,
+					  pgoff_t index, void *entry)
+{
+	return 1;
+}
 #endif
 
 int dax_read_lock(void);
diff --git a/mm/truncate.c b/mm/truncate.c
index 71b65aa..835911f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -557,6 +557,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
 			indices)) {
+		pgoff_t nr_pages = 1;
+
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -568,6 +570,15 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (radix_tree_exceptional_entry(page)) {
 				invalidate_exceptional_entry(mapping, index,
 							     page);
+				/*
+				 * Account for multi-order entries at
+				 * the end of the pagevec.
+				 */
+				if (i < pagevec_count(&pvec) - 1)
+					continue;
+
+				nr_pages = dax_get_multi_order(mapping, index,
+							       page);
 				continue;
 			}
 
@@ -607,7 +618,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		cond_resched();
-		index++;
+		index += nr_pages;
 	}
 	return count;
 }
@@ -688,6 +699,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
 			indices)) {
+		pgoff_t nr_pages = 1;
+
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -700,6 +713,15 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				if (!invalidate_exceptional_entry2(mapping,
 								   index, page))
 					ret = -EBUSY;
+				/*
+				 * Account for multi-order entries at
+				 * the end of the pagevec.
+				 */
+				if (i < pagevec_count(&pvec) - 1)
+					continue;
+
+				nr_pages = dax_get_multi_order(mapping, index,
+							       page);
 				continue;
 			}
 
@@ -739,7 +761,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
 		cond_resched();
-		index++;
+		index += nr_pages;
 	}
 	/*
 	 * For DAX we invalidate page tables after invalidating radix tree.  We
-- 
1.8.3.1



Thread overview: 4+ messages
2019-07-19  0:12 Liu Bo [this message]
2019-07-19  2:53 ` [PATCH] mm: fix livelock caused by iterating multi order entry Dan Williams
2019-07-19  3:43   ` Greg KH
2019-07-19 21:43   ` Liu Bo
