linux-mm.kvack.org archive mirror
* [PATCH 0/3] Fix read-only THP for non-tmpfs filesystems
@ 2020-09-03 18:30 Matthew Wilcox (Oracle)
From: Matthew Wilcox (Oracle) @ 2020-09-03 18:30 UTC
  To: Andrew Morton
  Cc: Matthew Wilcox (Oracle),
	linux-mm, Song Liu, Kirill A . Shutemov, Qian Cai

As described more verbosely in the [3/3] changelog, we can
inadvertently put an order-0 page in the page cache which
occupies 512 consecutive entries.  Users are running into
this if they enable the READ_ONLY_THP_FOR_FS config option;
see https://bugzilla.kernel.org/show_bug.cgi?id=206569
and Qian Cai has also reported it here:
https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/
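
To make the failure concrete, here is a minimal sketch (not code from
these patches; 'mapping' and 'page' are stand-ins) of what happens when
an order-0 page is stored over a THP shadow entry:

	/* Reclaiming an order-9 THP leaves a single shadow entry
	 * covering indices 0-511.  Storing one small page at index 5
	 * replaces the entry at *all* 512 indices, so a later lookup
	 * of, say, index 17 returns a page whose ->index is 5.
	 */
	XA_STATE(xas, &mapping->i_pages, 5);

	xas_lock_irq(&xas);
	xas_store(&xas, page);	/* clobbers the whole 512-slot range */
	xas_unlock_irq(&xas);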

This is a rather intrusive way of fixing the problem, but it has the
advantage that I've actually been testing it with the THP patches,
which means the fix sees far more use there than it would upstream --
indeed, Song has been entirely unable to reproduce the bug.  It also
has the advantage that it removes a few patches from my gargantuan
backlog of THP patches.

Matthew Wilcox (Oracle) (3):
  XArray: Add xa_get_order
  XArray: Add xas_split
  mm/filemap: Fix storing to a THP shadow entry

 Documentation/core-api/xarray.rst |  16 +--
 include/linux/xarray.h            |  22 ++++
 lib/test_xarray.c                 |  62 ++++++++++
 lib/xarray.c                      | 196 ++++++++++++++++++++++++++++--
 mm/filemap.c                      |  42 +++++--
 5 files changed, 311 insertions(+), 27 deletions(-)

-- 
2.28.0




* [PATCH 1/3] XArray: Add xa_get_order
@ 2020-09-03 18:30 Matthew Wilcox (Oracle)
From: Matthew Wilcox (Oracle) @ 2020-09-03 18:30 UTC
  To: Andrew Morton
  Cc: Matthew Wilcox (Oracle),
	linux-mm, Song Liu, Kirill A . Shutemov, Qian Cai

This function returns the order of the entry at the index.  We need this
because there isn't space in the shadow entry to encode its order.
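
As an illustrative sketch (not part of this patch; 'mapping' is a
stand-in address_space), a page-cache caller can use this to find how
many slots an entry or shadow entry spans:

	/* Returns 9 for an order-9 THP (or its shadow entry) and 0
	 * for an order-0 page or an empty slot.
	 */
	int order = xa_get_order(&mapping->i_pages, index);
	unsigned long nr = 1UL << order;	/* slots covered */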

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/xarray.h |  9 +++++++++
 lib/test_xarray.c      | 21 +++++++++++++++++++++
 lib/xarray.c           | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index b4d70e7568b2..5b7f4ebcf4ff 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1505,6 +1505,15 @@ void xas_pause(struct xa_state *);
 
 void xas_create_range(struct xa_state *);
 
+#ifdef CONFIG_XARRAY_MULTI
+int xa_get_order(struct xarray *, unsigned long index);
+#else
+static inline int xa_get_order(struct xarray *xa, unsigned long index)
+{
+	return 0;
+}
+#endif
+
 /**
  * xas_reload() - Refetch an entry from the xarray.
  * @xas: XArray operation state.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index d4f97925dbd8..bdd4d7995f79 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1649,6 +1649,26 @@ static noinline void check_account(struct xarray *xa)
 #endif
 }
 
+static noinline void check_get_order(struct xarray *xa)
+{
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+	unsigned int order;
+	unsigned long i, j;
+
+	for (i = 0; i < 3; i++)
+		XA_BUG_ON(xa, xa_get_order(xa, i) != 0);
+
+	for (order = 0; order < max_order; order++) {
+		for (i = 0; i < 10; i++) {
+			xa_store_order(xa, i << order, order,
+					xa_mk_index(i << order), GFP_KERNEL);
+			for (j = i << order; j < (i + 1) << order; j++)
+				XA_BUG_ON(xa, xa_get_order(xa, j) != order);
+			xa_erase(xa, i << order);
+		}
+	}
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
 	unsigned long index;
@@ -1697,6 +1717,7 @@ static int xarray_checks(void)
 	check_reserve(&array);
 	check_reserve(&xa0);
 	check_multi_store(&array);
+	check_get_order(&array);
 	check_xa_alloc();
 	check_find(&array);
 	check_find_entry(&array);
diff --git a/lib/xarray.c b/lib/xarray.c
index e9e641d3c0c3..b8cc769cefc0 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1592,6 +1592,45 @@ void *xa_store_range(struct xarray *xa, unsigned long first,
 	return xas_result(&xas, NULL);
 }
 EXPORT_SYMBOL(xa_store_range);
+
+/**
+ * xa_get_order() - Get the order of an entry.
+ * @xa: XArray.
+ * @index: Index of the entry.
+ *
+ * Return: A number between 0 and 63 indicating the order of the entry.
+ */
+int xa_get_order(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+	int order = 0;
+
+	rcu_read_lock();
+	entry = xas_load(&xas);
+
+	if (!entry)
+		goto unlock;
+
+	if (!xas.xa_node)
+		goto unlock;
+
+	for (;;) {
+		unsigned int slot = xas.xa_offset + (1 << order);
+
+		if (slot >= XA_CHUNK_SIZE)
+			break;
+		if (!xa_is_sibling(xas.xa_node->slots[slot]))
+			break;
+		order++;
+	}
+
+	order += xas.xa_node->shift;
+unlock:
+	rcu_read_unlock();
+
+	return order;
+}
 #endif /* CONFIG_XARRAY_MULTI */
 
 /**
-- 
2.28.0




* [PATCH 2/3] XArray: Add xas_split
@ 2020-09-03 18:30 Matthew Wilcox (Oracle)
From: Matthew Wilcox (Oracle) @ 2020-09-03 18:30 UTC
  To: Andrew Morton
  Cc: Matthew Wilcox (Oracle),
	linux-mm, Song Liu, Kirill A . Shutemov, Qian Cai

In order to use multi-index entries for huge pages in the page cache,
we need to be able to split a multi-index entry (eg if a file is
truncated in the middle of a huge page entry).  This version does not
support splitting more than one level of the tree at a time.  This is an
acceptable limitation for the page cache as we do not expect to support
order-12 pages in the near future.
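
A minimal usage sketch (assumptions: 'old' is the current order-9
entry at 'index' in 'mapping'; the idiom matches the xarray.rst text
updated below):

	/* The xa_state is order 0, so the entry splits to order 0. */
	XA_STATE(xas, &mapping->i_pages, index);

	xas_split_alloc(&xas, old, 9, GFP_KERNEL);	/* may sleep */
	xas_lock_irq(&xas);
	if (!xas_error(&xas))
		xas_split(&xas, old, 9);	/* 9 == current order */
	xas_unlock_irq(&xas);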

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  16 +--
 include/linux/xarray.h            |  13 +++
 lib/test_xarray.c                 |  41 ++++++++
 lib/xarray.c                      | 157 ++++++++++++++++++++++++++++--
 4 files changed, 211 insertions(+), 16 deletions(-)

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 640934b6f7b4..a137a0e6d068 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
 Each entry will only be returned once, no matter how many indices it
 occupies.
 
-Using xas_next() or xas_prev() with a multi-index xa_state
-is not supported.  Using either of these functions on a multi-index entry
-will reveal sibling entries; these should be skipped over by the caller.
-
-Storing ``NULL`` into any index of a multi-index entry will set the entry
-at every index to ``NULL`` and dissolve the tie.  Splitting a multi-index
-entry into entries occupying smaller ranges is not yet supported.
+Using xas_next() or xas_prev() with a multi-index xa_state is not
+supported.  Using either of these functions on a multi-index entry will
+reveal sibling entries; these should be skipped over by the caller.
+
+Storing ``NULL`` into any index of a multi-index entry will set the
+entry at every index to ``NULL`` and dissolve the tie.  A multi-index
+entry can be split into entries occupying smaller ranges by calling
+xas_split_alloc() without the xa_lock held, followed by taking the lock
+and calling xas_split().
 
 Functions and structures
 ========================
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5b7f4ebcf4ff..5cdf441f6377 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *);
 
 #ifdef CONFIG_XARRAY_MULTI
 int xa_get_order(struct xarray *, unsigned long index);
+void xas_split(struct xa_state *, void *entry, unsigned int order);
+void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
 #else
 static inline int xa_get_order(struct xarray *xa, unsigned long index)
 {
 	return 0;
 }
+
+static inline void xas_split(struct xa_state *xas, void *entry,
+		unsigned int order)
+{
+	xas_store(xas, entry);
+}
+
+static inline void xas_split_alloc(struct xa_state *xas, void *entry,
+		unsigned int order, gfp_t gfp)
+{
+}
 #endif
 
 /**
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index bdd4d7995f79..16fe5adcac62 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1503,6 +1503,46 @@ static noinline void check_store_range(struct xarray *xa)
 	}
 }
 
+#ifdef CONFIG_XARRAY_MULTI
+static void check_split_1(struct xarray *xa, unsigned long index,
+							unsigned int order)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+	unsigned int i = 0;
+
+	xa_store_order(xa, index, order, xa, GFP_KERNEL);
+
+	xas_split_alloc(&xas, xa, order, GFP_KERNEL);
+	xas_lock(&xas);
+	xas_split(&xas, xa, order);
+	xas_unlock(&xas);
+
+	xa_for_each(xa, index, entry) {
+		XA_BUG_ON(xa, entry != xa);
+		i++;
+	}
+	XA_BUG_ON(xa, i != 1 << order);
+
+	xa_destroy(xa);
+}
+
+static noinline void check_split(struct xarray *xa)
+{
+	unsigned int order;
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
+		check_split_1(xa, 0, order);
+		check_split_1(xa, 1UL << order, order);
+		check_split_1(xa, 3UL << order, order);
+	}
+}
+#else
+static void check_split(struct xarray *xa) { }
+#endif
+
 static void check_align_1(struct xarray *xa, char *name)
 {
 	int i;
@@ -1729,6 +1769,7 @@ static int xarray_checks(void)
 	check_store_range(&array);
 	check_store_iter(&array);
 	check_align(&xa0);
+	check_split(&array);
 
 	check_workingset(&array, 0);
 	check_workingset(&array, 64);
diff --git a/lib/xarray.c b/lib/xarray.c
index b8cc769cefc0..ff3652657bd8 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -266,13 +266,15 @@ static void xa_node_free(struct xa_node *node)
  */
 static void xas_destroy(struct xa_state *xas)
 {
-	struct xa_node *node = xas->xa_alloc;
+	struct xa_node *next, *node = xas->xa_alloc;
 
-	if (!node)
-		return;
-	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
-	kmem_cache_free(radix_tree_node_cachep, node);
-	xas->xa_alloc = NULL;
+	while (node) {
+		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+		next = rcu_dereference_raw(node->parent);
+		/* XXX: need to free children */
+		kmem_cache_free(radix_tree_node_cachep, node);
+		xas->xa_alloc = node = next;
+	}
 }
 
 /**
@@ -304,6 +306,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
 	xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
 	if (!xas->xa_alloc)
 		return false;
+	xas->xa_alloc->parent = NULL;
 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
 	xas->xa_node = XAS_RESTART;
 	return true;
@@ -339,6 +342,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
 	}
 	if (!xas->xa_alloc)
 		return false;
+	xas->xa_alloc->parent = NULL;
 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
 	xas->xa_node = XAS_RESTART;
 	return true;
@@ -403,7 +407,7 @@ static unsigned long xas_size(const struct xa_state *xas)
 /*
  * Use this to calculate the maximum index that will need to be created
  * in order to add the entry described by @xas.  Because we cannot store a
- * multiple-index entry at index 0, the calculation is a little more complex
+ * multi-index entry at index 0, the calculation is a little more complex
  * than you might expect.
  */
 static unsigned long xas_max(struct xa_state *xas)
@@ -946,6 +950,141 @@ void xas_init_marks(const struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_init_marks);
 
+#ifdef CONFIG_XARRAY_MULTI
+static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
+{
+	unsigned int marks = 0;
+	xa_mark_t mark = XA_MARK_0;
+
+	for (;;) {
+		if (node_get_mark(node, offset, mark))
+			marks |= 1 << (__force unsigned int)mark;
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+
+	return marks;
+}
+
+static void node_set_marks(struct xa_node *node, unsigned int offset,
+			struct xa_node *child, unsigned int marks)
+{
+	xa_mark_t mark = XA_MARK_0;
+
+	for (;;) {
+		if (marks & (1 << (__force unsigned int)mark))
+			node_set_mark(node, offset, mark);
+		if (child)
+			node_mark_all(child, mark);
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+}
+
+/**
+ * xas_split_alloc() - Allocate memory for splitting an entry.
+ * @xas: XArray operation state.
+ * @entry: New entry which will be stored in the array.
+ * @order: Current entry order.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates the child nodes needed to split an entry of @order size
+ * into the order stored in the @xas.  Should be called before taking
+ * the xa_lock; may sleep if @gfp flags permit.
+ */
+void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
+		gfp_t gfp)
+{
+	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	unsigned int mask = xas->xa_sibs;
+
+	/* XXX: no support for splitting really large entries yet */
+	if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
+		goto nomem;
+	if (xas->xa_shift + XA_CHUNK_SHIFT > order)
+		return;
+
+	do {
+		unsigned int i;
+		void *sibling;
+		struct xa_node *node;
+
+		node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+		if (!node)
+			goto nomem;
+		node->array = xas->xa;
+		for (i = 0; i < XA_CHUNK_SIZE; i++) {
+			if ((i & mask) == 0) {
+				RCU_INIT_POINTER(node->slots[i], entry);
+				sibling = xa_mk_sibling(0);
+			} else {
+				RCU_INIT_POINTER(node->slots[i], sibling);
+			}
+		}
+		RCU_INIT_POINTER(node->parent, xas->xa_alloc);
+		xas->xa_alloc = node;
+	} while (sibs-- > 0);
+
+	return;
+nomem:
+	xas_destroy(xas);
+	xas_set_err(xas, -ENOMEM);
+}
+
+/**
+ * xas_split() - Split a multi-index entry into smaller entries.
+ * @xas: XArray operation state.
+ * @entry: New entry to store in the array.
+ * @order: Current entry order.
+ *
+ * The value in the entry is copied to all the replacement entries.
+ *
+ * Context: Any context.  The caller should hold the xa_lock.
+ */
+void xas_split(struct xa_state *xas, void *entry, unsigned int order)
+{
+	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	unsigned int offset, marks;
+	struct xa_node *node;
+	void *curr = xas_load(xas);
+	int values = 0;
+
+	node = xas->xa_node;
+	if (xas_top(node))
+		return;
+
+	marks = node_get_marks(node, xas->xa_offset);
+
+	offset = xas->xa_offset + sibs;
+	do {
+		if (xas->xa_shift < node->shift) {
+			struct xa_node *child = xas->xa_alloc;
+
+			xas->xa_alloc = rcu_dereference_raw(child->parent);
+			child->shift = node->shift - XA_CHUNK_SHIFT;
+			child->offset = offset;
+			child->count = XA_CHUNK_SIZE;
+			child->nr_values = xa_is_value(entry) ?
+					XA_CHUNK_SIZE : 0;
+			RCU_INIT_POINTER(child->parent, node);
+			node_set_marks(node, offset, child, marks);
+			rcu_assign_pointer(node->slots[offset],
+					xa_mk_node(child));
+			if (xa_is_value(curr))
+				values--;
+		} else {
+			unsigned int canon = offset - xas->xa_sibs;
+
+			node_set_marks(node, canon, NULL, marks);
+			rcu_assign_pointer(node->slots[canon], entry);
+			while (offset > canon)
+				rcu_assign_pointer(node->slots[offset--],
+						xa_mk_sibling(canon));
+			values += (xa_is_value(entry) - xa_is_value(curr)) *
+					(xas->xa_sibs + 1);
+		}
+	} while (offset-- > xas->xa_offset);
+
+	node->nr_values += values;
+}
+EXPORT_SYMBOL_GPL(xas_split);
+#endif
+
 /**
  * xas_pause() - Pause a walk to drop a lock.
  * @xas: XArray operation state.
@@ -1407,7 +1546,7 @@ EXPORT_SYMBOL(__xa_store);
  * @gfp: Memory allocation flags.
  *
  * After this function returns, loads from this index will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
+ * Storing into an existing multi-index entry updates the entry of every index.
  * The marks associated with @index are unaffected unless @entry is %NULL.
  *
  * Context: Any context.  Takes and releases the xa_lock.
@@ -1549,7 +1688,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first,
  *
  * After this function returns, loads from any index between @first and @last,
  * inclusive will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
+ * Storing into an existing multi-index entry updates the entry of every index.
  * The marks associated with @index are unaffected unless @entry is %NULL.
  *
  * Context: Process context.  Takes and releases the xa_lock.  May sleep
-- 
2.28.0




* [PATCH 3/3] mm/filemap: Fix storing to a THP shadow entry
@ 2020-09-03 18:30 Matthew Wilcox (Oracle)
From: Matthew Wilcox (Oracle) @ 2020-09-03 18:30 UTC
  To: Andrew Morton
  Cc: Matthew Wilcox (Oracle),
	linux-mm, Song Liu, Kirill A . Shutemov, Qian Cai

When a THP is removed from the page cache by reclaim, we replace it with
a shadow entry that occupies all slots of the XArray previously occupied
by the THP.  If the user then accesses that page again, we only allocate
a single page, but storing it into the shadow entry replaces all entries
with that one page.  That leads to bugs like

page dumped because: VM_BUG_ON_PAGE(page_to_pgoff(page) != offset)
------------[ cut here ]------------
kernel BUG at mm/filemap.c:2529!

https://bugzilla.kernel.org/show_bug.cgi?id=206569

This is hard to reproduce with mainline, but happens regularly with the
THP patchset (as so many more THPs are created).  This solution is taken
from the THP patchset.  It splits the shadow entry into order-0 pieces
at the time that we bring a new page into cache.
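
Condensed, the new flow in __add_to_page_cache_locked() (see the full
diff below) looks like this simplified sketch, where 'old' stands for
the conflicting shadow entry found under the lock:

	unsigned int order = xa_get_order(xas.xa, xas.xa_index);

	if (order > thp_order(page))	/* shadow entry is bigger */
		xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
				order, gfp);
	xas_lock_irq(&xas);
	/* ... walk conflicts, re-check the order under the lock ... */
	xas_split(&xas, old, order);	/* shadow -> order-0 pieces */
	xas_reset(&xas);
	xas_store(&xas, page);		/* now replaces a single slot */
	xas_unlock_irq(&xas);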

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS")
---
 mm/filemap.c | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 1aaea26556cc..1ef6b71d68a6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -829,13 +829,12 @@ EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
 static int __add_to_page_cache_locked(struct page *page,
 				      struct address_space *mapping,
-				      pgoff_t offset, gfp_t gfp_mask,
+				      pgoff_t offset, gfp_t gfp,
 				      void **shadowp)
 {
 	XA_STATE(xas, &mapping->i_pages, offset);
 	int huge = PageHuge(page);
 	int error;
-	void *old;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
@@ -846,25 +845,46 @@ static int __add_to_page_cache_locked(struct page *page,
 	page->index = offset;
 
 	if (!huge) {
-		error = mem_cgroup_charge(page, current->mm, gfp_mask);
+		error = mem_cgroup_charge(page, current->mm, gfp);
 		if (error)
 			goto error;
 	}
 
+	gfp &= GFP_RECLAIM_MASK;
+
 	do {
+		unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+		void *entry, *old = NULL;
+
+		if (order > thp_order(page))
+			xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
+					order, gfp);
 		xas_lock_irq(&xas);
-		old = xas_load(&xas);
-		if (old && !xa_is_value(old))
-			xas_set_err(&xas, -EEXIST);
+		xas_for_each_conflict(&xas, entry) {
+			old = entry;
+			if (!xa_is_value(entry)) {
+				xas_set_err(&xas, -EEXIST);
+				goto unlock;
+			}
+		}
+
+		if (old) {
+			if (shadowp)
+				*shadowp = old;
+			/* entry may have been split before we acquired lock */
+			order = xa_get_order(xas.xa, xas.xa_index);
+			if (order > thp_order(page)) {
+				xas_split(&xas, old, order);
+				xas_reset(&xas);
+			}
+		}
+
 		xas_store(&xas, page);
 		if (xas_error(&xas))
 			goto unlock;
 
-		if (xa_is_value(old)) {
+		if (old)
 			mapping->nrexceptional--;
-			if (shadowp)
-				*shadowp = old;
-		}
 		mapping->nrpages++;
 
 		/* hugetlb pages do not participate in page cache accounting */
@@ -872,7 +892,7 @@ static int __add_to_page_cache_locked(struct page *page,
 			__inc_lruvec_page_state(page, NR_FILE_PAGES);
 unlock:
 		xas_unlock_irq(&xas);
-	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+	} while (xas_nomem(&xas, gfp));
 
 	if (xas_error(&xas)) {
 		error = xas_error(&xas);
-- 
2.28.0




* Re: [PATCH 0/3] Fix read-only THP for non-tmpfs filesystems
@ 2020-09-04 17:23 Andrew Morton
From: Andrew Morton @ 2020-09-04 17:23 UTC
  To: Matthew Wilcox (Oracle); +Cc: linux-mm, Song Liu, Kirill A . Shutemov, Qian Cai

On Thu,  3 Sep 2020 19:30:26 +0100 "Matthew Wilcox (Oracle)" <willy@infradead.org> wrote:

> As described more verbosely in the [3/3] changelog, we can
> inadvertently put an order-0 page in the page cache which
> occupies 512 consecutive entries.  Users are running into
> this if they enable the READ_ONLY_THP_FOR_FS config option;
> see https://bugzilla.kernel.org/show_bug.cgi?id=206569
> and Qian Cai has also reported it here:
> https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/
> 
> This is a rather intrusive way of fixing the problem, but it has the
> advantage that I've actually been testing it with the THP patches,
> which means the fix sees far more use there than it would upstream --
> indeed, Song has been entirely unable to reproduce the bug.  It also
> has the advantage that it removes a few patches from my gargantuan
> backlog of THP patches.

ERROR: modpost: "xa_get_order" [lib/test_xarray.ko] undefined!
ERROR: modpost: "xas_split_alloc" [lib/test_xarray.ko] undefined!

xarray is an odd mixture of EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL().  I
did this:

--- a/lib/xarray.c~xarray-add-xas_split-fix
+++ a/lib/xarray.c
@@ -1025,6 +1025,7 @@ nomem:
 	xas_destroy(xas);
 	xas_set_err(xas, -ENOMEM);
 }
+EXPORT_SYMBOL_GPL(xas_split_alloc);
 
 /**
  * xas_split() - Split a multi-index entry into smaller entries.
_

and

--- a/lib/xarray.c~xarray-add-xa_get_order-fix
+++ a/lib/xarray.c
@@ -1631,6 +1631,7 @@ unlock:
 
 	return order;
 }
+EXPORT_SYMBOL(xa_get_order);
 #endif /* CONFIG_XARRAY_MULTI */
 
 /**
_



