All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Jan Kara <jack@suse.cz>,
	William Kucharski <william.kucharski@oracle.com>,
	linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, hch@lst.de,
	hannes@cmpxchg.org, yang.shi@linux.alibaba.com,
	dchinner@redhat.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 00/16] Overhaul multi-page lookups for THP
Date: Wed, 25 Nov 2020 02:32:34 +0000	[thread overview]
Message-ID: <20201125023234.GH4327@casper.infradead.org> (raw)
In-Reply-To: <20201117234302.GC29991@casper.infradead.org>

On Tue, Nov 17, 2020 at 11:43:02PM +0000, Matthew Wilcox wrote:
> On Tue, Nov 17, 2020 at 07:15:13PM +0000, Matthew Wilcox wrote:
> > I find both of these functions exceptionally confusing.  Does this
> > make it easier to understand?
> 
> Never mind, this is buggy.  I'll send something better tomorrow.

That took a week, not a day.  *sigh*.  At least this is shorter.

commit 1a02863ce04fd325922d6c3db6d01e18d55f966b
Author: Matthew Wilcox (Oracle) <willy@infradead.org>
Date:   Tue Nov 17 10:45:18 2020 -0500

    fix mm-truncateshmem-handle-truncates-that-split-thps.patch

diff --git a/mm/internal.h b/mm/internal.h
index 75ae680d0a2c..4ac239e8c42c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -622,5 +622,6 @@ struct migration_target_control {
 	gfp_t gfp_mask;
 };
 
-bool truncate_inode_partial_page(struct page *page, loff_t start, loff_t end);
+pgoff_t truncate_inode_partial_page(struct address_space *mapping,
+		struct page *page, loff_t start, loff_t end);
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index 5da4f1a3e663..6fd00948e592 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -866,26 +866,33 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
+	pgoff_t start, end;
 	struct pagevec pvec;
 	pgoff_t indices[PAGEVEC_SIZE];
 	struct page *page;
 	long nr_swaps_freed = 0;
 	pgoff_t index;
 	int i;
-	bool partial_end;
 
-	if (lend == -1)
-		end = -1;	/* unsigned, so actually very big */
+	page = NULL;
+	start = lstart >> PAGE_SHIFT;
+	shmem_getpage(inode, start, &page, SGP_READ);
+	if (page) {
+		page = thp_head(page);
+		set_page_dirty(page);
+		start = truncate_inode_partial_page(mapping, page, lstart,
+							lend);
+	}
+
+	/* 'end' includes a partial page */
+	end = lend / PAGE_SIZE;
 
 	pagevec_init(&pvec);
 	index = start;
 	while (index < end && find_lock_entries(mapping, index, end - 1,
-			&pvec, indices)) {
+							&pvec, indices)) {
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			page = pvec.pages[i];
-
 			index = indices[i];
 
 			if (xa_is_value(page)) {
@@ -895,8 +902,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 								index, page);
 				continue;
 			}
-			index += thp_nr_pages(page) - 1;
-
 			if (!unfalloc || !PageUptodate(page))
 				truncate_inode_page(mapping, page);
 			unlock_page(page);
@@ -907,85 +912,60 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		index++;
 	}
 
-	partial_end = ((lend + 1) % PAGE_SIZE) > 0;
-	page = NULL;
-	shmem_getpage(inode, lstart >> PAGE_SHIFT, &page, SGP_READ);
-	if (page) {
-		bool same_page;
-
-		page = thp_head(page);
-		same_page = lend < page_offset(page) + thp_size(page);
-		if (same_page)
-			partial_end = false;
-		set_page_dirty(page);
-		if (!truncate_inode_partial_page(page, lstart, lend)) {
-			start = page->index + thp_nr_pages(page);
-			if (same_page)
-				end = page->index;
-		}
-		unlock_page(page);
-		put_page(page);
-		page = NULL;
-	}
-
-	if (partial_end)
-		shmem_getpage(inode, end, &page, SGP_READ);
-	if (page) {
-		page = thp_head(page);
-		set_page_dirty(page);
-		if (!truncate_inode_partial_page(page, lstart, lend))
-			end = page->index;
-		unlock_page(page);
-		put_page(page);
-	}
-
 	index = start;
-	while (index < end) {
+	while (index <= end) {
 		cond_resched();
 
-		if (!find_get_entries(mapping, index, end - 1, &pvec,
-				indices)) {
+		if (!find_get_entries(mapping, index, end, &pvec, indices)) {
 			/* If all gone or hole-punch or unfalloc, we're done */
-			if (index == start || end != -1)
+			if (index == start || lend != (loff_t)-1)
 				break;
 			/* But if truncating, restart to make sure all gone */
 			index = start;
 			continue;
 		}
+
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			page = pvec.pages[i];
 
-			index = indices[i];
 			if (xa_is_value(page)) {
 				if (unfalloc)
 					continue;
-				if (shmem_free_swap(mapping, index, page)) {
-					/* Swap was replaced by page: retry */
-					index--;
-					break;
+				index = indices[i];
+				if (index == end) {
+					/* Partial page swapped out? */
+					shmem_getpage(inode, end, &page,
+								SGP_READ);
+				} else {
+					if (shmem_free_swap(mapping, index,
+								page)) {
+						/* Swap replaced: retry */
+						break;
+					}
+					nr_swaps_freed++;
+					continue;
 				}
-				nr_swaps_freed++;
-				continue;
+			} else {
+				lock_page(page);
 			}
 
-			lock_page(page);
-
 			if (!unfalloc || !PageUptodate(page)) {
 				if (page_mapping(page) != mapping) {
 					/* Page was replaced by swap: retry */
 					unlock_page(page);
-					index--;
+					put_page(page);
 					break;
 				}
 				VM_BUG_ON_PAGE(PageWriteback(page), page);
-				truncate_inode_page(mapping, page);
+				index = truncate_inode_partial_page(mapping,
+						page, lstart, lend);
+				if (index > end)
+					end = indices[i] - 1;
 			}
-			index = page->index + thp_nr_pages(page) - 1;
-			unlock_page(page);
 		}
+		index = indices[i - 1] + 1;
 		pagevec_remove_exceptionals(&pvec);
-		pagevec_release(&pvec);
-		index++;
+		pagevec_reinit(&pvec);
 	}
 
 	spin_lock_irq(&info->lock);
diff --git a/mm/truncate.c b/mm/truncate.c
index ddb94fc0bc9e..e9fb4f1db837 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -225,19 +225,20 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
 }
 
 /*
- * Handle partial (transparent) pages.  The page may be entirely within the
- * range if a split has raced with us.  If not, we zero the part of the
- * page that's within the [start, end] range, and then split the page if
- * it's a THP.  split_page_range() will discard pages which now lie beyond
- * i_size, and we rely on the caller to discard pages which lie within a
+ * Handle partial (transparent) pages.  If the page is entirely within
+ * the range, we discard it.  If not, we split the page if it's a THP
+ * and zero the part of the page that's within the [start, end] range.
+ * split_page_range() will discard any of the subpages which now lie
+ * beyond i_size, and the caller will discard pages which lie within a
  * newly created hole.
  *
- * Returns false if THP splitting failed so the caller can avoid
- * discarding the entire page which is stubbornly unsplit.
+ * Return: The index after the current page.
  */
-bool truncate_inode_partial_page(struct page *page, loff_t start, loff_t end)
+pgoff_t truncate_inode_partial_page(struct address_space *mapping,
+		struct page *page, loff_t start, loff_t end)
 {
 	loff_t pos = page_offset(page);
+	pgoff_t next_index = page->index + thp_nr_pages(page);
 	unsigned int offset, length;
 
 	if (pos < start)
@@ -251,24 +252,33 @@ bool truncate_inode_partial_page(struct page *page, loff_t start, loff_t end)
 		length = end + 1 - pos - offset;
 
 	wait_on_page_writeback(page);
-	if (length == thp_size(page)) {
-		truncate_inode_page(page->mapping, page);
-		return true;
-	}
-
-	/*
-	 * We may be zeroing pages we're about to discard, but it avoids
-	 * doing a complex calculation here, and then doing the zeroing
-	 * anyway if the page split fails.
-	 */
-	zero_user(page, offset, length);
+	if (length == thp_size(page))
+		goto truncate;
 
 	cleancache_invalidate_page(page->mapping, page);
 	if (page_has_private(page))
 		do_invalidatepage(page, offset, length);
 	if (!PageTransHuge(page))
-		return true;
-	return split_huge_page(page) == 0;
+		goto zero;
+	page += offset / PAGE_SIZE;
+	if (split_huge_page(page) < 0) {
+		page -= offset / PAGE_SIZE;
+		goto zero;
+	}
+	next_index = page->index + 1;
+	offset %= PAGE_SIZE;
+	if (offset == 0 && length >= PAGE_SIZE)
+		goto truncate;
+	length = PAGE_SIZE - offset;
+zero:
+	zero_user(page, offset, length);
+	goto out;
+truncate:
+	truncate_inode_page(mapping, page);
+out:
+	unlock_page(page);
+	put_page(page);
+	return next_index;
 }
 
 /*
@@ -322,10 +332,6 @@ int invalidate_inode_page(struct page *page)
  * The first pass will remove most pages, so the search cost of the second pass
  * is low.
  *
- * We pass down the cache-hot hint to the page freeing code.  Even if the
- * mapping is large, it is probably the case that the final pages are the most
- * recently touched, and freeing happens in ascending file offset order.
- *
  * Note that since ->invalidatepage() accepts range to invalidate
  * truncate_inode_pages_range is able to handle cases where lend + 1 is not
  * page aligned properly.
@@ -333,34 +339,24 @@ int invalidate_inode_page(struct page *page)
 void truncate_inode_pages_range(struct address_space *mapping,
 				loff_t lstart, loff_t lend)
 {
-	pgoff_t		start;		/* inclusive */
-	pgoff_t		end;		/* exclusive */
+	pgoff_t start, end;
 	struct pagevec	pvec;
 	pgoff_t		indices[PAGEVEC_SIZE];
 	pgoff_t		index;
 	int		i;
 	struct page *	page;
-	bool partial_end;
 
 	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
 		goto out;
 
-	/*
-	 * 'start' and 'end' always covers the range of pages to be fully
-	 * truncated. Partial pages are covered with 'partial_start' at the
-	 * start of the range and 'partial_end' at the end of the range.
-	 * Note that 'end' is exclusive while 'lend' is inclusive.
-	 */
-	start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (lend == -1)
-		/*
-		 * lend == -1 indicates end-of-file so we have to set 'end'
-		 * to the highest possible pgoff_t and since the type is
-		 * unsigned we're using -1.
-		 */
-		end = -1;
-	else
-		end = (lend + 1) >> PAGE_SHIFT;
+	start = lstart >> PAGE_SHIFT;
+	page = find_lock_head(mapping, start);
+	if (page)
+		start = truncate_inode_partial_page(mapping, page, lstart,
+							lend);
+
+	/* 'end' includes a partial page */
+	end = lend / PAGE_SIZE;
 
 	pagevec_init(&pvec);
 	index = start;
@@ -377,37 +373,11 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		cond_resched();
 	}
 
-	partial_end = ((lend + 1) % PAGE_SIZE) > 0;
-	page = find_lock_head(mapping, lstart >> PAGE_SHIFT);
-	if (page) {
-		bool same_page = lend < page_offset(page) + thp_size(page);
-		if (same_page)
-			partial_end = false;
-		if (!truncate_inode_partial_page(page, lstart, lend)) {
-			start = page->index + thp_nr_pages(page);
-			if (same_page)
-				end = page->index;
-		}
-		unlock_page(page);
-		put_page(page);
-		page = NULL;
-	}
-
-	if (partial_end)
-		page = find_lock_head(mapping, end);
-	if (page) {
-		if (!truncate_inode_partial_page(page, lstart, lend))
-			end = page->index;
-		unlock_page(page);
-		put_page(page);
-	}
-
 	index = start;
-	while (index < end) {
+	while (index <= end) {
 		cond_resched();
 
-		if (!find_get_entries(mapping, index, end - 1, &pvec,
-				indices)) {
+		if (!find_get_entries(mapping, index, end, &pvec, indices)) {
 			/* If all gone from start onwards, we're done */
 			if (index == start)
 				break;
@@ -418,22 +388,19 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			page = pvec.pages[i];
-
-			/* We rely upon deletion not changing page->index */
-			index = indices[i];
-
 			if (xa_is_value(page))
 				continue;
 
 			lock_page(page);
-			index = page->index + thp_nr_pages(page) - 1;
-			wait_on_page_writeback(page);
-			truncate_inode_page(mapping, page);
-			unlock_page(page);
+			index = truncate_inode_partial_page(mapping, page,
+							lstart, lend);
+			/* Couldn't split a THP? */
+			if (index > end)
+				end = indices[i] - 1;
 		}
+		index = indices[i - 1] + 1;
 		truncate_exceptional_pvec_entries(mapping, &pvec, indices);
-		pagevec_release(&pvec);
-		index++;
+		pagevec_reinit(&pvec);
 	}
 
 out:

  reply	other threads:[~2020-11-25  2:33 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-12 21:26 [PATCH v4 00/16] Overhaul multi-page lookups for THP Matthew Wilcox (Oracle)
2020-11-12 21:26 ` [PATCH v4 01/16] mm: Make pagecache tagged lookups return only head pages Matthew Wilcox (Oracle)
2020-11-14  9:53   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 02/16] mm/shmem: Use pagevec_lookup in shmem_unlock_mapping Matthew Wilcox (Oracle)
2020-11-14  9:53   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 03/16] mm/swap: Optimise get_shadow_from_swap_cache Matthew Wilcox (Oracle)
2020-11-14  9:53   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 04/16] mm: Add FGP_ENTRY Matthew Wilcox (Oracle)
2020-11-14 10:00   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 05/16] mm/filemap: Rename find_get_entry to mapping_get_entry Matthew Wilcox (Oracle)
2020-11-14 10:01   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 06/16] mm/filemap: Add helper for finding pages Matthew Wilcox (Oracle)
2020-11-14 10:03   ` Christoph Hellwig
2020-11-14 15:15     ` Matthew Wilcox
2020-11-12 21:26 ` [PATCH v4 07/16] mm/filemap: Add mapping_seek_hole_data Matthew Wilcox (Oracle)
2020-11-14 10:04   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 08/16] iomap: Use mapping_seek_hole_data Matthew Wilcox (Oracle)
2020-11-14 10:06   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 09/16] mm: Add and use find_lock_entries Matthew Wilcox (Oracle)
2020-11-14 10:07   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 10/16] mm: Add an 'end' parameter to find_get_entries Matthew Wilcox (Oracle)
2020-11-14 10:08   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 11/16] mm: Add an 'end' parameter to pagevec_lookup_entries Matthew Wilcox (Oracle)
2020-11-14 10:19   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 12/16] mm: Remove nr_entries parameter from pagevec_lookup_entries Matthew Wilcox (Oracle)
2020-11-14 10:19   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 13/16] mm: Pass pvec directly to find_get_entries Matthew Wilcox (Oracle)
2020-11-14 10:21   ` Christoph Hellwig
2020-11-14 15:22     ` Matthew Wilcox
2020-11-12 21:26 ` [PATCH v4 14/16] mm: Remove pagevec_lookup_entries Matthew Wilcox (Oracle)
2020-11-14 10:22   ` Christoph Hellwig
2020-11-12 21:26 ` [PATCH v4 15/16] mm/truncate,shmem: Handle truncates that split THPs Matthew Wilcox (Oracle)
2020-11-12 21:26 ` [PATCH v4 16/16] mm/filemap: Return only head pages from find_get_entries Matthew Wilcox (Oracle)
2020-11-14 10:23   ` Christoph Hellwig
2020-11-16 10:34 ` [PATCH v4 00/16] Overhaul multi-page lookups for THP Hugh Dickins
2020-11-16 10:34   ` Hugh Dickins
2020-11-16 15:14   ` Matthew Wilcox
2020-11-16 21:27     ` Hugh Dickins
2020-11-16 21:27       ` Hugh Dickins
2020-11-17 15:39   ` Matthew Wilcox
2020-11-17 16:26     ` Hugh Dickins
2020-11-17 16:26       ` Hugh Dickins
2020-11-17 19:15       ` Matthew Wilcox
2020-11-17 23:43         ` Matthew Wilcox
2020-11-25  2:32           ` Matthew Wilcox [this message]
2020-11-25  2:50             ` Hugh Dickins
2020-11-25  2:50               ` Hugh Dickins
2020-11-25  2:56               ` Hugh Dickins
2020-11-25  2:56                 ` Hugh Dickins
2020-11-25 23:08             ` Andrew Morton
2020-11-26  0:11               ` Hugh Dickins
2020-11-26  0:11                 ` Hugh Dickins
2020-11-26 12:15                 ` Matthew Wilcox
2020-11-26 19:24                   ` Hugh Dickins
2020-11-26 19:24                     ` Hugh Dickins
2020-11-26 20:07                     ` Matthew Wilcox
2020-11-30 19:45                       ` Hugh Dickins
2020-11-30 19:45                         ` Hugh Dickins
2020-12-01  4:52                         ` Hugh Dickins
2020-12-01  4:52                           ` Hugh Dickins
     [not found]             ` <CGME20201203154604eucas1p200d001d25dd344a1dd1c7da34f35aad0@eucas1p2.samsung.com>
2020-12-03 15:46               ` Marek Szyprowski
     [not found]                 ` <CGME20201203172725eucas1p2fddec1d269c55095859d490942b78b93@eucas1p2.samsung.com>
2020-12-03 17:27                   ` Marek Szyprowski
2020-12-03 21:27                     ` Qian Cai
2020-12-03 21:27                       ` Qian Cai
2020-12-03 22:19                       ` Hugh Dickins
2020-12-03 22:19                         ` Hugh Dickins
2020-12-03 21:45                     ` Hugh Dickins
2020-12-03 21:45                       ` Hugh Dickins
2021-02-23 22:58 ` Andrew Morton
2021-02-23 23:27   ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201125023234.GH4327@casper.infradead.org \
    --to=willy@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=dchinner@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@lst.de \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=william.kucharski@oracle.com \
    --cc=yang.shi@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.