From 107014f091622dcb411c0ae38c99a95704a62f3f Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Tue, 13 Oct 2020 12:03:40 -0700
Subject: [PATCH 4/4] mm: make filemap_map_pages() avoid the page lock if possible

Private mappings don't need to be 100% serialized with file truncation
etc, and act more like an optimized "read()" call.  So we can avoid
taking the page lock for them.

NOTE! This is a trial patch.  I'm not entirely happy about this, because
I think we can avoid the page lock for shared mappings too, by just
changing the order in which we do some of the checks.

In particular, once we have the page table lock (which we need anyway),
we could increment the page mapping count.  That - together with
re-checking that the page still isn't locked - should be a sufficient
guarantee that nobody has finished truncating that page yet, and any
future truncation will end up being serialized on the page table lock.

The compound page case probably needs some thinking about too.

Needs-review-by: Matthew Wilcox
Needs-review-by: Kirill A. Shutemov
Needs-review-by: Hugh Dickins
Not-yet-signed-off-by: Linus Torvalds
---
 mm/filemap.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 748b7b1b4f6d..6accb7905a36 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2788,6 +2788,7 @@ EXPORT_SYMBOL(filemap_fault);
 void filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff)
 {
+	bool shared = vmf->vma->vm_flags & VM_SHARED;
 	struct file *file = vmf->vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	pgoff_t last_pgoff = start_pgoff;
@@ -2798,6 +2799,8 @@ void filemap_map_pages(struct vm_fault *vmf,
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, end_pgoff) {
+		bool locked = false;
+
 		if (xas_retry(&xas, page))
 			continue;
 		if (xa_is_value(page))
@@ -2815,15 +2818,40 @@ void filemap_map_pages(struct vm_fault *vmf,
 		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto skip;
+		/*
+		 * Also recheck the page lock after getting the reference,
+		 * so that any page lockers will have seen us incrementing
+		 * it or not see us at all.
+		 */
+		if (PageLocked(page))
+			goto skip;
+
 		page = find_subpage(page, xas.xa_index);
 
 		if (!PageUptodate(page) ||
 				PageReadahead(page) ||
 				PageHWPoison(page))
 			goto skip;
-		if (!trylock_page(page))
-			goto skip;
 
+		/*
+		 * We only need to be really careful about races
+		 * with truncate etc for shared mappings.
+		 *
+		 * But we also need to lock the page for compound
+		 * pages (see alloc_set_pte -> page_add_file_rmap).
+		 */
+		if (shared || PageTransCompound(page)) {
+			if (!trylock_page(page))
+				goto skip;
+			locked = true;
+		}
+
+		/*
+		 * Even if we don't get the page lock, we'll re-check
+		 * the page mapping and the mapping size.
+		 *
+		 * It won't hurt, even if it's racy.
+		 */
 		if (page->mapping != mapping || !PageUptodate(page))
 			goto unlock;
 
@@ -2840,10 +2868,12 @@ void filemap_map_pages(struct vm_fault *vmf,
 		last_pgoff = xas.xa_index;
 		if (alloc_set_pte(vmf, page))
 			goto unlock;
-		unlock_page(page);
+		if (locked)
+			unlock_page(page);
 		goto next;
 unlock:
-		unlock_page(page);
+		if (locked)
+			unlock_page(page);
 skip:
 		put_page(page);
 next:
-- 
2.28.0.218.gc12ef3d349
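
As a rough sketch of the reordering idea from the commit message (not part of
the patch above; the helper name is invented, the caller is assumed to already
hold the page table lock and a page reference, and memory barriers and the
compound-page case are ignored), the lockless shared-mapping path could look
something like this:

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>

/*
 * Illustrative only: try to map a shared page without taking the page lock.
 * The caller is assumed to hold the page table lock and a reference on the
 * page, as the filemap_map_pages() fault path already does.
 */
static bool map_shared_page_locklessly(struct page *page)
{
	/*
	 * Bump the mapcount first, so a truncation that takes the page
	 * lock after this point will find the mapping and end up
	 * serializing on the page table lock when it unmaps the page.
	 */
	page_add_file_rmap(page, false);

	/*
	 * Re-check the page lock: if somebody already holds it, they may
	 * have missed our mapcount update, so undo it and fall back to
	 * the trylock_page() path.
	 */
	if (PageLocked(page)) {
		page_remove_rmap(page, false);
		return false;
	}
	return true;
}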