From: Jan Kara <jack@suse.cz>
To: <linux-fsdevel@vger.kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>,
	Amir Goldstein <amir73il@gmail.com>,
	Dave Chinner <david@fromorbit.com>, Ted Tso <tytso@mit.edu>,
	Jan Kara <jack@suse.cz>, Hugh Dickins <hughd@google.com>,
	linux-mm@kvack.org
Subject: [PATCH 09/12] shmem: Convert to using invalidate_lock
Date: Fri, 23 Apr 2021 19:29:38 +0200
Message-ID: <20210423173018.23133-9-jack@suse.cz>
In-Reply-To: <20210423171010.12-1-jack@suse.cz>

Shmem uses a home-grown mechanism for serializing hole punch with page
fault. Use mapping->invalidate_lock for it instead. Admittedly the
home-grown mechanism locks out only the range actually being punched
out, while invalidate_lock locks the whole mapping and so serializes
more than strictly necessary. But hole punch doesn't seem to be that
critical an operation and the simplification is noticeable.
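
For clarity, the locking pattern being converted to is: page faults take
invalidate_lock shared around page cache instantiation, while hole punch
takes it exclusive around unmapping and truncating the range. Below is a
minimal kernel-style sketch of that pattern, not the patch itself:
fill_page() is a hypothetical stand-in for shmem_getpage_gfp(), and
truncate_inode_pages_range() plays the role shmem_truncate_range() plays
in the real shmem code; the locking calls are the actual rwsem API.

	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/rwsem.h>

	/* Hypothetical helper standing in for shmem_getpage_gfp(). */
	static vm_fault_t fill_page(struct vm_fault *vmf);

	static vm_fault_t sketch_fault(struct vm_fault *vmf)
	{
		struct inode *inode = file_inode(vmf->vma->vm_file);
		vm_fault_t ret;

		/* Shared: concurrent faults may fill pages in parallel. */
		down_read(&inode->i_mapping->invalidate_lock);
		ret = fill_page(vmf);
		up_read(&inode->i_mapping->invalidate_lock);
		return ret;
	}

	static void sketch_punch_hole(struct inode *inode, loff_t start,
				      loff_t end)
	{
		struct address_space *mapping = inode->i_mapping;

		/* Exclusive: block all faults while pages are removed. */
		down_write(&mapping->invalidate_lock);
		unmap_mapping_range(mapping, start, end - start + 1, 0);
		truncate_inode_pages_range(mapping, start, end);
		up_write(&mapping->invalidate_lock);
	}

No per-inode state, no on-stack wait queue, no i_lock dance: the rwsem
alone provides the fault-vs-punch exclusion, which is where the ~84-line
reduction in the diffstat comes from.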

CC: Hugh Dickins <hughd@google.com>
CC: <linux-mm@kvack.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 mm/shmem.c | 98 ++++--------------------------------------------------
 1 file changed, 7 insertions(+), 91 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 55b2888db542..f34162ac46de 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -95,12 +95,11 @@ static struct vfsmount *shm_mnt;
 #define SHORT_SYMLINK_LEN 128
 
 /*
- * shmem_fallocate communicates with shmem_fault or shmem_writepage via
- * inode->i_private (with i_rwsem making sure that it has only one user at
- * a time): we would prefer not to enlarge the shmem inode just for that.
+ * shmem_fallocate communicates with shmem_writepage via inode->i_private (with
+ * i_rwsem making sure that it has only one user at a time): we would prefer
+ * not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
-	wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
 	pgoff_t start;		/* start of range currently being fallocated */
 	pgoff_t next;		/* the next page offset to be fallocated */
 	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
@@ -1378,7 +1377,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 			spin_lock(&inode->i_lock);
 			shmem_falloc = inode->i_private;
 			if (shmem_falloc &&
-			    !shmem_falloc->waitq &&
 			    index >= shmem_falloc->start &&
 			    index < shmem_falloc->next)
 				shmem_falloc->nr_unswapped++;
@@ -2025,18 +2023,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	return error;
 }
 
-/*
- * This is like autoremove_wake_function, but it removes the wait queue
- * entry unconditionally - even if something else had already woken the
- * target.
- */
-static int synchronous_wake_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
-{
-	int ret = default_wake_function(wait, mode, sync, key);
-	list_del_init(&wait->entry);
-	return ret;
-}
-
 static vm_fault_t shmem_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2046,65 +2032,6 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	int err;
 	vm_fault_t ret = VM_FAULT_LOCKED;
 
-	/*
-	 * Trinity finds that probing a hole which tmpfs is punching can
-	 * prevent the hole-punch from ever completing: which in turn
-	 * locks writers out with its hold on i_rwsem.  So refrain from
-	 * faulting pages into the hole while it's being punched.  Although
-	 * shmem_undo_range() does remove the additions, it may be unable to
-	 * keep up, as each new page needs its own unmap_mapping_range() call,
-	 * and the i_mmap tree grows ever slower to scan if new vmas are added.
-	 *
-	 * It does not matter if we sometimes reach this check just before the
-	 * hole-punch begins, so that one fault then races with the punch:
-	 * we just need to make racing faults a rare case.
-	 *
-	 * The implementation below would be much simpler if we just used a
-	 * standard mutex or completion: but we cannot take i_rwsem in fault,
-	 * and bloating every shmem inode for this unlikely case would be sad.
-	 */
-	if (unlikely(inode->i_private)) {
-		struct shmem_falloc *shmem_falloc;
-
-		spin_lock(&inode->i_lock);
-		shmem_falloc = inode->i_private;
-		if (shmem_falloc &&
-		    shmem_falloc->waitq &&
-		    vmf->pgoff >= shmem_falloc->start &&
-		    vmf->pgoff < shmem_falloc->next) {
-			struct file *fpin;
-			wait_queue_head_t *shmem_falloc_waitq;
-			DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);
-
-			ret = VM_FAULT_NOPAGE;
-			fpin = maybe_unlock_mmap_for_io(vmf, NULL);
-			if (fpin)
-				ret = VM_FAULT_RETRY;
-
-			shmem_falloc_waitq = shmem_falloc->waitq;
-			prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
-					TASK_UNINTERRUPTIBLE);
-			spin_unlock(&inode->i_lock);
-			schedule();
-
-			/*
-			 * shmem_falloc_waitq points into the shmem_fallocate()
-			 * stack of the hole-punching task: shmem_falloc_waitq
-			 * is usually invalid by the time we reach here, but
-			 * finish_wait() does not dereference it in that case;
-			 * though i_lock needed lest racing with wake_up_all().
-			 */
-			spin_lock(&inode->i_lock);
-			finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
-			spin_unlock(&inode->i_lock);
-
-			if (fpin)
-				fput(fpin);
-			return ret;
-		}
-		spin_unlock(&inode->i_lock);
-	}
-
 	sgp = SGP_CACHE;
 
 	if ((vma->vm_flags & VM_NOHUGEPAGE) ||
@@ -2113,8 +2040,10 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	else if (vma->vm_flags & VM_HUGEPAGE)
 		sgp = SGP_HUGE;
 
+	down_read(&inode->i_mapping->invalidate_lock);
 	err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
 				  gfp, vma, vmf, &ret);
+	up_read(&inode->i_mapping->invalidate_lock);
 	if (err)
 		return vmf_error(err);
 	return ret;
@@ -2715,7 +2644,6 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		struct address_space *mapping = file->f_mapping;
 		loff_t unmap_start = round_up(offset, PAGE_SIZE);
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
-		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
 		/* protected by i_rwsem */
 		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
@@ -2723,24 +2651,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 			goto out;
 		}
 
-		shmem_falloc.waitq = &shmem_falloc_waitq;
-		shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT;
-		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
-		spin_lock(&inode->i_lock);
-		inode->i_private = &shmem_falloc;
-		spin_unlock(&inode->i_lock);
-
+		down_write(&mapping->invalidate_lock);
 		if ((u64)unmap_end > (u64)unmap_start)
 			unmap_mapping_range(mapping, unmap_start,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
-
-		spin_lock(&inode->i_lock);
-		inode->i_private = NULL;
-		wake_up_all(&shmem_falloc_waitq);
-		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
-		spin_unlock(&inode->i_lock);
+		up_write(&mapping->invalidate_lock);
 		error = 0;
 		goto out;
 	}
@@ -2763,7 +2680,6 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		goto out;
 	}
 
-	shmem_falloc.waitq = NULL;
 	shmem_falloc.start = start;
 	shmem_falloc.next  = start;
 	shmem_falloc.nr_falloced = 0;
-- 
2.26.2

