linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Kent Overstreet <kent.overstreet@gmail.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-bcache@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Subject: [PATCH 03/12] mm: pagecache add lock
Date: Mon, 10 Jun 2019 15:14:11 -0400	[thread overview]
Message-ID: <20190610191420.27007-4-kent.overstreet@gmail.com> (raw)
In-Reply-To: <20190610191420.27007-1-kent.overstreet@gmail.com>

Add a per address space lock around adding pages to the pagecache - making it
possible for fallocate INSERT_RANGE/COLLAPSE_RANGE to work correctly, and also
hopefully making truncate and dio a bit saner.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
---
 fs/inode.c            |  1 +
 include/linux/fs.h    | 24 +++++++++++++
 include/linux/sched.h |  4 +++
 init/init_task.c      |  1 +
 mm/filemap.c          | 81 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 9a453f3637..8881dc551f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -350,6 +350,7 @@ EXPORT_SYMBOL(inc_nlink);
 static void __address_space_init_once(struct address_space *mapping)
 {
 	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
+	pagecache_lock_init(&mapping->add_lock);
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e76790..a88d994751 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -418,6 +418,28 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 
+/*
+ * Two-state lock - can be taken for add or block - both states are shared,
+ * like read side of rwsem, but conflict with other state:
+ */
+struct pagecache_lock {
+	atomic_long_t		v;
+	wait_queue_head_t	wait;
+};
+
+static inline void pagecache_lock_init(struct pagecache_lock *lock)
+{
+	atomic_long_set(&lock->v, 0);
+	init_waitqueue_head(&lock->wait);
+}
+
+void pagecache_add_put(struct pagecache_lock *);
+void pagecache_add_get(struct pagecache_lock *);
+void __pagecache_block_put(struct pagecache_lock *);
+void __pagecache_block_get(struct pagecache_lock *);
+void pagecache_block_put(struct pagecache_lock *);
+void pagecache_block_get(struct pagecache_lock *);
+
 /**
  * struct address_space - Contents of a cacheable, mappable object.
  * @host: Owner, either the inode or the block_device.
@@ -452,6 +474,8 @@ struct address_space {
 	spinlock_t		private_lock;
 	struct list_head	private_list;
 	void			*private_data;
+	struct pagecache_lock	add_lock
+		____cacheline_aligned_in_smp;	/* protects adding new pages */
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1549584a15..a46baade99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -43,6 +43,7 @@ struct io_context;
 struct mempolicy;
 struct nameidata;
 struct nsproxy;
+struct pagecache_lock;
 struct perf_event_context;
 struct pid_namespace;
 struct pipe_inode_info;
@@ -935,6 +936,9 @@ struct task_struct {
 	unsigned int			in_ubsan;
 #endif
 
+	/* currently held lock, for avoiding recursing in fault path: */
+	struct pagecache_lock *pagecache_lock;
+
 	/* Journalling filesystem info: */
 	void				*journal_info;
 
diff --git a/init/init_task.c b/init/init_task.c
index c70ef656d0..92bbb6e909 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -115,6 +115,7 @@ struct task_struct init_task
 	},
 	.blocked	= {{0}},
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
+	.pagecache_lock = NULL,
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
diff --git a/mm/filemap.c b/mm/filemap.c
index d78f577bae..93d7e0e686 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -113,6 +113,73 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
+{
+	BUG_ON(atomic_long_read(&lock->v) == 0);
+
+	if (atomic_long_sub_return_release(i, &lock->v) == 0)
+		wake_up_all(&lock->wait);
+}
+
+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
+{
+	long v = atomic_long_read(&lock->v), old;
+
+	do {
+		old = v;
+
+		if (i > 0 ? v < 0 : v > 0)
+			return false;
+	} while ((v = atomic_long_cmpxchg_acquire(&lock->v,
+					old, old + i)) != old);
+	return true;
+}
+
+static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
+{
+	wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
+}
+
+void pagecache_add_put(struct pagecache_lock *lock)
+{
+	__pagecache_lock_put(lock, 1);
+}
+EXPORT_SYMBOL(pagecache_add_put);
+
+void pagecache_add_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, 1);
+}
+EXPORT_SYMBOL(pagecache_add_get);
+
+void __pagecache_block_put(struct pagecache_lock *lock)
+{
+	__pagecache_lock_put(lock, -1);
+}
+EXPORT_SYMBOL(__pagecache_block_put);
+
+void __pagecache_block_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, -1);
+}
+EXPORT_SYMBOL(__pagecache_block_get);
+
+void pagecache_block_put(struct pagecache_lock *lock)
+{
+	BUG_ON(current->pagecache_lock != lock);
+	current->pagecache_lock = NULL;
+	__pagecache_lock_put(lock, -1);
+}
+EXPORT_SYMBOL(pagecache_block_put);
+
+void pagecache_block_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, -1);
+	BUG_ON(current->pagecache_lock);
+	current->pagecache_lock = lock;
+}
+EXPORT_SYMBOL(pagecache_block_get);
+
 static void page_cache_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -829,11 +896,14 @@ static int __add_to_page_cache_locked(struct page *page,
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 	mapping_set_update(&xas, mapping);
 
+	if (current->pagecache_lock != &mapping->add_lock)
+		pagecache_add_get(&mapping->add_lock);
+
 	if (!huge) {
 		error = mem_cgroup_try_charge(page, current->mm,
 					      gfp_mask, &memcg, false);
 		if (error)
-			return error;
+			goto out;
 	}
 
 	get_page(page);
@@ -869,14 +939,19 @@ static int __add_to_page_cache_locked(struct page *page,
 	if (!huge)
 		mem_cgroup_commit_charge(page, memcg, false, false);
 	trace_mm_filemap_add_to_page_cache(page);
-	return 0;
+	error = 0;
+out:
+	if (current->pagecache_lock != &mapping->add_lock)
+		pagecache_add_put(&mapping->add_lock);
+	return error;
 error:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
-	return xas_error(&xas);
+	error = xas_error(&xas);
+	goto out;
 }
 
 /**
-- 
2.20.1


  parent reply	other threads:[~2019-06-10 19:15 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-10 19:14 bcachefs status update (it's done cooking; let's get this sucker merged) Kent Overstreet
2019-06-10 19:14 ` [PATCH 01/12] Compiler Attributes: add __flatten Kent Overstreet
2019-06-12 17:16   ` Greg KH
2019-06-10 19:14 ` [PATCH 02/12] locking: SIX locks (shared/intent/exclusive) Kent Overstreet
2019-06-10 19:14 ` Kent Overstreet [this message]
2019-06-10 19:14 ` [PATCH 04/12] mm: export find_get_pages() Kent Overstreet
2019-06-10 19:14 ` [PATCH 05/12] fs: insert_inode_locked2() Kent Overstreet
2019-06-10 19:14 ` [PATCH 06/12] fs: factor out d_mark_tmpfile() Kent Overstreet
2019-06-10 19:14 ` [PATCH 07/12] Propagate gfp_t when allocating pte entries from __vmalloc Kent Overstreet
2019-06-10 19:14 ` [PATCH 08/12] block: Add some exports for bcachefs Kent Overstreet
2019-06-10 19:14 ` [PATCH 09/12] bcache: optimize continue_at_nobarrier() Kent Overstreet
2019-06-10 19:14 ` [PATCH 10/12] bcache: move closures to lib/ Kent Overstreet
2019-06-11 10:25   ` Coly Li
2019-06-13  7:28   ` Christoph Hellwig
2019-06-13 11:04     ` Kent Overstreet
2019-06-10 19:14 ` [PATCH 11/12] closures: closure_wait_event() Kent Overstreet
2019-06-11 10:25   ` Coly Li
2019-06-12 17:17   ` Greg KH
2019-06-10 19:14 ` [PATCH 12/12] closures: fix a race on wakeup from closure_sync Kent Overstreet
2019-07-16 10:47   ` Coly Li
2019-07-18  7:46     ` Coly Li
2019-07-22 17:22       ` Kent Overstreet
2019-06-10 20:46 ` bcachefs status update (it's done cooking; let's get this sucker merged) Linus Torvalds
2019-06-11  1:17   ` Kent Overstreet
2019-06-11  4:33     ` Dave Chinner
2019-06-12 16:21       ` Kent Overstreet
2019-06-12 23:02         ` Dave Chinner
2019-06-13 18:36           ` pagecache locking (was: bcachefs status update) merged) Kent Overstreet
2019-06-13 21:13             ` Andreas Dilger
2019-06-13 21:21               ` Kent Overstreet
2019-06-14  0:35                 ` Dave Chinner
2019-06-13 23:55             ` Dave Chinner
2019-06-14  2:30               ` Linus Torvalds
2019-06-14  7:30                 ` Dave Chinner
2019-06-15  1:15                   ` Linus Torvalds
2019-06-14  3:08               ` Linus Torvalds
2019-06-15  4:01                 ` Linus Torvalds
2019-06-17 22:47                   ` Dave Chinner
2019-06-17 23:38                     ` Linus Torvalds
2019-06-18  4:21                       ` Amir Goldstein
2019-06-19 10:38                         ` Jan Kara
2019-06-19 22:37                           ` Dave Chinner
2019-07-03  0:04                             ` pagecache locking Boaz Harrosh
     [not found]                               ` <DM6PR19MB250857CB8A3A1C8279D6F2F3C5FB0@DM6PR19MB2508.namprd19.prod.outlook.com>
2019-07-03  1:25                                 ` Boaz Harrosh
2019-07-05 23:31                               ` Dave Chinner
2019-07-07 15:05                                 ` Boaz Harrosh
2019-07-07 23:55                                   ` Dave Chinner
2019-07-08 13:31                                 ` Jan Kara
2019-07-09 23:47                                   ` Dave Chinner
2019-07-10  8:41                                     ` Jan Kara
2019-06-14 17:08               ` pagecache locking (was: bcachefs status update) merged) Kent Overstreet
2019-06-19  8:21           ` bcachefs status update (it's done cooking; let's get this sucker merged) Jan Kara
2019-07-03  1:04             ` [PATCH] mm: Support madvise_willneed override by Filesystems Boaz Harrosh
2019-07-03 17:21               ` Jan Kara
2019-07-03 18:03                 ` Boaz Harrosh
2019-06-11  4:55     ` bcachefs status update (it's done cooking; let's get this sucker merged) Linus Torvalds
2019-06-11 14:26       ` Matthew Wilcox
2019-06-11  4:10   ` Dave Chinner
2019-06-11  4:39     ` Linus Torvalds
2019-06-11  7:10       ` Dave Chinner
2019-06-12  2:07         ` Linus Torvalds
2019-07-03  5:59 ` Stefan K

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190610191420.27007-4-kent.overstreet@gmail.com \
    --to=kent.overstreet@gmail.com \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).