All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kent Overstreet <kent.overstreet@gmail.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-bcache@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Subject: [PATCH 03/12] mm: pagecache add lock
Date: Mon, 10 Jun 2019 15:14:11 -0400	[thread overview]
Message-ID: <20190610191420.27007-4-kent.overstreet@gmail.com> (raw)
In-Reply-To: <20190610191420.27007-1-kent.overstreet@gmail.com>

Add a per address space lock around adding pages to the pagecache - making it
possible for fallocate INSERT_RANGE/COLLAPSE_RANGE to work correctly, and also
hopefully making truncate and dio a bit saner.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
---
 fs/inode.c            |  1 +
 include/linux/fs.h    | 24 +++++++++++++
 include/linux/sched.h |  4 +++
 init/init_task.c      |  1 +
 mm/filemap.c          | 81 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 9a453f3637..8881dc551f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -350,6 +350,7 @@ EXPORT_SYMBOL(inc_nlink);
 static void __address_space_init_once(struct address_space *mapping)
 {
 	xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
+	pagecache_lock_init(&mapping->add_lock);
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e76790..a88d994751 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -418,6 +418,28 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 
+/*
+ * Two-state lock - can be taken for add or block - both states are shared,
+ * like read side of rwsem, but conflict with other state:
+ */
+struct pagecache_lock {
+	atomic_long_t		v;
+	wait_queue_head_t	wait;
+};
+
+static inline void pagecache_lock_init(struct pagecache_lock *lock)
+{
+	atomic_long_set(&lock->v, 0);
+	init_waitqueue_head(&lock->wait);
+}
+
+void pagecache_add_put(struct pagecache_lock *);
+void pagecache_add_get(struct pagecache_lock *);
+void __pagecache_block_put(struct pagecache_lock *);
+void __pagecache_block_get(struct pagecache_lock *);
+void pagecache_block_put(struct pagecache_lock *);
+void pagecache_block_get(struct pagecache_lock *);
+
 /**
  * struct address_space - Contents of a cacheable, mappable object.
  * @host: Owner, either the inode or the block_device.
@@ -452,6 +474,8 @@ struct address_space {
 	spinlock_t		private_lock;
 	struct list_head	private_list;
 	void			*private_data;
+	struct pagecache_lock	add_lock
+		____cacheline_aligned_in_smp;	/* protects adding new pages */
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1549584a15..a46baade99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -43,6 +43,7 @@ struct io_context;
 struct mempolicy;
 struct nameidata;
 struct nsproxy;
+struct pagecache_lock;
 struct perf_event_context;
 struct pid_namespace;
 struct pipe_inode_info;
@@ -935,6 +936,9 @@ struct task_struct {
 	unsigned int			in_ubsan;
 #endif
 
+	/* currently held lock, for avoiding recursing in fault path: */
+	struct pagecache_lock *pagecache_lock;
+
 	/* Journalling filesystem info: */
 	void				*journal_info;
 
diff --git a/init/init_task.c b/init/init_task.c
index c70ef656d0..92bbb6e909 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -115,6 +115,7 @@ struct task_struct init_task
 	},
 	.blocked	= {{0}},
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(init_task.alloc_lock),
+	.pagecache_lock = NULL,
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
diff --git a/mm/filemap.c b/mm/filemap.c
index d78f577bae..93d7e0e686 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -113,6 +113,73 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
+{
+	BUG_ON(atomic_long_read(&lock->v) == 0);
+
+	if (atomic_long_sub_return_release(i, &lock->v) == 0)
+		wake_up_all(&lock->wait);
+}
+
+static bool __pagecache_lock_tryget(struct pagecache_lock *lock, long i)
+{
+	long v = atomic_long_read(&lock->v), old;
+
+	do {
+		old = v;
+
+		if (i > 0 ? v < 0 : v > 0)
+			return false;
+	} while ((v = atomic_long_cmpxchg_acquire(&lock->v,
+					old, old + i)) != old);
+	return true;
+}
+
+static void __pagecache_lock_get(struct pagecache_lock *lock, long i)
+{
+	wait_event(lock->wait, __pagecache_lock_tryget(lock, i));
+}
+
+void pagecache_add_put(struct pagecache_lock *lock)
+{
+	__pagecache_lock_put(lock, 1);
+}
+EXPORT_SYMBOL(pagecache_add_put);
+
+void pagecache_add_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, 1);
+}
+EXPORT_SYMBOL(pagecache_add_get);
+
+void __pagecache_block_put(struct pagecache_lock *lock)
+{
+	__pagecache_lock_put(lock, -1);
+}
+EXPORT_SYMBOL(__pagecache_block_put);
+
+void __pagecache_block_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, -1);
+}
+EXPORT_SYMBOL(__pagecache_block_get);
+
+void pagecache_block_put(struct pagecache_lock *lock)
+{
+	BUG_ON(current->pagecache_lock != lock);
+	current->pagecache_lock = NULL;
+	__pagecache_lock_put(lock, -1);
+}
+EXPORT_SYMBOL(pagecache_block_put);
+
+void pagecache_block_get(struct pagecache_lock *lock)
+{
+	__pagecache_lock_get(lock, -1);
+	BUG_ON(current->pagecache_lock);
+	current->pagecache_lock = lock;
+}
+EXPORT_SYMBOL(pagecache_block_get);
+
 static void page_cache_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -829,11 +896,14 @@ static int __add_to_page_cache_locked(struct page *page,
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 	mapping_set_update(&xas, mapping);
 
+	if (current->pagecache_lock != &mapping->add_lock)
+		pagecache_add_get(&mapping->add_lock);
+
 	if (!huge) {
 		error = mem_cgroup_try_charge(page, current->mm,
 					      gfp_mask, &memcg, false);
 		if (error)
-			return error;
+			goto out;
 	}
 
 	get_page(page);
@@ -869,14 +939,19 @@ static int __add_to_page_cache_locked(struct page *page,
 	if (!huge)
 		mem_cgroup_commit_charge(page, memcg, false, false);
 	trace_mm_filemap_add_to_page_cache(page);
-	return 0;
+	error = 0;
+out:
+	if (current->pagecache_lock != &mapping->add_lock)
+		pagecache_add_put(&mapping->add_lock);
+	return error;
 error:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
-	return xas_error(&xas);
+	error = xas_error(&xas);
+	goto out;
 }
 
 /**
-- 
2.20.1


  parent reply	other threads:[~2019-06-10 19:15 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-10 19:14 bcachefs status update (it's done cooking; let's get this sucker merged) Kent Overstreet
2019-06-10 19:14 ` [PATCH 01/12] Compiler Attributes: add __flatten Kent Overstreet
2019-06-12 17:16   ` Greg KH
2019-06-10 19:14 ` [PATCH 02/12] locking: SIX locks (shared/intent/exclusive) Kent Overstreet
2019-06-10 19:14 ` Kent Overstreet [this message]
2019-06-10 19:14 ` [PATCH 04/12] mm: export find_get_pages() Kent Overstreet
2019-06-10 19:14 ` [PATCH 05/12] fs: insert_inode_locked2() Kent Overstreet
2019-06-10 19:14 ` [PATCH 06/12] fs: factor out d_mark_tmpfile() Kent Overstreet
2019-06-10 19:14 ` [PATCH 07/12] Propagate gfp_t when allocating pte entries from __vmalloc Kent Overstreet
2019-06-10 19:14 ` [PATCH 08/12] block: Add some exports for bcachefs Kent Overstreet
2019-06-10 19:14 ` [PATCH 09/12] bcache: optimize continue_at_nobarrier() Kent Overstreet
2019-06-10 19:14 ` [PATCH 10/12] bcache: move closures to lib/ Kent Overstreet
2019-06-11 10:25   ` Coly Li
2019-06-13  7:28   ` Christoph Hellwig
2019-06-13 11:04     ` Kent Overstreet
2019-06-10 19:14 ` [PATCH 11/12] closures: closure_wait_event() Kent Overstreet
2019-06-11 10:25   ` Coly Li
2019-06-12 17:17   ` Greg KH
2019-06-10 19:14 ` [PATCH 12/12] closures: fix a race on wakeup from closure_sync Kent Overstreet
2019-07-16 10:47   ` Coly Li
2019-07-18  7:46     ` Coly Li
2019-07-22 17:22       ` Kent Overstreet
2020-11-24 23:07         ` Marc Smith
2020-11-25 18:10           ` Marc Smith
2019-06-10 20:46 ` bcachefs status update (it's done cooking; let's get this sucker merged) Linus Torvalds
2019-06-11  1:17   ` Kent Overstreet
2019-06-11  4:33     ` Dave Chinner
2019-06-12 16:21       ` Kent Overstreet
2019-06-12 23:02         ` Dave Chinner
2019-06-13 18:36           ` pagecache locking (was: bcachefs status update) merged) Kent Overstreet
2019-06-13 21:13             ` Andreas Dilger
2019-06-13 21:21               ` Kent Overstreet
2019-06-14  0:35                 ` Dave Chinner
2019-06-13 23:55             ` Dave Chinner
2019-06-14  2:30               ` Linus Torvalds
2019-06-14  7:30                 ` Dave Chinner
2019-06-15  1:15                   ` Linus Torvalds
2019-06-14  3:08               ` Linus Torvalds
2019-06-15  4:01                 ` Linus Torvalds
2019-06-17 22:47                   ` Dave Chinner
2019-06-17 23:38                     ` Linus Torvalds
2019-06-18  4:21                       ` Amir Goldstein
2019-06-19 10:38                         ` Jan Kara
2019-06-19 22:37                           ` Dave Chinner
2019-07-03  0:04                             ` pagecache locking Boaz Harrosh
     [not found]                               ` <DM6PR19MB250857CB8A3A1C8279D6F2F3C5FB0@DM6PR19MB2508.namprd19.prod.outlook.com>
2019-07-03  1:25                                 ` Boaz Harrosh
2019-07-05 23:31                               ` Dave Chinner
2019-07-07 15:05                                 ` Boaz Harrosh
2019-07-07 23:55                                   ` Dave Chinner
2019-07-08 13:31                                 ` Jan Kara
2019-07-09 23:47                                   ` Dave Chinner
2019-07-10  8:41                                     ` Jan Kara
2019-06-14 17:08               ` pagecache locking (was: bcachefs status update) merged) Kent Overstreet
2019-06-19  8:21           ` bcachefs status update (it's done cooking; let's get this sucker merged) Jan Kara
2019-07-03  1:04             ` [PATCH] mm: Support madvise_willneed override by Filesystems Boaz Harrosh
2019-07-03 17:21               ` Jan Kara
2019-07-03 18:03                 ` Boaz Harrosh
2019-06-11  4:55     ` bcachefs status update (it's done cooking; let's get this sucker merged) Linus Torvalds
2019-06-11 14:26       ` Matthew Wilcox
2019-06-11  4:10   ` Dave Chinner
2019-06-11  4:39     ` Linus Torvalds
2019-06-11  7:10       ` Dave Chinner
2019-06-12  2:07         ` Linus Torvalds
2019-07-03  5:59 ` Stefan K
2020-06-10 11:02 ` Stefan K
2020-06-10 11:32   ` Kent Overstreet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190610191420.27007-4-kent.overstreet@gmail.com \
    --to=kent.overstreet@gmail.com \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.