All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: kernel-team@fb.com, linux-btrfs@vger.kernel.org
Subject: [PATCH 39/42] btrfs: replace cleaner_delayed_iput_mutex with a waitqueue
Date: Fri, 28 Sep 2018 07:18:18 -0400	[thread overview]
Message-ID: <20180928111821.24376-40-josef@toxicpanda.com> (raw)
In-Reply-To: <20180928111821.24376-1-josef@toxicpanda.com>

The throttle path doesn't take cleaner_delayed_iput_mutex, which means
the data space reservation path could think we're done flushing iputs
while a throttler is still in the middle of an iput.  There's no real
reason to serialize the delayed iput flushing, so instead of taking the
cleaner_delayed_iput_mutex whenever we flush the delayed iputs just
replace it with an atomic counter and a waitqueue.  This removes the
short (or long depending on how big the inode is) window where we think
there are no more pending iputs when there really are some.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/ctree.h       |  4 +++-
 fs/btrfs/disk-io.c     |  5 ++---
 fs/btrfs/extent-tree.c |  9 +++++----
 fs/btrfs/inode.c       | 21 +++++++++++++++++++++
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e40356ca0295..1ef0b1649cad 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -894,7 +894,8 @@ struct btrfs_fs_info {
 
 	spinlock_t delayed_iput_lock;
 	struct list_head delayed_iputs;
-	struct mutex cleaner_delayed_iput_mutex;
+	atomic_t nr_delayed_iputs;
+	wait_queue_head_t delayed_iputs_wait;
 
 	/* this protects tree_mod_seq_list */
 	spinlock_t tree_mod_seq_lock;
@@ -3212,6 +3213,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
 			      u64 start, u64 num_bytes, u64 min_size,
 			      loff_t actual_len, u64 *alloc_hint);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 51b2a5bf25e5..3dce9ff72e41 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1692,9 +1692,7 @@ static int cleaner_kthread(void *arg)
 			goto sleep;
 		}
 
-		mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
 		btrfs_run_delayed_iputs(fs_info);
-		mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
 
 		again = btrfs_clean_one_deleted_snapshot(root);
 		mutex_unlock(&fs_info->cleaner_mutex);
@@ -2677,7 +2675,6 @@ int open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->delete_unused_bgs_mutex);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
-	mutex_init(&fs_info->cleaner_delayed_iput_mutex);
 	seqlock_init(&fs_info->profiles_lock);
 
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2699,6 +2696,7 @@ int open_ctree(struct super_block *sb,
 	atomic_set(&fs_info->defrag_running, 0);
 	atomic_set(&fs_info->qgroup_op_seq, 0);
 	atomic_set(&fs_info->reada_works_cnt, 0);
+	atomic_set(&fs_info->nr_delayed_iputs, 0);
 	atomic64_set(&fs_info->tree_mod_seq, 0);
 	fs_info->sb = sb;
 	fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2776,6 +2774,7 @@ int open_ctree(struct super_block *sb,
 	init_waitqueue_head(&fs_info->transaction_wait);
 	init_waitqueue_head(&fs_info->transaction_blocked_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
+	init_waitqueue_head(&fs_info->delayed_iputs_wait);
 
 	INIT_LIST_HEAD(&fs_info->pinned_chunks);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a7ba0d0e8de1..77bc53ad84e9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4258,8 +4258,9 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
 				 * operations. Wait for it to finish so that
 				 * more space is released.
 				 */
-				mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
-				mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+				ret = btrfs_wait_on_delayed_iputs(fs_info);
+				if (ret)
+					return ret;
 				goto again;
 			} else {
 				btrfs_end_transaction(trans);
@@ -4829,9 +4830,9 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
 	 * pinned space, so make sure we run the iputs before we do our pinned
 	 * bytes check below.
 	 */
-	mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
 	btrfs_run_delayed_iputs(fs_info);
-	mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+	wait_event(fs_info->delayed_iputs_wait,
+		   atomic_read(&fs_info->nr_delayed_iputs) == 0);
 
 	trans = btrfs_join_transaction(fs_info->extent_root);
 	if (IS_ERR(trans))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0a1671fb03bf..ab8242b10601 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3319,6 +3319,7 @@ void btrfs_add_delayed_iput(struct inode *inode)
 	if (atomic_add_unless(&inode->i_count, -1, 1))
 		return;
 
+	atomic_inc(&fs_info->nr_delayed_iputs);
 	spin_lock(&fs_info->delayed_iput_lock);
 	ASSERT(list_empty(&binode->delayed_iput));
 	list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
@@ -3338,11 +3339,31 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 		list_del_init(&inode->delayed_iput);
 		spin_unlock(&fs_info->delayed_iput_lock);
 		iput(&inode->vfs_inode);
+		if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
+			wake_up(&fs_info->delayed_iputs_wait);
 		spin_lock(&fs_info->delayed_iput_lock);
 	}
 	spin_unlock(&fs_info->delayed_iput_lock);
 }
 
+/**
+ * btrfs_wait_on_delayed_iputs - wait on the delayed iputs to be done running
+ * @fs_info - the fs_info for this fs
+ * @return - -EINTR if we were killed, 0 if nothing's pending
+ *
+ * This will wait on any delayed iputs that are currently running with KILLABLE
+ * set.  Once they are all done running we will return, unless we are killed in
+ * which case we return -EINTR.
+ */
+int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
+{
+	int ret = wait_event_killable(fs_info->delayed_iputs_wait,
+			atomic_read(&fs_info->nr_delayed_iputs) == 0);
+	if (ret)
+		return -EINTR;
+	return 0;
+}
+
 /*
  * This creates an orphan entry for the given inode in case something goes wrong
  * in the middle of an unlink.
-- 
2.14.3


  parent reply	other threads:[~2018-09-28 11:19 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-28 11:17 [PATCH 00/42][v3] My current patch queue Josef Bacik
2018-09-28 11:17 ` [PATCH 01/42] btrfs: add btrfs_delete_ref_head helper Josef Bacik
2018-09-28 11:17 ` [PATCH 02/42] btrfs: add cleanup_ref_head_accounting helper Josef Bacik
2018-09-28 11:17 ` [PATCH 03/42] btrfs: cleanup extent_op handling Josef Bacik
2018-09-28 11:17 ` [PATCH 04/42] btrfs: only track ref_heads in delayed_ref_updates Josef Bacik
2018-09-28 11:17 ` [PATCH 05/42] btrfs: only count ref heads run in __btrfs_run_delayed_refs Josef Bacik
2018-09-28 11:17 ` [PATCH 06/42] btrfs: introduce delayed_refs_rsv Josef Bacik
2018-09-28 11:51   ` Nikolay Borisov
2018-09-28 11:58     ` Josef Bacik
2018-09-28 11:17 ` [PATCH 07/42] btrfs: check if free bgs for commit Josef Bacik
2018-10-04 11:24   ` David Sterba
2018-10-11 18:33     ` Josef Bacik
2018-10-12 16:50       ` David Sterba
2018-09-28 11:17 ` [PATCH 08/42] btrfs: dump block_rsv whe dumping space info Josef Bacik
2018-10-01 17:08   ` David Sterba
2018-09-28 11:17 ` [PATCH 09/42] btrfs: release metadata before running delayed refs Josef Bacik
2018-09-28 11:17 ` [PATCH 10/42] btrfs: protect space cache inode alloc with nofs Josef Bacik
2018-10-01 17:08   ` David Sterba
2018-09-28 11:17 ` [PATCH 11/42] btrfs: fix truncate throttling Josef Bacik
2018-09-28 11:17 ` [PATCH 12/42] btrfs: don't use global rsv for chunk allocation Josef Bacik
2018-09-28 11:17 ` [PATCH 13/42] btrfs: add ALLOC_CHUNK_FORCE to the flushing code Josef Bacik
2018-09-28 11:17 ` [PATCH 14/42] btrfs: reset max_extent_size properly Josef Bacik
2018-09-28 11:17 ` [PATCH 15/42] btrfs: don't enospc all tickets on flush failure Josef Bacik
2018-09-28 11:17 ` [PATCH 16/42] btrfs: loop in inode_rsv_refill Josef Bacik
2018-10-02 13:47   ` David Sterba
2018-09-28 11:17 ` [PATCH 17/42] btrfs: run delayed iputs before committing Josef Bacik
2018-09-28 11:17 ` [PATCH 18/42] btrfs: move the dio_sem higher up the callchain Josef Bacik
2018-10-03 12:27   ` David Sterba
2018-10-03 14:54   ` Filipe Manana
2018-09-28 11:17 ` [PATCH 19/42] btrfs: set max_extent_size properly Josef Bacik
2018-09-28 11:17 ` [PATCH 20/42] btrfs: don't use ctl->free_space for max_extent_size Josef Bacik
2018-09-28 11:18 ` [PATCH 21/42] btrfs: reset max_extent_size on clear in a bitmap Josef Bacik
2018-09-28 11:18 ` [PATCH 22/42] btrfs: only run delayed refs if we're committing Josef Bacik
2018-09-28 11:18 ` [PATCH 23/42] btrfs: make sure we create all new bgs Josef Bacik
2018-10-08 13:45   ` David Sterba
2018-09-28 11:18 ` [PATCH 24/42] btrfs: assert on non-empty delayed iputs Josef Bacik
2018-10-08 13:44   ` David Sterba
2018-09-28 11:18 ` [PATCH 25/42] btrfs: pass delayed_refs_root to btrfs_delayed_ref_lock Josef Bacik
2018-09-28 11:18 ` [PATCH 26/42] btrfs: make btrfs_destroy_delayed_refs use btrfs_delayed_ref_lock Josef Bacik
2018-09-28 11:18 ` [PATCH 27/42] btrfs: make btrfs_destroy_delayed_refs use btrfs_delete_ref_head Josef Bacik
2018-09-28 11:18 ` [PATCH 28/42] btrfs: handle delayed ref head accounting cleanup in abort Josef Bacik
2018-09-28 11:18 ` [PATCH 29/42] btrfs: call btrfs_create_pending_block_groups unconditionally Josef Bacik
2018-09-28 11:18 ` [PATCH 30/42] btrfs: just delete pending bgs if we are aborted Josef Bacik
2018-09-28 11:18 ` [PATCH 31/42] btrfs: cleanup pending bgs on transaction abort Josef Bacik
2018-09-28 11:18 ` [PATCH 32/42] btrfs: only free reserved extent if we didn't insert it Josef Bacik
2018-09-28 11:18 ` [PATCH 33/42] btrfs: fix insert_reserved error handling Josef Bacik
2018-09-28 11:18 ` [PATCH 34/42] btrfs: wait on ordered extents on abort cleanup Josef Bacik
2018-09-28 11:18 ` [PATCH 35/42] MAINTAINERS: update my email address for btrfs Josef Bacik
2018-09-28 11:18 ` [PATCH 36/42] btrfs: wait on caching when putting the bg cache Josef Bacik
2018-10-01 17:17   ` David Sterba
2018-09-28 11:18 ` [PATCH 37/42] btrfs: wakeup cleaner thread when adding delayed iput Josef Bacik
2018-10-08 10:59   ` Filipe Manana
2018-09-28 11:18 ` [PATCH 38/42] btrfs: be more explicit about allowed flush states Josef Bacik
2018-09-28 11:18 ` Josef Bacik [this message]
2018-09-28 11:18 ` [PATCH 40/42] btrfs: drop min_size from evict_refill_and_join Josef Bacik
2018-10-03 12:52   ` David Sterba
2018-09-28 11:18 ` [PATCH 41/42] btrfs: reserve extra space during evict() Josef Bacik
2018-09-28 11:18 ` [PATCH 42/42] btrfs: don't run delayed_iputs in commit Josef Bacik
2018-10-11 19:53 [PATCH 00/42][v4] My current patch queue Josef Bacik
2018-10-11 19:54 ` [PATCH 39/42] btrfs: replace cleaner_delayed_iput_mutex with a waitqueue Josef Bacik
2018-10-12 19:32 [PATCH 00/42][v5] My current patch queue Josef Bacik
2018-10-12 19:32 ` [PATCH 39/42] btrfs: replace cleaner_delayed_iput_mutex with a waitqueue Josef Bacik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180928111821.24376-40-josef@toxicpanda.com \
    --to=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.