From: Josef Bacik <josef@toxicpanda.com>
To: linux-btrfs@vger.kernel.org, kernel-team@fb.com
Subject: [PATCH 23/23] btrfs: do async reclaim for data reservations
Date: Fri, 31 Jan 2020 17:36:13 -0500
Message-ID: <20200131223613.490779-24-josef@toxicpanda.com>
In-Reply-To: <20200131223613.490779-1-josef@toxicpanda.com>

Now that the data ticketing infrastructure is in place, move normal data
reservations over to an async reclaim helper to satisfy tickets.
Previously multiple tasks could race in and each allocate a chunk,
resulting in more data chunks than we actually need.  Serializing these
allocations behind a single flushing thread means chunks are only
allocated as needed, and cuts down on transaction commits and other
flush-related activity.

Priority reservations will continue to work as before, simply trying to
allocate a chunk until they can satisfy their reservation.
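
Along the same lines, here is a simplified, self-contained model of the
flush-state progression the new async data reclaim worker uses: if a
pass granted a ticket (progress, tracked like tickets_id), restart from
the lightest state; if not, escalate to the next one.  The sim_* and
run_flush_state() names are made up for this sketch, and the real worker
keeps looping or failing tickets rather than simply returning.

#include <stdio.h>

enum flush_state {			/* mirrors data_flush_states[] */
	FLUSH_DELALLOC_WAIT,
	RUN_DELAYED_IPUTS,
	FLUSH_DELAYED_REFS,
	COMMIT_TRANS,
	NR_FLUSH_STATES,
};

struct sim_space_info {
	int nr_tickets;			/* queued reservations */
	unsigned long long tickets_id;	/* bumped whenever a ticket is granted */
};

/* Pretend flushing: in this toy model every third attempt frees space. */
static void run_flush_state(struct sim_space_info *si, enum flush_state state)
{
	static int calls;

	printf("flush state %d\n", state);
	if (++calls % 3 == 0 && si->nr_tickets) {
		si->nr_tickets--;
		si->tickets_id++;
	}
}

int main(void)
{
	struct sim_space_info si = { .nr_tickets = 4 };
	unsigned long long last_tickets_id = si.tickets_id;
	int state = 0;

	while (si.nr_tickets && state < NR_FLUSH_STATES) {
		run_flush_state(&si, state);
		if (si.tickets_id != last_tickets_id) {
			/* made progress, start over with the cheap states */
			last_tickets_id = si.tickets_id;
			state = 0;
		} else {
			/* no progress, try something more drastic */
			state++;
		}
	}
	printf("done, %d tickets left\n", si.nr_tickets);
	return 0;
}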

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/ctree.h      |   3 +-
 fs/btrfs/disk-io.c    |   2 +-
 fs/btrfs/space-info.c | 123 ++++++++++++++++++++++++++++++------------
 3 files changed, 91 insertions(+), 37 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 865b24a1759e..709823a23c62 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -493,7 +493,7 @@ enum btrfs_orphan_cleanup_state {
 	ORPHAN_CLEANUP_DONE	= 2,
 };
 
-void btrfs_init_async_reclaim_work(struct work_struct *work);
+void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
 
 /* fs_info */
 struct reloc_control;
@@ -917,6 +917,7 @@ struct btrfs_fs_info {
 
 	/* Used to reclaim the metadata space in the background. */
 	struct work_struct async_reclaim_work;
+	struct work_struct async_data_reclaim_work;
 
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 56d0a24aec74..825242f5c3f7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2753,7 +2753,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	fs_info->check_integrity_print_mask = 0;
 #endif
 	btrfs_init_balance(fs_info);
-	btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
+	btrfs_init_async_reclaim_work(fs_info);
 
 	spin_lock_init(&fs_info->block_group_cache_lock);
 	fs_info->block_group_cache_tree = RB_ROOT;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 5b0dc1046daa..4249a6711200 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -780,9 +780,83 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 	} while (flush_state <= COMMIT_TRANS);
 }
 
-void btrfs_init_async_reclaim_work(struct work_struct *work)
+static const enum btrfs_flush_state data_flush_states[] = {
+	FLUSH_DELALLOC_WAIT,
+	RUN_DELAYED_IPUTS,
+	FLUSH_DELAYED_REFS,
+	COMMIT_TRANS,
+};
+
+static void btrfs_async_reclaim_data_space(struct work_struct *work)
 {
-	INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_space_info *space_info;
+	u64 last_tickets_id;
+	int flush_state = 0;
+
+	fs_info = container_of(work, struct btrfs_fs_info,
+			       async_data_reclaim_work);
+	space_info = fs_info->data_sinfo;
+
+	spin_lock(&space_info->lock);
+	if (list_empty(&space_info->tickets)) {
+		space_info->flush = 0;
+		spin_unlock(&space_info->lock);
+		return;
+	}
+	last_tickets_id = space_info->tickets_id;
+	spin_unlock(&space_info->lock);
+
+	while (!space_info->full) {
+		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE);
+		spin_lock(&space_info->lock);
+		if (list_empty(&space_info->tickets)) {
+			space_info->flush = 0;
+			spin_unlock(&space_info->lock);
+			return;
+		}
+		last_tickets_id = space_info->tickets_id;
+		spin_unlock(&space_info->lock);
+	}
+
+	while (flush_state < ARRAY_SIZE(data_flush_states)) {
+		flush_space(fs_info, space_info, U64_MAX,
+			    data_flush_states[flush_state]);
+		spin_lock(&space_info->lock);
+		if (list_empty(&space_info->tickets)) {
+			space_info->flush = 0;
+			spin_unlock(&space_info->lock);
+			return;
+		}
+
+		if (last_tickets_id == space_info->tickets_id) {
+			flush_state++;
+		} else {
+			last_tickets_id = space_info->tickets_id;
+			flush_state = 0;
+		}
+
+		if (flush_state >= ARRAY_SIZE(data_flush_states)) {
+			if (space_info->full) {
+				if (maybe_fail_all_tickets(fs_info,
+							   space_info))
+					flush_state = 0;
+				else
+					space_info->flush = 0;
+			} else {
+				flush_state = 0;
+			}
+		}
+		spin_unlock(&space_info->lock);
+	}
+}
+
+void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
+{
+	INIT_WORK(&fs_info->async_reclaim_work,
+		  btrfs_async_reclaim_metadata_space);
+	INIT_WORK(&fs_info->async_data_reclaim_work,
+		  btrfs_async_reclaim_data_space);
 }
 
 static const enum btrfs_flush_state priority_flush_states[] = {
@@ -802,13 +876,6 @@ static const enum btrfs_flush_state evict_flush_states[] = {
 	COMMIT_TRANS,
 };
 
-static const enum btrfs_flush_state data_flush_states[] = {
-	FLUSH_DELALLOC_WAIT,
-	RUN_DELAYED_IPUTS,
-	FLUSH_DELAYED_REFS,
-	COMMIT_TRANS,
-};
-
 static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 				struct btrfs_space_info *space_info,
 				struct reserve_ticket *ticket,
@@ -840,12 +907,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 
 static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
 					struct btrfs_space_info *space_info,
-					struct reserve_ticket *ticket,
-					const enum btrfs_flush_state *states,
-					int states_nr)
+					struct reserve_ticket *ticket)
 {
-	int flush_state = 0;
-
 	while (!space_info->full) {
 		flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE);
 		spin_lock(&space_info->lock);
@@ -855,17 +918,6 @@ static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
 		}
 		spin_unlock(&space_info->lock);
 	}
-
-	while (flush_state < states_nr) {
-		flush_space(fs_info, space_info, U64_MAX, states[flush_state]);
-		spin_lock(&space_info->lock);
-		if (ticket->bytes == 0) {
-			spin_unlock(&space_info->lock);
-			return;
-		}
-		spin_unlock(&space_info->lock);
-		flush_state++;
-	}
 }
 
 static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
@@ -920,6 +972,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
 	int ret;
 
 	switch (flush) {
+	case BTRFS_RESERVE_FLUSH_DATA:
 	case BTRFS_RESERVE_FLUSH_ALL:
 		wait_reserve_ticket(fs_info, space_info, ticket);
 		break;
@@ -933,14 +986,8 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
 						evict_flush_states,
 						ARRAY_SIZE(evict_flush_states));
 		break;
-	case BTRFS_RESERVE_FLUSH_DATA:
-		priority_reclaim_data_space(fs_info, space_info, ticket,
-					data_flush_states,
-					ARRAY_SIZE(data_flush_states));
-		break;
 	case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE:
-		priority_reclaim_data_space(fs_info, space_info, ticket,
-					    NULL, 0);
+		priority_reclaim_data_space(fs_info, space_info, ticket);
 		break;
 	default:
 		ASSERT(0);
@@ -989,6 +1036,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 			   struct btrfs_space_info *space_info, u64 orig_bytes,
 			   enum btrfs_reserve_flush_enum flush)
 {
+	struct work_struct *async_work;
 	struct reserve_ticket ticket;
 	u64 used;
 	int ret = 0;
@@ -997,6 +1045,11 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 	ASSERT(orig_bytes);
 	ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL);
 
+	if (flush == BTRFS_RESERVE_FLUSH_DATA)
+		async_work = &fs_info->async_data_reclaim_work;
+	else
+		async_work = &fs_info->async_reclaim_work;
+
 	spin_lock(&space_info->lock);
 	ret = -ENOSPC;
 	used = btrfs_space_info_used(space_info, true);
@@ -1026,7 +1079,8 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 		ticket.bytes = orig_bytes;
 		ticket.error = 0;
 		init_waitqueue_head(&ticket.wait);
-		if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+		if (flush == BTRFS_RESERVE_FLUSH_ALL ||
+		    flush == BTRFS_RESERVE_FLUSH_DATA) {
 			list_add_tail(&ticket.list, &space_info->tickets);
 			if (!space_info->flush) {
 				space_info->flush = 1;
@@ -1034,8 +1088,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 							  space_info->flags,
 							  orig_bytes, flush,
 							  "enospc");
-				queue_work(system_unbound_wq,
-					   &fs_info->async_reclaim_work);
+				queue_work(system_unbound_wq, async_work);
 			}
 		} else {
 			list_add_tail(&ticket.list,
-- 
2.24.1

