Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: Tejun Heo <tj@kernel.org>
To: dsterba@suse.com, clm@fb.com, josef@toxicpanda.com,
	axboe@kernel.dk, jack@suse.cz
Cc: linux-btrfs@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, kernel-team@fb.com
Subject: [PATCH 7/8] Btrfs: use REQ_CGROUP_PUNT for worker thread submitted bios
Date: Thu, 13 Jun 2019 17:33:49 -0700
Message-ID: <20190614003350.1178444-8-tj@kernel.org> (raw)
In-Reply-To: <20190614003350.1178444-1-tj@kernel.org>

From: Chris Mason <clm@fb.com>

Async CRCs and compression submit IO through helper threads, which
means they have IO priority inversions when cgroup IO controllers are
in use.

This flags all of the writes submitted by btrfs helper threads as
REQ_CGROUP_PUNT.  submit_bio() will punt these to dedicated per-blkcg
work items to avoid the priority inversion.

For the compression code, we take a reference on the wbc's blkg css and
pass it down to the async workers.

For the async crcs, the bio already has the correct css, we just need to
tell the block layer to use REQ_CGROUP_PUNT.

Signed-off-by: Chris Mason <clm@fb.com>
Modified-and-reviewed-by: Tejun Heo <tj@kernel.org>
---
 fs/btrfs/compression.c |  8 +++++++-
 fs/btrfs/compression.h |  3 ++-
 fs/btrfs/disk-io.c     |  6 ++++++
 fs/btrfs/extent_io.c   |  3 +++
 fs/btrfs/inode.c       | 30 +++++++++++++++++++++++++++---
 5 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 873261b932b8..138479a9576c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -289,7 +289,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				 unsigned long compressed_len,
 				 struct page **compressed_pages,
 				 unsigned long nr_pages,
-				 unsigned int write_flags)
+				 unsigned int write_flags,
+				 struct cgroup_subsys_state *blkcg_css)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct bio *bio = NULL;
@@ -323,6 +324,11 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	bio->bi_opf = REQ_OP_WRITE | write_flags;
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
+
+	if (blkcg_css) {
+		bio->bi_opf |= REQ_CGROUP_PUNT;
+		bio_associate_blkg_from_css(bio, blkcg_css);
+	}
 	refcount_set(&cb->pending_bios, 1);
 
 	/* create and submit bios for the compressed pages */
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9976fe0f7526..7cbefab96ecf 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -93,7 +93,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long compressed_len,
 				  struct page **compressed_pages,
 				  unsigned long nr_pages,
-				  unsigned int write_flags);
+				  unsigned int write_flags,
+				  struct cgroup_subsys_state *blkcg_css);
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9dbe4ba3995d..a5ebbf3d0833 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -799,6 +799,12 @@ static void run_one_async_done(struct btrfs_work *work)
 		return;
 	}
 
+	/*
+	 * All of the bios that pass through here are from async helpers.
+	 * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's
+	 * context.  This changes nothing when cgroups aren't in use.
+	 */
+	async->bio->bi_opf |= REQ_CGROUP_PUNT;
 	ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio,
 			    async->mirror_num);
 	if (ret) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9f223d7d78c0..d7b57341ff1a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4175,6 +4175,9 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 		.nr_to_write	= nr_pages * 2,
 		.range_start	= start,
 		.range_end	= end + 1,
+		/* we're called from an async helper function */
+		.punt_to_cgroup	= 1,
+		.no_wbc_acct	= 1,
 	};
 
 	while (start <= end) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index df5527cc07b9..3f9b35bc0455 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -357,6 +357,7 @@ struct async_extent {
 };
 
 struct async_chunk {
+	struct cgroup_subsys_state *blkcg_css;
 	struct inode *inode;
 	struct page *locked_page;
 	u64 start;
@@ -846,7 +847,8 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
 				    ins.objectid,
 				    ins.offset, async_extent->pages,
 				    async_extent->nr_pages,
-				    async_chunk->write_flags)) {
+				    async_chunk->write_flags,
+				    async_chunk->blkcg_css)) {
 			struct page *p = async_extent->pages[0];
 			const u64 start = async_extent->start;
 			const u64 end = start + async_extent->ram_size - 1;
@@ -1170,6 +1172,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 	async_chunk = container_of(work, struct async_chunk, work);
 	if (async_chunk->inode)
 		btrfs_add_delayed_iput(async_chunk->inode);
+	if (async_chunk->blkcg_css)
+		css_put(async_chunk->blkcg_css);
 	/*
 	 * Since the pointer to 'pending' is at the beginning of the array of
 	 * async_chunk's, freeing it ensures the whole array has been freed.
@@ -1178,12 +1182,15 @@ static noinline void async_cow_free(struct btrfs_work *work)
 		kvfree(async_chunk->pending);
 }
 
-static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+static int cow_file_range_async(struct inode *inode,
+				struct writeback_control *wbc,
+				struct page *locked_page,
 				u64 start, u64 end, int *page_started,
 				unsigned long *nr_written,
 				unsigned int write_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
 	struct async_cow *ctx;
 	struct async_chunk *async_chunk;
 	unsigned long nr_pages;
@@ -1251,12 +1258,29 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 		 * to unlock it.
 		 */
 		if (locked_page) {
+			/*
+			 * Depending on the compressibility, the pages
+			 * might or might not go through async.  We want
+			 * all of them to be accounted against @wbc once.
+			 * Let's do it here before the paths diverge.  wbc
+			 * accounting is used only for foreign writeback
+			 * detection and doesn't need full accuracy.  Just
+			 * account the whole thing against the first page.
+			 */
+			wbc_account_io(wbc, locked_page, cur_end - start);
 			async_chunk[i].locked_page = locked_page;
 			locked_page = NULL;
 		} else {
 			async_chunk[i].locked_page = NULL;
 		}
 
+		if (blkcg_css != blkcg_root_css) {
+			css_get(blkcg_css);
+			async_chunk[i].blkcg_css = blkcg_css;
+		} else {
+			async_chunk[i].blkcg_css = NULL;
+		}
+
 		btrfs_init_work(&async_chunk[i].work,
 				btrfs_delalloc_helper,
 				async_cow_start, async_cow_submit,
@@ -1653,7 +1677,7 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
 	} else {
 		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
 			&BTRFS_I(inode)->runtime_flags);
-		ret = cow_file_range_async(inode, locked_page, start, end,
+		ret = cow_file_range_async(inode, wbc, locked_page, start, end,
 					   page_started, nr_written,
 					   write_flags);
 	}
-- 
2.17.1


  parent reply index

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-14  0:33 [PATCHSET btrfs/for-next] btrfs: fix cgroup writeback support Tejun Heo
2019-06-14  0:33 ` [PATCH 1/8] blkcg, writeback: Add wbc->no_wbc_acct Tejun Heo
2019-06-14 13:41   ` Josef Bacik
2019-06-14  0:33 ` [PATCH 2/8] blkcg, writeback: Implement wbc_blkcg_css() Tejun Heo
2019-06-14 13:41   ` Josef Bacik
2019-06-14  0:33 ` [PATCH 3/8] blkcg: implement REQ_CGROUP_PUNT Tejun Heo
2019-06-14 13:43   ` Josef Bacik
2019-06-14  0:33 ` [PATCH 4/8] Btrfs: stop using btrfs_schedule_bio() Tejun Heo
2019-06-14 13:45   ` Josef Bacik
2019-06-14  0:33 ` [PATCH 5/8] Btrfs: delete the entire async bio submission framework Tejun Heo
2019-06-14 13:48   ` Josef Bacik
2019-06-14  0:33 ` [PATCH 6/8] Btrfs: only associate the locked page with one async_cow struct Tejun Heo
2019-06-14 13:48   ` Josef Bacik
2019-06-14  0:33 ` Tejun Heo [this message]
2019-06-14 13:50   ` [PATCH 7/8] Btrfs: use REQ_CGROUP_PUNT for worker thread submitted bios Josef Bacik
2019-06-14 15:15   ` David Sterba
2019-06-14 15:34     ` Tejun Heo
2019-06-14  0:33 ` [PATCH 8/8] Btrfs: extent_write_locked_range() should attach inode->i_wb Tejun Heo
2019-06-14 14:12   ` Josef Bacik
2019-06-14 15:32   ` [PATCH v2 " Tejun Heo
2019-06-14 17:46 ` [PATCHSET btrfs/for-next] btrfs: fix cgroup writeback support Tejun Heo

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190614003350.1178444-8-tj@kernel.org \
    --to=tj@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=jack@suse.cz \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org linux-btrfs@archiver.kernel.org
	public-inbox-index linux-btrfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox