All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] btrfs: remove buffer heads form superblock handling
@ 2020-01-17 12:51 Johannes Thumshirn
  2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-17 12:51 UTC (permalink / raw)
  To: David Sterba; +Cc: Nikolay Borisov, linux-btrfs, Johannes Thumshirn

This patch series removes the use of buffer_heads from btrfs' super block read
and write paths.

The first patch removes buffer_heads from superblock reading.
The second from super_block writing and the subsequent patches remove the
buffer_heads from the integrity check code.

It's based on misc-next from Friday January 17, and doesn't show any
regressions in xfstests to the baseline.

Johannes Thumshirn (5):
  btrfs: remove buffer heads from super block reading
  btrfs: remove use of buffer_heads from superblock writeout
  btrfs: remove btrfsic_submit_bh()
  btrfs: remove buffer_heads from btrfsic_process_written_block()
  btrfs: remove buffer_heads form superblock mirror integrity checking

 fs/btrfs/check-integrity.c | 198 ++++++++++---------------------------
 fs/btrfs/check-integrity.h |   2 -
 fs/btrfs/disk-io.c         | 187 ++++++++++++++++++++---------------
 fs/btrfs/disk-io.h         |   4 +-
 fs/btrfs/volumes.c         |  64 +++++++-----
 fs/btrfs/volumes.h         |   2 -
 6 files changed, 199 insertions(+), 258 deletions(-)

-- 
2.24.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/5] btrfs: remove buffer heads from super block reading
  2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
@ 2020-01-17 12:51 ` Johannes Thumshirn
  2020-01-17 13:31   ` David Sterba
  2020-01-17 14:30   ` Nikolay Borisov
  2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-17 12:51 UTC (permalink / raw)
  To: David Sterba; +Cc: Nikolay Borisov, linux-btrfs, Johannes Thumshirn

Super-block reading in BTRFS is done using buffer_heads. Buffer_heads have
some drawbacks, like not being able to propagate errors from the lower
layers.

Change the buffer_heads to BIOs.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/disk-io.c | 80 +++++++++++++++++++++++++++-------------------
 fs/btrfs/disk-io.h |  4 +--
 fs/btrfs/volumes.c | 64 ++++++++++++++++++++++---------------
 fs/btrfs/volumes.h |  2 --
 4 files changed, 88 insertions(+), 62 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5ce2801f8388..50c93ffe8d03 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2635,11 +2635,11 @@ int __cold open_ctree(struct super_block *sb,
 	u64 features;
 	u16 csum_type;
 	struct btrfs_key location;
-	struct buffer_head *bh;
 	struct btrfs_super_block *disk_super;
 	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
 	struct btrfs_root *tree_root;
 	struct btrfs_root *chunk_root;
+	struct page *super_page;
 	int ret;
 	int err = -EINVAL;
 	int clear_free_space_tree = 0;
@@ -2832,39 +2832,37 @@ int __cold open_ctree(struct super_block *sb,
 	/*
 	 * Read super block and check the signature bytes only
 	 */
-	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
-	if (IS_ERR(bh)) {
-		err = PTR_ERR(bh);
-		goto fail_alloc;
+	ret = btrfs_read_dev_super(fs_devices->latest_bdev, &super_page);
+	if (ret) {
+		err = ret;
+		goto fail_page_alloc;
 	}
 
 	/*
 	 * Verify the type first, if that or the the checksum value are
 	 * corrupted, we'll find out
 	 */
-	csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
+	csum_type = btrfs_super_csum_type(page_address(super_page));
 	if (!btrfs_supported_super_csum(csum_type)) {
 		btrfs_err(fs_info, "unsupported checksum algorithm: %u",
 			  csum_type);
 		err = -EINVAL;
-		brelse(bh);
-		goto fail_alloc;
+		goto fail_page_alloc;
 	}
 
 	ret = btrfs_init_csum_hash(fs_info, csum_type);
 	if (ret) {
 		err = ret;
-		goto fail_alloc;
+		goto fail_page_alloc;
 	}
 
 	/*
 	 * We want to check superblock checksum, the type is stored inside.
 	 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
 	 */
-	if (btrfs_check_super_csum(fs_info, bh->b_data)) {
+	if (btrfs_check_super_csum(fs_info, page_address(super_page))) {
 		btrfs_err(fs_info, "superblock checksum mismatch");
 		err = -EINVAL;
-		brelse(bh);
 		goto fail_csum;
 	}
 
@@ -2873,8 +2871,9 @@ int __cold open_ctree(struct super_block *sb,
 	 * following bytes up to INFO_SIZE, the checksum is calculated from
 	 * the whole block of INFO_SIZE
 	 */
-	memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
-	brelse(bh);
+	memcpy(fs_info->super_copy, page_address(super_page),
+	       sizeof(*fs_info->super_copy));
+	__free_page(super_page);
 
 	disk_super = fs_info->super_copy;
 
@@ -3330,6 +3329,8 @@ int __cold open_ctree(struct super_block *sb,
 	btrfs_free_block_groups(fs_info);
 fail_csum:
 	btrfs_free_csum_hash(fs_info);
+fail_page_alloc:
+	__free_page(super_page);
 fail_alloc:
 fail_iput:
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3374,40 +3375,52 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 }
 
 int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
-			struct buffer_head **bh_ret)
+			struct page **super_page)
 {
-	struct buffer_head *bh;
 	struct btrfs_super_block *super;
+	struct bio_vec bio_vec;
+	struct bio bio;
+	struct page *page;
 	u64 bytenr;
+	gfp_t gfp_mask = mapping_gfp_constraint(bdev->bd_inode->i_mapping,
+						~__GFP_FS) | __GFP_NOFAIL;
+	int ret;
 
 	bytenr = btrfs_sb_offset(copy_num);
 	if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
 		return -EINVAL;
 
-	bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
+	page = alloc_page(gfp_mask);
+	if (!page)
+		return -ENOMEM;
+
+	bio_init(&bio, &bio_vec, 1);
+	bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
+	bio_set_dev(&bio, bdev);
+	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
+	bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0);
+
+	ret = submit_bio_wait(&bio);
 	/*
 	 * If we fail to read from the underlying devices, as of now
 	 * the best option we have is to mark it EIO.
 	 */
-	if (!bh)
+	if (ret)
 		return -EIO;
 
-	super = (struct btrfs_super_block *)bh->b_data;
+	super = page_address(page);
 	if (btrfs_super_bytenr(super) != bytenr ||
-		    btrfs_super_magic(super) != BTRFS_MAGIC) {
-		brelse(bh);
+		    btrfs_super_magic(super) != BTRFS_MAGIC)
 		return -EINVAL;
-	}
 
-	*bh_ret = bh;
+	*super_page = page;
 	return 0;
 }
 
 
-struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
+int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
 {
-	struct buffer_head *bh;
-	struct buffer_head *latest = NULL;
+	struct page *latest = NULL;
 	struct btrfs_super_block *super;
 	int i;
 	u64 transid = 0;
@@ -3419,25 +3432,26 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
 	 */
 	for (i = 0; i < 1; i++) {
-		ret = btrfs_read_dev_one_super(bdev, i, &bh);
+		ret = btrfs_read_dev_one_super(bdev, i, page);
 		if (ret)
 			continue;
 
-		super = (struct btrfs_super_block *)bh->b_data;
+		super = page_address(*page);
 
 		if (!latest || btrfs_super_generation(super) > transid) {
-			brelse(latest);
-			latest = bh;
+			if (latest)
+				__free_page(latest);
+			latest = *page;
 			transid = btrfs_super_generation(super);
 		} else {
-			brelse(bh);
+			__free_page(*page);
 		}
 	}
 
-	if (!latest)
-		return ERR_PTR(ret);
+	if (ret)
+		return ret;
 
-	return latest;
+	return 0;
 }
 
 /*
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8c2d6cf1ce59..e04b233c436a 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,9 +54,9 @@ int __cold open_ctree(struct super_block *sb,
 	       char *options);
 void __cold close_ctree(struct btrfs_fs_info *fs_info);
 int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
-struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
+int btrfs_read_dev_super(struct block_device *bdev, struct page **super_page);
 int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
-			struct buffer_head **bh_ret);
+			     struct page **super_page);
 int btrfs_commit_super(struct btrfs_fs_info *fs_info);
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 				      struct btrfs_key *location);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3d5714bf2e32..4aded417a7d1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6,7 +6,6 @@
 #include <linux/sched.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
-#include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
@@ -492,7 +491,7 @@ static noinline struct btrfs_fs_devices *find_fsid(
 static int
 btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
 		      int flush, struct block_device **bdev,
-		      struct buffer_head **bh)
+		      struct page **super_page)
 {
 	int ret;
 
@@ -511,9 +510,8 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
 		goto error;
 	}
 	invalidate_bdev(*bdev);
-	*bh = btrfs_read_dev_super(*bdev);
-	if (IS_ERR(*bh)) {
-		ret = PTR_ERR(*bh);
+	ret = btrfs_read_dev_super(*bdev, super_page);
+	if (ret) {
 		blkdev_put(*bdev, flags);
 		goto error;
 	}
@@ -522,7 +520,6 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
 
 error:
 	*bdev = NULL;
-	*bh = NULL;
 	return ret;
 }
 
@@ -603,7 +600,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 {
 	struct request_queue *q;
 	struct block_device *bdev;
-	struct buffer_head *bh;
+	struct page *super_page;
 	struct btrfs_super_block *disk_super;
 	u64 devid;
 	int ret;
@@ -614,17 +611,17 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 		return -EINVAL;
 
 	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
-				    &bdev, &bh);
+				    &bdev, &super_page);
 	if (ret)
 		return ret;
 
-	disk_super = (struct btrfs_super_block *)bh->b_data;
+	disk_super = page_address(super_page);
 	devid = btrfs_stack_device_id(&disk_super->dev_item);
 	if (devid != device->devid)
-		goto error_brelse;
+		goto error_free_page;
 
 	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
-		goto error_brelse;
+		goto error_free_page;
 
 	device->generation = btrfs_super_generation(disk_super);
 
@@ -633,7 +630,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 		    BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
 			pr_err(
 		"BTRFS: Invalid seeding and uuid-changed device detected\n");
-			goto error_brelse;
+			goto error_free_page;
 		}
 
 		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
@@ -659,12 +656,12 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 		fs_devices->rw_devices++;
 		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
 	}
-	brelse(bh);
+	__free_page(super_page);
 
 	return 0;
 
-error_brelse:
-	brelse(bh);
+error_free_page:
+	__free_page(super_page);
 	blkdev_put(bdev, flags);
 
 	return -EINVAL;
@@ -2164,14 +2161,15 @@ static struct btrfs_device *btrfs_find_device_by_path(
 	u64 devid;
 	u8 *dev_uuid;
 	struct block_device *bdev;
-	struct buffer_head *bh;
+	struct page *super_page;
 	struct btrfs_device *device;
 
 	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
-				    fs_info->bdev_holder, 0, &bdev, &bh);
+				    fs_info->bdev_holder, 0, &bdev,
+				    &super_page);
 	if (ret)
 		return ERR_PTR(ret);
-	disk_super = (struct btrfs_super_block *)bh->b_data;
+	disk_super = page_address(super_page);
 	devid = btrfs_stack_device_id(&disk_super->dev_item);
 	dev_uuid = disk_super->dev_item.uuid;
 	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
@@ -2181,7 +2179,7 @@ static struct btrfs_device *btrfs_find_device_by_path(
 		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
 					   disk_super->fsid, true);
 
-	brelse(bh);
+	__free_page(super_page);
 	if (!device)
 		device = ERR_PTR(-ENOENT);
 	blkdev_put(bdev, FMODE_READ);
@@ -7270,25 +7268,41 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
 
 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
 {
-	struct buffer_head *bh;
+	struct page *super_page;
+	struct bio_vec bio_vec;
+	struct bio bio;
 	struct btrfs_super_block *disk_super;
 	int copy_num;
+	int ret;
 
 	if (!bdev)
 		return;
 
+	bio_init(&bio, &bio_vec, 1);
 	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
 		copy_num++) {
+		u64 bytenr;
 
-		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
+		if (btrfs_read_dev_one_super(bdev, copy_num, &super_page))
 			continue;
 
-		disk_super = (struct btrfs_super_block *)bh->b_data;
+		disk_super = page_address(super_page);
 
 		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
-		set_buffer_dirty(bh);
-		sync_dirty_buffer(bh);
-		brelse(bh);
+
+		bytenr = btrfs_sb_offset(copy_num);
+
+		bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
+		bio_set_dev(&bio, bdev);
+		bio_set_op_attrs(&bio, REQ_OP_WRITE, 0);
+		bio_add_page(&bio, super_page, PAGE_SIZE, 0);
+
+		ret = submit_bio_wait(&bio);
+		WARN_ON(ret);
+
+		__free_page(super_page);
+		bio_reset(&bio);
+
 	}
 
 	/* Notify udev that device has changed */
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 9c7d4fe5c39a..46a65b15eb93 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
 
 #define BTRFS_STRIPE_LEN	SZ_64K
 
-struct buffer_head;
-
 struct btrfs_io_geometry {
 	/* remaining bytes before crossing a stripe */
 	u64 len;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
  2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
  2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
@ 2020-01-17 12:51 ` Johannes Thumshirn
  2020-01-17 13:38   ` David Sterba
                     ` (2 more replies)
  2020-01-17 12:51 ` [PATCH 3/5] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
                   ` (2 subsequent siblings)
  4 siblings, 3 replies; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-17 12:51 UTC (permalink / raw)
  To: David Sterba; +Cc: Nikolay Borisov, linux-btrfs, Johannes Thumshirn

Similar to the superblock read path, change the write path to using BIOs
and pages instead of buffer_heads.

This is based on a patch originally authored by Nikolay Borisov.

Co-developed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
 1 file changed, 61 insertions(+), 46 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 50c93ffe8d03..51e7b832c8fd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
 }
 ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
 
-static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
+static void btrfs_end_super_write(struct bio *bio)
 {
-	if (uptodate) {
-		set_buffer_uptodate(bh);
-	} else {
-		struct btrfs_device *device = (struct btrfs_device *)
-			bh->b_private;
-
-		btrfs_warn_rl_in_rcu(device->fs_info,
-				"lost page write due to IO error on %s",
-					  rcu_str_deref(device->name));
-		/* note, we don't set_buffer_write_io_error because we have
-		 * our own ways of dealing with the IO errors
-		 */
-		clear_buffer_uptodate(bh);
-		btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
+	struct btrfs_device *device = bio->bi_private;
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+	struct page *page;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		page = bvec->bv_page;
+
+		if (blk_status_to_errno(bio->bi_status)) {
+			btrfs_warn_rl_in_rcu(device->fs_info,
+					     "lost page write due to IO error on %s",
+					     rcu_str_deref(device->name));
+			ClearPageUptodate(page);
+			SetPageError(page);
+			btrfs_dev_stat_inc_and_print(device,
+						     BTRFS_DEV_STAT_WRITE_ERRS);
+		} else {
+			SetPageUptodate(page);
+		}
+
+		put_page(page);
+		unlock_page(page);
 	}
-	unlock_buffer(bh);
-	put_bh(bh);
+
+	bio_put(bio);
 }
 
 int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
@@ -3462,16 +3470,15 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
  * the expected device size at commit time. Note that max_mirrors must be
  * same for write and wait phases.
  *
- * Return number of errors when buffer head is not found or submission fails.
+ * Return number of errors when page is not found or submission fails.
  */
 static int write_dev_supers(struct btrfs_device *device,
 			    struct btrfs_super_block *sb, int max_mirrors)
 {
 	struct btrfs_fs_info *fs_info = device->fs_info;
 	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-	struct buffer_head *bh;
+	gfp_t gfp_mask;
 	int i;
-	int ret;
 	int errors = 0;
 	u64 bytenr;
 	int op_flags;
@@ -3481,7 +3488,13 @@ static int write_dev_supers(struct btrfs_device *device,
 
 	shash->tfm = fs_info->csum_shash;
 
+	gfp_mask = mapping_gfp_constraint(device->bdev->bd_inode->i_mapping,
+					  ~__GFP_FS) | __GFP_NOFAIL;
+
 	for (i = 0; i < max_mirrors; i++) {
+		struct page *page;
+		struct bio *bio;
+
 		bytenr = btrfs_sb_offset(i);
 		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
 		    device->commit_total_bytes)
@@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
 				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
 		crypto_shash_final(shash, sb->csum);
 
-		/* One reference for us, and we leave it for the caller */
-		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
-			      BTRFS_SUPER_INFO_SIZE);
-		if (!bh) {
+		page = find_or_create_page(device->bdev->bd_inode->i_mapping,
+					   bytenr >> PAGE_SHIFT, gfp_mask);
+		if (!page) {
 			btrfs_err(device->fs_info,
-			    "couldn't get super buffer head for bytenr %llu",
+			    "couldn't get superblock page for bytenr %llu",
 			    bytenr);
 			errors++;
 			continue;
 		}
 
-		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
+		/* Bump the refcount for wait_dev_supers() */
+		get_page(page);
 
-		/* one reference for submit_bh */
-		get_bh(bh);
-
-		set_buffer_uptodate(bh);
-		lock_buffer(bh);
-		bh->b_end_io = btrfs_end_buffer_write_sync;
-		bh->b_private = device;
+		memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
 
 		/*
 		 * we fua the first super.  The others we allow
@@ -3522,9 +3529,17 @@ static int write_dev_supers(struct btrfs_device *device,
 		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
 		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
 			op_flags |= REQ_FUA;
-		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
-		if (ret)
-			errors++;
+
+		bio = bio_alloc(gfp_mask, 1);
+		bio_set_dev(bio, device->bdev);
+		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
+		bio->bi_private = device;
+		bio->bi_end_io = btrfs_end_super_write;
+		bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
+			     offset_in_page(bytenr));
+
+		bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
+		btrfsic_submit_bio(bio);
 	}
 	return errors < i ? 0 : -1;
 }
@@ -3533,12 +3548,11 @@ static int write_dev_supers(struct btrfs_device *device,
  * Wait for write completion of superblocks done by write_dev_supers,
  * @max_mirrors same for write and wait phases.
  *
- * Return number of errors when buffer head is not found or not marked up to
+ * Return number of errors when page is not found or not marked up to
  * date.
  */
 static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 {
-	struct buffer_head *bh;
 	int i;
 	int errors = 0;
 	bool primary_failed = false;
@@ -3548,32 +3562,33 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
 	for (i = 0; i < max_mirrors; i++) {
+		struct page *page;
+
 		bytenr = btrfs_sb_offset(i);
 		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
 		    device->commit_total_bytes)
 			break;
 
-		bh = __find_get_block(device->bdev,
-				      bytenr / BTRFS_BDEV_BLOCKSIZE,
-				      BTRFS_SUPER_INFO_SIZE);
-		if (!bh) {
+		page = find_get_page(device->bdev->bd_inode->i_mapping,
+				     bytenr >> PAGE_SHIFT);
+		if (!page) {
 			errors++;
 			if (i == 0)
 				primary_failed = true;
 			continue;
 		}
-		wait_on_buffer(bh);
-		if (!buffer_uptodate(bh)) {
+		wait_on_page_locked(page);
+		if (PageError(page)) {
 			errors++;
 			if (i == 0)
 				primary_failed = true;
 		}
 
 		/* drop our reference */
-		brelse(bh);
+		put_page(page);
 
 		/* drop the reference from the writing run */
-		brelse(bh);
+		put_page(page);
 	}
 
 	/* log error, force error return */
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/5] btrfs: remove btrfsic_submit_bh()
  2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
  2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
  2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
@ 2020-01-17 12:51 ` Johannes Thumshirn
  2020-01-17 15:05   ` Nikolay Borisov
  2020-01-17 12:51 ` [PATCH 4/5] btrfs: remove buffer_heads from btrfsic_process_written_block() Johannes Thumshirn
  2020-01-17 12:51 ` [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
  4 siblings, 1 reply; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-17 12:51 UTC (permalink / raw)
  To: David Sterba; +Cc: Nikolay Borisov, linux-btrfs, Johannes Thumshirn

Now that the last use of btrfsic_submit_bh() is gone, remove the function
as well.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/check-integrity.c | 57 --------------------------------------
 fs/btrfs/check-integrity.h |  2 --
 2 files changed, 59 deletions(-)

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index a0ce69f2d27c..e7507985435e 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2730,63 +2730,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
 						  &btrfsic_dev_state_hashtable);
 }
 
-int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
-{
-	struct btrfsic_dev_state *dev_state;
-
-	if (!btrfsic_is_initialized)
-		return submit_bh(op, op_flags, bh);
-
-	mutex_lock(&btrfsic_mutex);
-	/* since btrfsic_submit_bh() might also be called before
-	 * btrfsic_mount(), this might return NULL */
-	dev_state = btrfsic_dev_state_lookup(bh->b_bdev->bd_dev);
-
-	/* Only called to write the superblock (incl. FLUSH/FUA) */
-	if (NULL != dev_state &&
-	    (op == REQ_OP_WRITE) && bh->b_size > 0) {
-		u64 dev_bytenr;
-
-		dev_bytenr = BTRFS_BDEV_BLOCKSIZE * bh->b_blocknr;
-		if (dev_state->state->print_mask &
-		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
-			pr_info("submit_bh(op=0x%x,0x%x, blocknr=%llu (bytenr %llu), size=%zu, data=%p, bdev=%p)\n",
-			       op, op_flags, (unsigned long long)bh->b_blocknr,
-			       dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
-		btrfsic_process_written_block(dev_state, dev_bytenr,
-					      &bh->b_data, 1, NULL,
-					      NULL, bh, op_flags);
-	} else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) {
-		if (dev_state->state->print_mask &
-		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
-			pr_info("submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n",
-			       op, op_flags, bh->b_bdev);
-		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
-			if ((dev_state->state->print_mask &
-			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
-			      BTRFSIC_PRINT_MASK_VERBOSE)))
-				pr_info("btrfsic_submit_bh(%s) with FLUSH but dummy block already in use (ignored)!\n",
-				       dev_state->name);
-		} else {
-			struct btrfsic_block *const block =
-				&dev_state->dummy_block_for_bio_bh_flush;
-
-			block->is_iodone = 0;
-			block->never_written = 0;
-			block->iodone_w_error = 0;
-			block->flush_gen = dev_state->last_flush_gen + 1;
-			block->submit_bio_bh_rw = op_flags;
-			block->orig_bio_bh_private = bh->b_private;
-			block->orig_bio_bh_end_io.bh = bh->b_end_io;
-			block->next_in_same_bio = NULL;
-			bh->b_private = block;
-			bh->b_end_io = btrfsic_bh_end_io;
-		}
-	}
-	mutex_unlock(&btrfsic_mutex);
-	return submit_bh(op, op_flags, bh);
-}
-
 static void __btrfsic_submit_bio(struct bio *bio)
 {
 	struct btrfsic_dev_state *dev_state;
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 9bf4359cc44c..bcc730a06cb5 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -7,11 +7,9 @@
 #define BTRFS_CHECK_INTEGRITY_H
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh);
 void btrfsic_submit_bio(struct bio *bio);
 int btrfsic_submit_bio_wait(struct bio *bio);
 #else
-#define btrfsic_submit_bh submit_bh
 #define btrfsic_submit_bio submit_bio
 #define btrfsic_submit_bio_wait submit_bio_wait
 #endif
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/5] btrfs: remove buffer_heads from btrfsic_process_written_block()
  2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
                   ` (2 preceding siblings ...)
  2020-01-17 12:51 ` [PATCH 3/5] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
@ 2020-01-17 12:51 ` Johannes Thumshirn
  2020-01-17 12:51 ` [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
  4 siblings, 0 replies; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-17 12:51 UTC (permalink / raw)
  To: David Sterba; +Cc: Nikolay Borisov, linux-btrfs, Johannes Thumshirn

Now that the last caller of btrfsic_process_written_block() with
buffer_heads is gone, remove the buffer_head processing path from it as
well.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/check-integrity.c | 103 +++++++++----------------------------
 1 file changed, 25 insertions(+), 78 deletions(-)

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index e7507985435e..4f6db2fe482a 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -152,11 +152,8 @@ struct btrfsic_block {
 	struct list_head ref_to_list;	/* list */
 	struct list_head ref_from_list;	/* list */
 	struct btrfsic_block *next_in_same_bio;
-	void *orig_bio_bh_private;
-	union {
-		bio_end_io_t *bio;
-		bh_end_io_t *bh;
-	} orig_bio_bh_end_io;
+	void *orig_bio_private;
+	bio_end_io_t *orig_bio_end_io;
 	int submit_bio_bh_rw;
 	u64 flush_gen; /* only valid if !never_written */
 };
@@ -325,14 +322,12 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 					  u64 dev_bytenr, char **mapped_datav,
 					  unsigned int num_pages,
 					  struct bio *bio, int *bio_is_patched,
-					  struct buffer_head *bh,
 					  int submit_bio_bh_rw);
 static int btrfsic_process_written_superblock(
 		struct btrfsic_state *state,
 		struct btrfsic_block *const block,
 		struct btrfs_super_block *const super_hdr);
 static void btrfsic_bio_end_io(struct bio *bp);
-static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
 					      const struct btrfsic_block *block,
 					      int recursion_level);
@@ -399,8 +394,8 @@ static void btrfsic_block_init(struct btrfsic_block *b)
 	b->never_written = 0;
 	b->mirror_num = 0;
 	b->next_in_same_bio = NULL;
-	b->orig_bio_bh_private = NULL;
-	b->orig_bio_bh_end_io.bio = NULL;
+	b->orig_bio_private = NULL;
+	b->orig_bio_end_io = NULL;
 	INIT_LIST_HEAD(&b->collision_resolving_node);
 	INIT_LIST_HEAD(&b->all_blocks_node);
 	INIT_LIST_HEAD(&b->ref_to_list);
@@ -1743,7 +1738,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 					  u64 dev_bytenr, char **mapped_datav,
 					  unsigned int num_pages,
 					  struct bio *bio, int *bio_is_patched,
-					  struct buffer_head *bh,
 					  int submit_bio_bh_rw)
 {
 	int is_metadata;
@@ -1902,9 +1896,9 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 				block->is_iodone = 0;
 				BUG_ON(NULL == bio_is_patched);
 				if (!*bio_is_patched) {
-					block->orig_bio_bh_private =
+					block->orig_bio_private =
 					    bio->bi_private;
-					block->orig_bio_bh_end_io.bio =
+					block->orig_bio_end_io =
 					    bio->bi_end_io;
 					block->next_in_same_bio = NULL;
 					bio->bi_private = block;
@@ -1916,25 +1910,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 					    bio->bi_private;
 
 					BUG_ON(NULL == chained_block);
-					block->orig_bio_bh_private =
-					    chained_block->orig_bio_bh_private;
-					block->orig_bio_bh_end_io.bio =
-					    chained_block->orig_bio_bh_end_io.
-					    bio;
+					block->orig_bio_private =
+					    chained_block->orig_bio_private;
+					block->orig_bio_end_io =
+					    chained_block->orig_bio_end_io;
 					block->next_in_same_bio = chained_block;
 					bio->bi_private = block;
 				}
-			} else if (NULL != bh) {
-				block->is_iodone = 0;
-				block->orig_bio_bh_private = bh->b_private;
-				block->orig_bio_bh_end_io.bh = bh->b_end_io;
-				block->next_in_same_bio = NULL;
-				bh->b_private = block;
-				bh->b_end_io = btrfsic_bh_end_io;
 			} else {
 				block->is_iodone = 1;
-				block->orig_bio_bh_private = NULL;
-				block->orig_bio_bh_end_io.bio = NULL;
+				block->orig_bio_private = NULL;
+				block->orig_bio_end_io = NULL;
 				block->next_in_same_bio = NULL;
 			}
 		}
@@ -2042,8 +2028,8 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 			block->is_iodone = 0;
 			BUG_ON(NULL == bio_is_patched);
 			if (!*bio_is_patched) {
-				block->orig_bio_bh_private = bio->bi_private;
-				block->orig_bio_bh_end_io.bio = bio->bi_end_io;
+				block->orig_bio_private = bio->bi_private;
+				block->orig_bio_end_io = bio->bi_end_io;
 				block->next_in_same_bio = NULL;
 				bio->bi_private = block;
 				bio->bi_end_io = btrfsic_bio_end_io;
@@ -2054,24 +2040,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
 				    bio->bi_private;
 
 				BUG_ON(NULL == chained_block);
-				block->orig_bio_bh_private =
-				    chained_block->orig_bio_bh_private;
-				block->orig_bio_bh_end_io.bio =
-				    chained_block->orig_bio_bh_end_io.bio;
+				block->orig_bio_private =
+				    chained_block->orig_bio_private;
+				block->orig_bio_end_io =
+				    chained_block->orig_bio_end_io;
 				block->next_in_same_bio = chained_block;
 				bio->bi_private = block;
 			}
-		} else if (NULL != bh) {
-			block->is_iodone = 0;
-			block->orig_bio_bh_private = bh->b_private;
-			block->orig_bio_bh_end_io.bh = bh->b_end_io;
-			block->next_in_same_bio = NULL;
-			bh->b_private = block;
-			bh->b_end_io = btrfsic_bh_end_io;
 		} else {
 			block->is_iodone = 1;
-			block->orig_bio_bh_private = NULL;
-			block->orig_bio_bh_end_io.bio = NULL;
+			block->orig_bio_private = NULL;
+			block->orig_bio_end_io = NULL;
 			block->next_in_same_bio = NULL;
 		}
 		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
@@ -2112,8 +2091,8 @@ static void btrfsic_bio_end_io(struct bio *bp)
 		iodone_w_error = 1;
 
 	BUG_ON(NULL == block);
-	bp->bi_private = block->orig_bio_bh_private;
-	bp->bi_end_io = block->orig_bio_bh_end_io.bio;
+	bp->bi_private = block->orig_bio_private;
+	bp->bi_end_io = block->orig_bio_end_io;
 
 	do {
 		struct btrfsic_block *next_block;
@@ -2146,38 +2125,6 @@ static void btrfsic_bio_end_io(struct bio *bp)
 	bp->bi_end_io(bp);
 }
 
-static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
-{
-	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
-	int iodone_w_error = !uptodate;
-	struct btrfsic_dev_state *dev_state;
-
-	BUG_ON(NULL == block);
-	dev_state = block->dev_state;
-	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
-		pr_info("bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
-		       iodone_w_error,
-		       btrfsic_get_block_type(dev_state->state, block),
-		       block->logical_bytenr, block->dev_state->name,
-		       block->dev_bytenr, block->mirror_num);
-
-	block->iodone_w_error = iodone_w_error;
-	if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
-		dev_state->last_flush_gen++;
-		if ((dev_state->state->print_mask &
-		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
-			pr_info("bh_end_io() new %s flush_gen=%llu\n",
-			       dev_state->name, dev_state->last_flush_gen);
-	}
-	if (block->submit_bio_bh_rw & REQ_FUA)
-		block->flush_gen = 0; /* FUA completed means block is on disk */
-
-	bh->b_private = block->orig_bio_bh_private;
-	bh->b_end_io = block->orig_bio_bh_end_io.bh;
-	block->is_iodone = 1; /* for FLUSH, this releases the block */
-	bh->b_end_io(bh, uptodate);
-}
-
 static int btrfsic_process_written_superblock(
 		struct btrfsic_state *state,
 		struct btrfsic_block *const superblock,
@@ -2781,7 +2728,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
 		btrfsic_process_written_block(dev_state, dev_bytenr,
 					      mapped_datav, segs,
 					      bio, &bio_is_patched,
-					      NULL, bio->bi_opf);
+					      bio->bi_opf);
 		bio_for_each_segment(bvec, bio, iter)
 			kunmap(bvec.bv_page);
 		kfree(mapped_datav);
@@ -2805,8 +2752,8 @@ static void __btrfsic_submit_bio(struct bio *bio)
 			block->iodone_w_error = 0;
 			block->flush_gen = dev_state->last_flush_gen + 1;
 			block->submit_bio_bh_rw = bio->bi_opf;
-			block->orig_bio_bh_private = bio->bi_private;
-			block->orig_bio_bh_end_io.bio = bio->bi_end_io;
+			block->orig_bio_private = bio->bi_private;
+			block->orig_bio_end_io = bio->bi_end_io;
 			block->next_in_same_bio = NULL;
 			bio->bi_private = block;
 			bio->bi_end_io = btrfsic_bio_end_io;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking
  2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
                   ` (3 preceding siblings ...)
  2020-01-17 12:51 ` [PATCH 4/5] btrfs: remove buffer_heads from btrfsic_process_written_block() Johannes Thumshirn
@ 2020-01-17 12:51 ` Johannes Thumshirn
  2020-01-17 15:10   ` Nikolay Borisov
  4 siblings, 1 reply; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-17 12:51 UTC (permalink / raw)
  To: David Sterba; +Cc: Nikolay Borisov, linux-btrfs, Johannes Thumshirn

The integrity checking code for the superblock mirrors is the last remaining
user of buffer_heads in BTRFS, change it to using plain BIOs as well.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/check-integrity.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 4f6db2fe482a..9a614b19b6b3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -77,7 +77,6 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/buffer_head.h>
 #include <linux/mutex.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
@@ -762,28 +761,41 @@ static int btrfsic_process_superblock_dev_mirror(
 	struct btrfs_fs_info *fs_info = state->fs_info;
 	struct btrfs_super_block *super_tmp;
 	u64 dev_bytenr;
-	struct buffer_head *bh;
 	struct btrfsic_block *superblock_tmp;
 	int pass;
 	struct block_device *const superblock_bdev = device->bdev;
+	struct page *page;
+	struct bio bio;
+	struct bio_vec bio_vec;
+	int ret;
 
 	/* super block bytenr is always the unmapped device bytenr */
 	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
 	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
 		return -1;
-	bh = __bread(superblock_bdev, dev_bytenr / BTRFS_BDEV_BLOCKSIZE,
-		     BTRFS_SUPER_INFO_SIZE);
-	if (NULL == bh)
+
+	page = alloc_page(GFP_NOIO);
+	if (!page)
+		return -1;
+
+	bio_init(&bio, &bio_vec, 1);
+	bio.bi_iter.bi_sector = dev_bytenr >> SECTOR_SHIFT;
+	bio_set_dev(&bio, superblock_bdev);
+	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
+	bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0);
+
+	ret = submit_bio_wait(&bio);
+	if (ret)
 		return -1;
-	super_tmp = (struct btrfs_super_block *)
-	    (bh->b_data + (dev_bytenr & (BTRFS_BDEV_BLOCKSIZE - 1)));
+
+	super_tmp = page_address(page);
 
 	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
 	    btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
 	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
 	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
 	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
-		brelse(bh);
+		__free_page(page);
 		return 0;
 	}
 
@@ -795,7 +807,7 @@ static int btrfsic_process_superblock_dev_mirror(
 		superblock_tmp = btrfsic_block_alloc();
 		if (NULL == superblock_tmp) {
 			pr_info("btrfsic: error, kmalloc failed!\n");
-			brelse(bh);
+			__free_page(page);
 			return -1;
 		}
 		/* for superblock, only the dev_bytenr makes sense */
@@ -880,7 +892,7 @@ static int btrfsic_process_superblock_dev_mirror(
 					      mirror_num)) {
 				pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n",
 				       next_bytenr, mirror_num);
-				brelse(bh);
+				__free_page(page);
 				return -1;
 			}
 
@@ -890,7 +902,7 @@ static int btrfsic_process_superblock_dev_mirror(
 					mirror_num, NULL);
 			if (NULL == next_block) {
 				btrfsic_release_block_ctx(&tmp_next_block_ctx);
-				brelse(bh);
+				__free_page(page);
 				return -1;
 			}
 
@@ -902,7 +914,7 @@ static int btrfsic_process_superblock_dev_mirror(
 					BTRFSIC_GENERATION_UNKNOWN);
 			btrfsic_release_block_ctx(&tmp_next_block_ctx);
 			if (NULL == l) {
-				brelse(bh);
+				__free_page(page);
 				return -1;
 			}
 		}
@@ -910,7 +922,7 @@ static int btrfsic_process_superblock_dev_mirror(
 	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
 		btrfsic_dump_tree_sub(state, superblock_tmp, 0);
 
-	brelse(bh);
+	__free_page(page);
 	return 0;
 }
 
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] btrfs: remove buffer heads from super block reading
  2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
@ 2020-01-17 13:31   ` David Sterba
  2020-01-17 14:30   ` Nikolay Borisov
  1 sibling, 0 replies; 16+ messages in thread
From: David Sterba @ 2020-01-17 13:31 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: David Sterba, Nikolay Borisov, linux-btrfs

On Fri, Jan 17, 2020 at 09:51:01PM +0900, Johannes Thumshirn wrote:
> Super-block reading in BTRFS is done using buffer_heads. Buffer_heads have
> some drawbacks, like not being able to propagate errors from the lower
> layers.
> 
> Change the buffer_heads to BIOs.

Please enhance that, eg. a brief overview. It's changing from BH to
pages, but thre's no mapping, inodes and such so something that gives me
an idea before I star reading the code.

> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/disk-io.c | 80 +++++++++++++++++++++++++++-------------------
>  fs/btrfs/disk-io.h |  4 +--
>  fs/btrfs/volumes.c | 64 ++++++++++++++++++++++---------------
>  fs/btrfs/volumes.h |  2 --
>  4 files changed, 88 insertions(+), 62 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5ce2801f8388..50c93ffe8d03 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2635,11 +2635,11 @@ int __cold open_ctree(struct super_block *sb,
>  	u64 features;
>  	u16 csum_type;
>  	struct btrfs_key location;
> -	struct buffer_head *bh;
>  	struct btrfs_super_block *disk_super;
>  	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
>  	struct btrfs_root *tree_root;
>  	struct btrfs_root *chunk_root;
> +	struct page *super_page;
>  	int ret;
>  	int err = -EINVAL;
>  	int clear_free_space_tree = 0;
> @@ -2832,39 +2832,37 @@ int __cold open_ctree(struct super_block *sb,
>  	/*
>  	 * Read super block and check the signature bytes only
>  	 */
> -	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
> -	if (IS_ERR(bh)) {
> -		err = PTR_ERR(bh);
> -		goto fail_alloc;
> +	ret = btrfs_read_dev_super(fs_devices->latest_bdev, &super_page);
> +	if (ret) {
> +		err = ret;
> +		goto fail_page_alloc;
>  	}
>  
>  	/*
>  	 * Verify the type first, if that or the the checksum value are
>  	 * corrupted, we'll find out
>  	 */
> -	csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
> +	csum_type = btrfs_super_csum_type(page_address(super_page));
>  	if (!btrfs_supported_super_csum(csum_type)) {
>  		btrfs_err(fs_info, "unsupported checksum algorithm: %u",
>  			  csum_type);
>  		err = -EINVAL;
> -		brelse(bh);
> -		goto fail_alloc;
> +		goto fail_page_alloc;
>  	}
>  
>  	ret = btrfs_init_csum_hash(fs_info, csum_type);
>  	if (ret) {
>  		err = ret;
> -		goto fail_alloc;
> +		goto fail_page_alloc;
>  	}
>  
>  	/*
>  	 * We want to check superblock checksum, the type is stored inside.
>  	 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
>  	 */
> -	if (btrfs_check_super_csum(fs_info, bh->b_data)) {
> +	if (btrfs_check_super_csum(fs_info, page_address(super_page))) {
>  		btrfs_err(fs_info, "superblock checksum mismatch");
>  		err = -EINVAL;
> -		brelse(bh);
>  		goto fail_csum;
>  	}
>  
> @@ -2873,8 +2871,9 @@ int __cold open_ctree(struct super_block *sb,
>  	 * following bytes up to INFO_SIZE, the checksum is calculated from
>  	 * the whole block of INFO_SIZE
>  	 */
> -	memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
> -	brelse(bh);
> +	memcpy(fs_info->super_copy, page_address(super_page),

page_address(super_page) is used 3 times, please add a variable for that

> +	       sizeof(*fs_info->super_copy));
> +	__free_page(super_page);
>  
>  	disk_super = fs_info->super_copy;
>  
> @@ -3330,6 +3329,8 @@ int __cold open_ctree(struct super_block *sb,
>  	btrfs_free_block_groups(fs_info);
>  fail_csum:
>  	btrfs_free_csum_hash(fs_info);
> +fail_page_alloc:
> +	__free_page(super_page);
>  fail_alloc:
>  fail_iput:
>  	btrfs_mapping_tree_free(&fs_info->mapping_tree);
> @@ -3374,40 +3375,52 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> -			struct buffer_head **bh_ret)
> +			struct page **super_page)
>  {
> -	struct buffer_head *bh;
>  	struct btrfs_super_block *super;
> +	struct bio_vec bio_vec;
> +	struct bio bio;
> +	struct page *page;
>  	u64 bytenr;
> +	gfp_t gfp_mask = mapping_gfp_constraint(bdev->bd_inode->i_mapping,
> +						~__GFP_FS) | __GFP_NOFAIL;
> +	int ret;
>  
>  	bytenr = btrfs_sb_offset(copy_num);
>  	if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
>  		return -EINVAL;
>  
> -	bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
> +	page = alloc_page(gfp_mask);

With __GFP_NOFAIL the error does not need to be handled

> +	if (!page)
> +		return -ENOMEM;
> +
> +	bio_init(&bio, &bio_vec, 1);

bio_bvec is used only for initialization, is it needed at all?

> +	bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +	bio_set_dev(&bio, bdev);
> +	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
> +	bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE, 0);
> +
> +	ret = submit_bio_wait(&bio);
>  	/*
>  	 * If we fail to read from the underlying devices, as of now
>  	 * the best option we have is to mark it EIO.
>  	 */
> -	if (!bh)
> +	if (ret)
>  		return -EIO;
>  
> -	super = (struct btrfs_super_block *)bh->b_data;
> +	super = page_address(page);
>  	if (btrfs_super_bytenr(super) != bytenr ||
> -		    btrfs_super_magic(super) != BTRFS_MAGIC) {
> -		brelse(bh);
> +		    btrfs_super_magic(super) != BTRFS_MAGIC)
>  		return -EINVAL;
> -	}
>  
> -	*bh_ret = bh;
> +	*super_page = page;
>  	return 0;
>  }
>  
>  
> -struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
> +int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>  {
> -	struct buffer_head *bh;
> -	struct buffer_head *latest = NULL;
> +	struct page *latest = NULL;
>  	struct btrfs_super_block *super;
>  	int i;
>  	u64 transid = 0;
> @@ -3419,25 +3432,26 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
>  	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
>  	 */
>  	for (i = 0; i < 1; i++) {
> -		ret = btrfs_read_dev_one_super(bdev, i, &bh);
> +		ret = btrfs_read_dev_one_super(bdev, i, page);
>  		if (ret)
>  			continue;
>  
> -		super = (struct btrfs_super_block *)bh->b_data;
> +		super = page_address(*page);
>  
>  		if (!latest || btrfs_super_generation(super) > transid) {
> -			brelse(latest);
> -			latest = bh;
> +			if (latest)
> +				__free_page(latest);
> +			latest = *page;
>  			transid = btrfs_super_generation(super);
>  		} else {
> -			brelse(bh);
> +			__free_page(*page);
>  		}
>  	}
>  
> -	if (!latest)
> -		return ERR_PTR(ret);
> +	if (ret)
> +		return ret;
>  
> -	return latest;
> +	return 0;
>  }
>  
>  /*
> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
> index 8c2d6cf1ce59..e04b233c436a 100644
> --- a/fs/btrfs/disk-io.h
> +++ b/fs/btrfs/disk-io.h
> @@ -54,9 +54,9 @@ int __cold open_ctree(struct super_block *sb,
>  	       char *options);
>  void __cold close_ctree(struct btrfs_fs_info *fs_info);
>  int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
> -struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
> +int btrfs_read_dev_super(struct block_device *bdev, struct page **super_page);
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> -			struct buffer_head **bh_ret);
> +			     struct page **super_page);
>  int btrfs_commit_super(struct btrfs_fs_info *fs_info);
>  struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
>  				      struct btrfs_key *location);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 3d5714bf2e32..4aded417a7d1 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6,7 +6,6 @@
>  #include <linux/sched.h>
>  #include <linux/bio.h>
>  #include <linux/slab.h>
> -#include <linux/buffer_head.h>
>  #include <linux/blkdev.h>
>  #include <linux/ratelimit.h>
>  #include <linux/kthread.h>
> @@ -492,7 +491,7 @@ static noinline struct btrfs_fs_devices *find_fsid(
>  static int
>  btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
>  		      int flush, struct block_device **bdev,
> -		      struct buffer_head **bh)
> +		      struct page **super_page)
>  {
>  	int ret;
>  
> @@ -511,9 +510,8 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
>  		goto error;
>  	}
>  	invalidate_bdev(*bdev);
> -	*bh = btrfs_read_dev_super(*bdev);
> -	if (IS_ERR(*bh)) {
> -		ret = PTR_ERR(*bh);
> +	ret = btrfs_read_dev_super(*bdev, super_page);
> +	if (ret) {
>  		blkdev_put(*bdev, flags);
>  		goto error;
>  	}
> @@ -522,7 +520,6 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
>  
>  error:
>  	*bdev = NULL;
> -	*bh = NULL;
>  	return ret;
>  }
>  
> @@ -603,7 +600,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
>  {
>  	struct request_queue *q;
>  	struct block_device *bdev;
> -	struct buffer_head *bh;
> +	struct page *super_page;
>  	struct btrfs_super_block *disk_super;
>  	u64 devid;
>  	int ret;
> @@ -614,17 +611,17 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
>  		return -EINVAL;
>  
>  	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
> -				    &bdev, &bh);
> +				    &bdev, &super_page);
>  	if (ret)
>  		return ret;
>  
> -	disk_super = (struct btrfs_super_block *)bh->b_data;
> +	disk_super = page_address(super_page);
>  	devid = btrfs_stack_device_id(&disk_super->dev_item);
>  	if (devid != device->devid)
> -		goto error_brelse;
> +		goto error_free_page;
>  
>  	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
> -		goto error_brelse;
> +		goto error_free_page;
>  
>  	device->generation = btrfs_super_generation(disk_super);
>  
> @@ -633,7 +630,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
>  		    BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
>  			pr_err(
>  		"BTRFS: Invalid seeding and uuid-changed device detected\n");
> -			goto error_brelse;
> +			goto error_free_page;
>  		}
>  
>  		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
> @@ -659,12 +656,12 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
>  		fs_devices->rw_devices++;
>  		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
>  	}
> -	brelse(bh);
> +	__free_page(super_page);
>  
>  	return 0;
>  
> -error_brelse:
> -	brelse(bh);
> +error_free_page:
> +	__free_page(super_page);
>  	blkdev_put(bdev, flags);
>  
>  	return -EINVAL;
> @@ -2164,14 +2161,15 @@ static struct btrfs_device *btrfs_find_device_by_path(
>  	u64 devid;
>  	u8 *dev_uuid;
>  	struct block_device *bdev;
> -	struct buffer_head *bh;
> +	struct page *super_page;
>  	struct btrfs_device *device;
>  
>  	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
> -				    fs_info->bdev_holder, 0, &bdev, &bh);
> +				    fs_info->bdev_holder, 0, &bdev,
> +				    &super_page);
>  	if (ret)
>  		return ERR_PTR(ret);
> -	disk_super = (struct btrfs_super_block *)bh->b_data;
> +	disk_super = page_address(super_page);
>  	devid = btrfs_stack_device_id(&disk_super->dev_item);
>  	dev_uuid = disk_super->dev_item.uuid;
>  	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
> @@ -2181,7 +2179,7 @@ static struct btrfs_device *btrfs_find_device_by_path(
>  		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
>  					   disk_super->fsid, true);
>  
> -	brelse(bh);
> +	__free_page(super_page);
>  	if (!device)
>  		device = ERR_PTR(-ENOENT);
>  	blkdev_put(bdev, FMODE_READ);
> @@ -7270,25 +7268,41 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
>  
>  void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
>  {
> -	struct buffer_head *bh;
> +	struct page *super_page;
> +	struct bio_vec bio_vec;
> +	struct bio bio;
>  	struct btrfs_super_block *disk_super;
>  	int copy_num;
> +	int ret;
>  
>  	if (!bdev)
>  		return;
>  
> +	bio_init(&bio, &bio_vec, 1);
>  	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
>  		copy_num++) {
> +		u64 bytenr;
>  
> -		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
> +		if (btrfs_read_dev_one_super(bdev, copy_num, &super_page))
>  			continue;
>  
> -		disk_super = (struct btrfs_super_block *)bh->b_data;
> +		disk_super = page_address(super_page);
>  
>  		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
> -		set_buffer_dirty(bh);
> -		sync_dirty_buffer(bh);
> -		brelse(bh);
> +
> +		bytenr = btrfs_sb_offset(copy_num);
> +
> +		bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio_set_dev(&bio, bdev);
> +		bio_set_op_attrs(&bio, REQ_OP_WRITE, 0);
> +		bio_add_page(&bio, super_page, PAGE_SIZE, 0);

The read side uses BTRFS_SUPER_INFO_SIZE, I'd assume the write side
needs that too.

> +
> +		ret = submit_bio_wait(&bio);
> +		WARN_ON(ret);
> +
> +		__free_page(super_page);
> +		bio_reset(&bio);
> +
>  	}
>  
>  	/* Notify udev that device has changed */
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 9c7d4fe5c39a..46a65b15eb93 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
>  
>  #define BTRFS_STRIPE_LEN	SZ_64K
>  
> -struct buffer_head;
> -
>  struct btrfs_io_geometry {
>  	/* remaining bytes before crossing a stripe */
>  	u64 len;
> -- 
> 2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
  2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
@ 2020-01-17 13:38   ` David Sterba
  2020-01-17 14:51   ` David Sterba
  2020-01-17 15:01   ` Nikolay Borisov
  2 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2020-01-17 13:38 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: David Sterba, Nikolay Borisov, linux-btrfs

On Fri, Jan 17, 2020 at 09:51:02PM +0900, Johannes Thumshirn wrote:
> Similar to the superblock read path, change the write path to using BIOs
> and pages instead of buffer_heads.
> 
> This is based on a patch originally authored by Nikolay Borisov.
> 
> Co-developed-by: Nikolay Borisov <nborisov@suse.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
>  1 file changed, 61 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 50c93ffe8d03..51e7b832c8fd 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
>  }
>  ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
>  
> -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> +static void btrfs_end_super_write(struct bio *bio)
>  {
> -	if (uptodate) {
> -		set_buffer_uptodate(bh);
> -	} else {
> -		struct btrfs_device *device = (struct btrfs_device *)
> -			bh->b_private;
> -
> -		btrfs_warn_rl_in_rcu(device->fs_info,
> -				"lost page write due to IO error on %s",
> -					  rcu_str_deref(device->name));
> -		/* note, we don't set_buffer_write_io_error because we have
> -		 * our own ways of dealing with the IO errors
> -		 */
> -		clear_buffer_uptodate(bh);
> -		btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
> +	struct btrfs_device *device = bio->bi_private;
> +	struct bio_vec *bvec;
> +	struct bvec_iter_all iter_all;
> +	struct page *page;
> +
> +	bio_for_each_segment_all(bvec, bio, iter_all) {
> +		page = bvec->bv_page;
> +
> +		if (blk_status_to_errno(bio->bi_status)) {
> +			btrfs_warn_rl_in_rcu(device->fs_info,
> +					     "lost page write due to IO error on %s",
> +					     rcu_str_deref(device->name));
> +			ClearPageUptodate(page);
> +			SetPageError(page);
> +			btrfs_dev_stat_inc_and_print(device,
> +						     BTRFS_DEV_STAT_WRITE_ERRS);
> +		} else {
> +			SetPageUptodate(page);
> +		}
> +
> +		put_page(page);
> +		unlock_page(page);
>  	}
> -	unlock_buffer(bh);
> -	put_bh(bh);
> +
> +	bio_put(bio);
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> @@ -3462,16 +3470,15 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>   * the expected device size at commit time. Note that max_mirrors must be
>   * same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or submission fails.
> + * Return number of errors when page is not found or submission fails.
>   */
>  static int write_dev_supers(struct btrfs_device *device,
>  			    struct btrfs_super_block *sb, int max_mirrors)
>  {
>  	struct btrfs_fs_info *fs_info = device->fs_info;
>  	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
> -	struct buffer_head *bh;
> +	gfp_t gfp_mask;
>  	int i;
> -	int ret;
>  	int errors = 0;
>  	u64 bytenr;
>  	int op_flags;
> @@ -3481,7 +3488,13 @@ static int write_dev_supers(struct btrfs_device *device,
>  
>  	shash->tfm = fs_info->csum_shash;
>  
> +	gfp_mask = mapping_gfp_constraint(device->bdev->bd_inode->i_mapping,
> +					  ~__GFP_FS) | __GFP_NOFAIL;
> +
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +		struct bio *bio;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
> @@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
>  				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
>  		crypto_shash_final(shash, sb->csum);
>  
> -		/* One reference for us, and we leave it for the caller */
> -		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
> -			      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_or_create_page(device->bdev->bd_inode->i_mapping,
> +					   bytenr >> PAGE_SHIFT, gfp_mask);

This has NOFAIL again, but now we're in write_dev_supers, so this has
some implications regarding the potential unbounded waiting

> +		if (!page) {
>  			btrfs_err(device->fs_info,
> -			    "couldn't get super buffer head for bytenr %llu",
> +			    "couldn't get superblock page for bytenr %llu",
>  			    bytenr);
>  			errors++;
>  			continue;
>  		}
>  
> -		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
> +		/* Bump the refcount for wait_dev_supers() */
> +		get_page(page);
>  
> -		/* one reference for submit_bh */
> -		get_bh(bh);
> -
> -		set_buffer_uptodate(bh);
> -		lock_buffer(bh);
> -		bh->b_end_io = btrfs_end_buffer_write_sync;
> -		bh->b_private = device;
> +		memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
>  
>  		/*
>  		 * we fua the first super.  The others we allow
> @@ -3522,9 +3529,17 @@ static int write_dev_supers(struct btrfs_device *device,
>  		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
>  		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
>  			op_flags |= REQ_FUA;
> -		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
> -		if (ret)
> -			errors++;
> +
> +		bio = bio_alloc(gfp_mask, 1);

And allocating a new bio when we have to write the superblock is also
not very nice. This should do something like the device::flush_bio that
could be reused.

> +		bio_set_dev(bio, device->bdev);
> +		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio->bi_private = device;
> +		bio->bi_end_io = btrfs_end_super_write;
> +		bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
> +			     offset_in_page(bytenr));
> +
> +		bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
> +		btrfsic_submit_bio(bio);
>  	}
>  	return errors < i ? 0 : -1;
>  }
> @@ -3533,12 +3548,11 @@ static int write_dev_supers(struct btrfs_device *device,
>   * Wait for write completion of superblocks done by write_dev_supers,
>   * @max_mirrors same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or not marked up to
> + * Return number of errors when page is not found or not marked up to
>   * date.
>   */
>  static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  {
> -	struct buffer_head *bh;
>  	int i;
>  	int errors = 0;
>  	bool primary_failed = false;
> @@ -3548,32 +3562,33 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
>  
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
>  			break;
>  
> -		bh = __find_get_block(device->bdev,
> -				      bytenr / BTRFS_BDEV_BLOCKSIZE,
> -				      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_get_page(device->bdev->bd_inode->i_mapping,
> +				     bytenr >> PAGE_SHIFT);
> +		if (!page) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  			continue;
>  		}
> -		wait_on_buffer(bh);
> -		if (!buffer_uptodate(bh)) {
> +		wait_on_page_locked(page);

What locks the page?

> +		if (PageError(page)) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  		}
>  
>  		/* drop our reference */
> -		brelse(bh);
> +		put_page(page);
>  
>  		/* drop the reference from the writing run */
> -		brelse(bh);
> +		put_page(page);
>  	}
>  
>  	/* log error, force error return */
> -- 
> 2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/5] btrfs: remove buffer heads from super block reading
  2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
  2020-01-17 13:31   ` David Sterba
@ 2020-01-17 14:30   ` Nikolay Borisov
  1 sibling, 0 replies; 16+ messages in thread
From: Nikolay Borisov @ 2020-01-17 14:30 UTC (permalink / raw)
  To: Johannes Thumshirn, David Sterba; +Cc: linux-btrfs



On 17.01.20 г. 14:51 ч., Johannes Thumshirn wrote:
> Super-block reading in BTRFS is done using buffer_heads. Buffer_heads have
> some drawbacks, like not being able to propagate errors from the lower
> layers.
> 
> Change the buffer_heads to BIOs.
> 

Generally this looks good. I guess as a follow up I can send the rest of
my code which cleans up a bit parameters of the involved function and
let people decide if it makes the code more readable. One minor nit
below. In any case:

Reviewed-by: Nikolay Borisov <nborisov@suse.com>

<snip>


> @@ -7270,25 +7268,41 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
>  
>  void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
>  {
> -	struct buffer_head *bh;
> +	struct page *super_page;
> +	struct bio_vec bio_vec;
> +	struct bio bio;
>  	struct btrfs_super_block *disk_super;
>  	int copy_num;
> +	int ret;
>  
>  	if (!bdev)
>  		return;
>  
> +	bio_init(&bio, &bio_vec, 1);
>  	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
>  		copy_num++) {
> +		u64 bytenr;

nit: super_page is only used in this loop so IMO it will be better if
it's declared inside of it.
>  
> -		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
> +		if (btrfs_read_dev_one_super(bdev, copy_num, &super_page))
>  			continue;
>  
> -		disk_super = (struct btrfs_super_block *)bh->b_data;
> +		disk_super = page_address(super_page);
>  
>  		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
> -		set_buffer_dirty(bh);
> -		sync_dirty_buffer(bh);
> -		brelse(bh);
> +
> +		bytenr = btrfs_sb_offset(copy_num);
> +
> +		bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio_set_dev(&bio, bdev);
> +		bio_set_op_attrs(&bio, REQ_OP_WRITE, 0);
> +		bio_add_page(&bio, super_page, PAGE_SIZE, 0);
> +
> +		ret = submit_bio_wait(&bio);
> +		WARN_ON(ret);
> +
> +		__free_page(super_page);
> +		bio_reset(&bio);
> +
>  	}
>  
>  	/* Notify udev that device has changed */
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 9c7d4fe5c39a..46a65b15eb93 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
>  
>  #define BTRFS_STRIPE_LEN	SZ_64K
>  
> -struct buffer_head;
> -
>  struct btrfs_io_geometry {
>  	/* remaining bytes before crossing a stripe */
>  	u64 len;
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
  2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
  2020-01-17 13:38   ` David Sterba
@ 2020-01-17 14:51   ` David Sterba
  2020-01-17 15:01   ` Nikolay Borisov
  2 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2020-01-17 14:51 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: David Sterba, Nikolay Borisov, linux-btrfs

On Fri, Jan 17, 2020 at 09:51:02PM +0900, Johannes Thumshirn wrote:
> Similar to the superblock read path, change the write path to using BIOs
> and pages instead of buffer_heads.
> 
> This is based on a patch originally authored by Nikolay Borisov.
> 
> Co-developed-by: Nikolay Borisov <nborisov@suse.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
>  1 file changed, 61 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 50c93ffe8d03..51e7b832c8fd 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c

Grep pointed out that the #include <buffer_head> is still in disk-io.c,
(check-integrity.c got it removed).

> @@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
>  }
>  ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
>  
> -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> +static void btrfs_end_super_write(struct bio *bio)
>  {
> -	if (uptodate) {
> -		set_buffer_uptodate(bh);
> -	} else {
> -		struct btrfs_device *device = (struct btrfs_device *)
> -			bh->b_private;
> -
> -		btrfs_warn_rl_in_rcu(device->fs_info,
> -				"lost page write due to IO error on %s",
> -					  rcu_str_deref(device->name));
> -		/* note, we don't set_buffer_write_io_error because we have
> -		 * our own ways of dealing with the IO errors
> -		 */
> -		clear_buffer_uptodate(bh);
> -		btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
> +	struct btrfs_device *device = bio->bi_private;
> +	struct bio_vec *bvec;
> +	struct bvec_iter_all iter_all;
> +	struct page *page;
> +
> +	bio_for_each_segment_all(bvec, bio, iter_all) {
> +		page = bvec->bv_page;
> +
> +		if (blk_status_to_errno(bio->bi_status)) {
> +			btrfs_warn_rl_in_rcu(device->fs_info,
> +					     "lost page write due to IO error on %s",
> +					     rcu_str_deref(device->name));
> +			ClearPageUptodate(page);
> +			SetPageError(page);
> +			btrfs_dev_stat_inc_and_print(device,
> +						     BTRFS_DEV_STAT_WRITE_ERRS);
> +		} else {
> +			SetPageUptodate(page);
> +		}
> +
> +		put_page(page);
> +		unlock_page(page);
>  	}
> -	unlock_buffer(bh);
> -	put_bh(bh);
> +
> +	bio_put(bio);
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> @@ -3462,16 +3470,15 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>   * the expected device size at commit time. Note that max_mirrors must be
>   * same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or submission fails.
> + * Return number of errors when page is not found or submission fails.
>   */
>  static int write_dev_supers(struct btrfs_device *device,
>  			    struct btrfs_super_block *sb, int max_mirrors)
>  {
>  	struct btrfs_fs_info *fs_info = device->fs_info;
>  	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
> -	struct buffer_head *bh;
> +	gfp_t gfp_mask;
>  	int i;
> -	int ret;
>  	int errors = 0;
>  	u64 bytenr;
>  	int op_flags;
> @@ -3481,7 +3488,13 @@ static int write_dev_supers(struct btrfs_device *device,
>  
>  	shash->tfm = fs_info->csum_shash;
>  
> +	gfp_mask = mapping_gfp_constraint(device->bdev->bd_inode->i_mapping,
> +					  ~__GFP_FS) | __GFP_NOFAIL;
> +
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +		struct bio *bio;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
> @@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
>  				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
>  		crypto_shash_final(shash, sb->csum);
>  
> -		/* One reference for us, and we leave it for the caller */
> -		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
> -			      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_or_create_page(device->bdev->bd_inode->i_mapping,
> +					   bytenr >> PAGE_SHIFT, gfp_mask);
> +		if (!page) {
>  			btrfs_err(device->fs_info,
> -			    "couldn't get super buffer head for bytenr %llu",
> +			    "couldn't get superblock page for bytenr %llu",
>  			    bytenr);
>  			errors++;
>  			continue;
>  		}
>  
> -		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
> +		/* Bump the refcount for wait_dev_supers() */
> +		get_page(page);
>  
> -		/* one reference for submit_bh */
> -		get_bh(bh);
> -
> -		set_buffer_uptodate(bh);
> -		lock_buffer(bh);
> -		bh->b_end_io = btrfs_end_buffer_write_sync;
> -		bh->b_private = device;
> +		memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
>  
>  		/*
>  		 * we fua the first super.  The others we allow
> @@ -3522,9 +3529,17 @@ static int write_dev_supers(struct btrfs_device *device,
>  		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
>  		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
>  			op_flags |= REQ_FUA;
> -		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
> -		if (ret)
> -			errors++;
> +
> +		bio = bio_alloc(gfp_mask, 1);
> +		bio_set_dev(bio, device->bdev);
> +		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio->bi_private = device;
> +		bio->bi_end_io = btrfs_end_super_write;
> +		bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
> +			     offset_in_page(bytenr));
> +
> +		bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
> +		btrfsic_submit_bio(bio);
>  	}
>  	return errors < i ? 0 : -1;
>  }
> @@ -3533,12 +3548,11 @@ static int write_dev_supers(struct btrfs_device *device,
>   * Wait for write completion of superblocks done by write_dev_supers,
>   * @max_mirrors same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or not marked up to
> + * Return number of errors when page is not found or not marked up to
>   * date.
>   */
>  static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  {
> -	struct buffer_head *bh;
>  	int i;
>  	int errors = 0;
>  	bool primary_failed = false;
> @@ -3548,32 +3562,33 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
>  
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
>  			break;
>  
> -		bh = __find_get_block(device->bdev,
> -				      bytenr / BTRFS_BDEV_BLOCKSIZE,
> -				      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_get_page(device->bdev->bd_inode->i_mapping,
> +				     bytenr >> PAGE_SHIFT);
> +		if (!page) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  			continue;
>  		}
> -		wait_on_buffer(bh);
> -		if (!buffer_uptodate(bh)) {
> +		wait_on_page_locked(page);
> +		if (PageError(page)) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  		}
>  
>  		/* drop our reference */
> -		brelse(bh);
> +		put_page(page);
>  
>  		/* drop the reference from the writing run */
> -		brelse(bh);
> +		put_page(page);
>  	}
>  
>  	/* log error, force error return */
> -- 
> 2.24.1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
  2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
  2020-01-17 13:38   ` David Sterba
  2020-01-17 14:51   ` David Sterba
@ 2020-01-17 15:01   ` Nikolay Borisov
  2020-01-17 15:11     ` David Sterba
  2020-01-22 15:48     ` Johannes Thumshirn
  2 siblings, 2 replies; 16+ messages in thread
From: Nikolay Borisov @ 2020-01-17 15:01 UTC (permalink / raw)
  To: Johannes Thumshirn, David Sterba; +Cc: linux-btrfs



On 17.01.20 г. 14:51 ч., Johannes Thumshirn wrote:
> Similar to the superblock read path, change the write path to using BIOs
> and pages instead of buffer_heads.
> 
> This is based on a patch originally authored by Nikolay Borisov.
> 
> Co-developed-by: Nikolay Borisov <nborisov@suse.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
>  1 file changed, 61 insertions(+), 46 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 50c93ffe8d03..51e7b832c8fd 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
>  }
>  ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
>  
> -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> +static void btrfs_end_super_write(struct bio *bio)
>  {
> -	if (uptodate) {
> -		set_buffer_uptodate(bh);
> -	} else {
> -		struct btrfs_device *device = (struct btrfs_device *)
> -			bh->b_private;
> -
> -		btrfs_warn_rl_in_rcu(device->fs_info,
> -				"lost page write due to IO error on %s",
> -					  rcu_str_deref(device->name));
> -		/* note, we don't set_buffer_write_io_error because we have
> -		 * our own ways of dealing with the IO errors
> -		 */
> -		clear_buffer_uptodate(bh);
> -		btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
> +	struct btrfs_device *device = bio->bi_private;
> +	struct bio_vec *bvec;
> +	struct bvec_iter_all iter_all;
> +	struct page *page;
> +
> +	bio_for_each_segment_all(bvec, bio, iter_all) {
> +		page = bvec->bv_page;
> +
> +		if (blk_status_to_errno(bio->bi_status)) {
> +			btrfs_warn_rl_in_rcu(device->fs_info,
> +					     "lost page write due to IO error on %s",
> +					     rcu_str_deref(device->name));
> +			ClearPageUptodate(page);
> +			SetPageError(page);
> +			btrfs_dev_stat_inc_and_print(device,
> +						     BTRFS_DEV_STAT_WRITE_ERRS);
> +		} else {
> +			SetPageUptodate(page);
> +		}
> +
> +		put_page(page);

Isn't this extra put page? Perhahps it's not because that would be the
reference from find_or_create_page in write_dev_supers. In any case I'd
rather have it in that function.

> +		unlock_page(page);
>  	}
> -	unlock_buffer(bh);
> -	put_bh(bh);
> +
> +	bio_put(bio);
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> @@ -3462,16 +3470,15 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>   * the expected device size at commit time. Note that max_mirrors must be
>   * same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or submission fails.
> + * Return number of errors when page is not found or submission fails.
>   */
>  static int write_dev_supers(struct btrfs_device *device,
>  			    struct btrfs_super_block *sb, int max_mirrors)
>  {
>  	struct btrfs_fs_info *fs_info = device->fs_info;
>  	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
> -	struct buffer_head *bh;
> +	gfp_t gfp_mask;
>  	int i;
> -	int ret;
>  	int errors = 0;
>  	u64 bytenr;
>  	int op_flags;
> @@ -3481,7 +3488,13 @@ static int write_dev_supers(struct btrfs_device *device,
>  
>  	shash->tfm = fs_info->csum_shash;
>  
> +	gfp_mask = mapping_gfp_constraint(device->bdev->bd_inode->i_mapping,
> +					  ~__GFP_FS) | __GFP_NOFAIL;
> +
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +		struct bio *bio;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
> @@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
>  				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
>  		crypto_shash_final(shash, sb->csum);
>  
> -		/* One reference for us, and we leave it for the caller */
> -		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
> -			      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_or_create_page(device->bdev->bd_inode->i_mapping,
> +					   bytenr >> PAGE_SHIFT, gfp_mask);

You introduce asymmetry here. Because the write path now utilizes the
page cache whereas the read path uses plain page alloc. I'm not sure but
could this lead to reading garbage from the super block because now you
don't have synchronization between the read and write path. This reminds
me why I was using the page cache and not a plain page. Also by
utilising the page cache you will potentially be reducing IO to disk
since you can be fetching the sb data directly from cache.

> +		if (!page) {
>  			btrfs_err(device->fs_info,
> -			    "couldn't get super buffer head for bytenr %llu",
> +			    "couldn't get superblock page for bytenr %llu",
>  			    bytenr);
>  			errors++;
>  			continue;
>  		}
>  
> -		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
> +		/* Bump the refcount for wait_dev_supers() */
> +		get_page(page);
>  
> -		/* one reference for submit_bh */
> -		get_bh(bh);
> -
> -		set_buffer_uptodate(bh);
> -		lock_buffer(bh);
> -		bh->b_end_io = btrfs_end_buffer_write_sync;
> -		bh->b_private = device;
> +		memcpy(page_address(page), sb, BTRFS_SUPER_INFO_SIZE);
>  
>  		/*
>  		 * we fua the first super.  The others we allow
> @@ -3522,9 +3529,17 @@ static int write_dev_supers(struct btrfs_device *device,
>  		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
>  		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
>  			op_flags |= REQ_FUA;
> -		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
> -		if (ret)
> -			errors++;
> +
> +		bio = bio_alloc(gfp_mask, 1);
> +		bio_set_dev(bio, device->bdev);
> +		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio->bi_private = device;
> +		bio->bi_end_io = btrfs_end_super_write;
> +		bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
> +			     offset_in_page(bytenr));
> +
> +		bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
> +		btrfsic_submit_bio(bio);
>  	}
>  	return errors < i ? 0 : -1;
>  }
> @@ -3533,12 +3548,11 @@ static int write_dev_supers(struct btrfs_device *device,
>   * Wait for write completion of superblocks done by write_dev_supers,
>   * @max_mirrors same for write and wait phases.
>   *
> - * Return number of errors when buffer head is not found or not marked up to
> + * Return number of errors when page is not found or not marked up to
>   * date.
>   */
>  static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  {
> -	struct buffer_head *bh;
>  	int i;
>  	int errors = 0;
>  	bool primary_failed = false;
> @@ -3548,32 +3562,33 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
>  		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
>  
>  	for (i = 0; i < max_mirrors; i++) {
> +		struct page *page;
> +
>  		bytenr = btrfs_sb_offset(i);
>  		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
>  		    device->commit_total_bytes)
>  			break;
>  
> -		bh = __find_get_block(device->bdev,
> -				      bytenr / BTRFS_BDEV_BLOCKSIZE,
> -				      BTRFS_SUPER_INFO_SIZE);
> -		if (!bh) {
> +		page = find_get_page(device->bdev->bd_inode->i_mapping,
> +				     bytenr >> PAGE_SHIFT);
> +		if (!page) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  			continue;
>  		}
> -		wait_on_buffer(bh);
> -		if (!buffer_uptodate(bh)) {
> +		wait_on_page_locked(page);
> +		if (PageError(page)) {
>  			errors++;
>  			if (i == 0)
>  				primary_failed = true;
>  		}
>  
>  		/* drop our reference */
> -		brelse(bh);
> +		put_page(page);
>  
>  		/* drop the reference from the writing run */
> -		brelse(bh);
> +		put_page(page);
>  	}
>  
>  	/* log error, force error return */
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/5] btrfs: remove btrfsic_submit_bh()
  2020-01-17 12:51 ` [PATCH 3/5] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
@ 2020-01-17 15:05   ` Nikolay Borisov
  0 siblings, 0 replies; 16+ messages in thread
From: Nikolay Borisov @ 2020-01-17 15:05 UTC (permalink / raw)
  To: Johannes Thumshirn, David Sterba; +Cc: linux-btrfs



On 17.01.20 г. 14:51 ч., Johannes Thumshirn wrote:
> Now that the last use of btrfsic_submit_bh() is gone, remove the function
> as well.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/check-integrity.c | 57 --------------------------------------
>  fs/btrfs/check-integrity.h |  2 --
>  2 files changed, 59 deletions(-)


Straightforward enough:

Reviewed-by: Nikolay Borisov <nborisov@suse.com>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking
  2020-01-17 12:51 ` [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
@ 2020-01-17 15:10   ` Nikolay Borisov
  2020-01-17 15:13     ` David Sterba
  0 siblings, 1 reply; 16+ messages in thread
From: Nikolay Borisov @ 2020-01-17 15:10 UTC (permalink / raw)
  To: Johannes Thumshirn, David Sterba; +Cc: linux-btrfs



On 17.01.20 г. 14:51 ч., Johannes Thumshirn wrote:
> The integrity checking code for the superblock mirrors is the last remaining
> user of buffer_heads in BTRFS, change it to using plain BIOs as well.
> 
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

So this path is called only during mount which means it's more or less
excluded from concurrent writes. Still I'd like to see it converted to
using the page cache for the reason mentioned in the "write path" patch.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
  2020-01-17 15:01   ` Nikolay Borisov
@ 2020-01-17 15:11     ` David Sterba
  2020-01-22 15:48     ` Johannes Thumshirn
  1 sibling, 0 replies; 16+ messages in thread
From: David Sterba @ 2020-01-17 15:11 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: Johannes Thumshirn, David Sterba, linux-btrfs

On Fri, Jan 17, 2020 at 05:01:35PM +0200, Nikolay Borisov wrote:
> > @@ -3494,26 +3507,20 @@ static int write_dev_supers(struct btrfs_device *device,
> >  				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
> >  		crypto_shash_final(shash, sb->csum);
> >  
> > -		/* One reference for us, and we leave it for the caller */
> > -		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
> > -			      BTRFS_SUPER_INFO_SIZE);
> > -		if (!bh) {
> > +		page = find_or_create_page(device->bdev->bd_inode->i_mapping,
> > +					   bytenr >> PAGE_SHIFT, gfp_mask);
> 
> You introduce asymmetry here. Because the write path now utilizes the
> page cache whereas the read path uses plain page alloc. I'm not sure but
> could this lead to reading garbage from the super block because now you
> don't have synchronization between the read and write path. This reminds
> me why I was using the page cache and not a plain page. Also by
> utilising the page cache you will potentially be reducing IO to disk
> since you can be fetching the sb data directly from cache.

There won't be any data in the cache anyway, because
btrfs_get_bdev_and_sb calls invalidate_bdev just before reading the
superblock. But otherwise I agree that the cache must be used,
effectively the same way as __bread does with BH.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking
  2020-01-17 15:10   ` Nikolay Borisov
@ 2020-01-17 15:13     ` David Sterba
  0 siblings, 0 replies; 16+ messages in thread
From: David Sterba @ 2020-01-17 15:13 UTC (permalink / raw)
  To: Nikolay Borisov; +Cc: Johannes Thumshirn, David Sterba, linux-btrfs

On Fri, Jan 17, 2020 at 05:10:35PM +0200, Nikolay Borisov wrote:
> 
> 
> On 17.01.20 г. 14:51 ч., Johannes Thumshirn wrote:
> > The integrity checking code for the superblock mirrors is the last remaining
> > user of buffer_heads in BTRFS, change it to using plain BIOs as well.
> > 
> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> 
> So this path is called only during mount which means it's more or less
> excluded from concurrent writes. Still I'd like to see it converted to
> using the page cache for the reason mentioned in the "write path" patch.

IIRC we had some funny bugs when mount and device scan (udev) raced just
after mkfs, the page cache must be used so there's no way to read stale
data.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout
  2020-01-17 15:01   ` Nikolay Borisov
  2020-01-17 15:11     ` David Sterba
@ 2020-01-22 15:48     ` Johannes Thumshirn
  1 sibling, 0 replies; 16+ messages in thread
From: Johannes Thumshirn @ 2020-01-22 15:48 UTC (permalink / raw)
  To: Nikolay Borisov, David Sterba; +Cc: linux-btrfs

On 17/01/2020, 16:01, "linux-btrfs-owner@vger.kernel.org on behalf of Nikolay Borisov" <linux-btrfs-owner@vger.kernel.org on behalf of nborisov@suse.com> wrote: 
    
    On 17.01.20 г. 14:51 ч., Johannes Thumshirn wrote:
    > Similar to the superblock read path, change the write path to using BIOs
    > and pages instead of buffer_heads.
    >
    > This is based on a patch originally authored by Nikolay Borisov.
    >
    > Co-developed-by: Nikolay Borisov <nborisov@suse.com>
    > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
    > ---
    >  fs/btrfs/disk-io.c | 107 ++++++++++++++++++++++++++-------------------
    >  1 file changed, 61 insertions(+), 46 deletions(-)
    >
    > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
    > index 50c93ffe8d03..51e7b832c8fd 100644
    > --- a/fs/btrfs/disk-io.c
    > +++ b/fs/btrfs/disk-io.c
    > @@ -3353,25 +3353,33 @@ int __cold open_ctree(struct super_block *sb,
    >  }
    >  ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
    >
    > -static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
    > +static void btrfs_end_super_write(struct bio *bio)
    >  {
    > -     if (uptodate) {
    > -             set_buffer_uptodate(bh);
    > -     } else {
    > -             struct btrfs_device *device = (struct btrfs_device *)
    > -                     bh->b_private;
    > -
    > -             btrfs_warn_rl_in_rcu(device->fs_info,
    > -                             "lost page write due to IO error on %s",
    > -                                       rcu_str_deref(device->name));
    > -             /* note, we don't set_buffer_write_io_error because we have
    > -              * our own ways of dealing with the IO errors
    > -              */
    > -             clear_buffer_uptodate(bh);
    > -             btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
    > +     struct btrfs_device *device = bio->bi_private;
    > +     struct bio_vec *bvec;
    > +     struct bvec_iter_all iter_all;
    > +     struct page *page;
    > +
    > +     bio_for_each_segment_all(bvec, bio, iter_all) {
    > +             page = bvec->bv_page;
    > +
    > +             if (blk_status_to_errno(bio->bi_status)) {
    > +                     btrfs_warn_rl_in_rcu(device->fs_info,
    > +                                          "lost page write due to IO error on %s",
    > +                                          rcu_str_deref(device->name));
    > +                     ClearPageUptodate(page);
    > +                     SetPageError(page);
    > +                     btrfs_dev_stat_inc_and_print(device,
    > +                                                  BTRFS_DEV_STAT_WRITE_ERRS);
    > +             } else {
    > +                     SetPageUptodate(page);
    > +             }
    > +
    > +             put_page(page);
    
    Isn't this extra put page? Perhahps it's not because that would be the
    reference from find_or_create_page in write_dev_supers. In any case I'd
    rather have it in that function.

No we can't do the put_page() in in write_dev_supers() as btrfs_end_super_write()
is the bio endio hook and it's called upon completion of the bio. This happens
asynchronous to write_dev_supers().



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-01-22 15:48 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-17 12:51 [PATCH 0/5] btrfs: remove buffer heads form superblock handling Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 1/5] btrfs: remove buffer heads from super block reading Johannes Thumshirn
2020-01-17 13:31   ` David Sterba
2020-01-17 14:30   ` Nikolay Borisov
2020-01-17 12:51 ` [PATCH 2/5] btrfs: remove use of buffer_heads from superblock writeout Johannes Thumshirn
2020-01-17 13:38   ` David Sterba
2020-01-17 14:51   ` David Sterba
2020-01-17 15:01   ` Nikolay Borisov
2020-01-17 15:11     ` David Sterba
2020-01-22 15:48     ` Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 3/5] btrfs: remove btrfsic_submit_bh() Johannes Thumshirn
2020-01-17 15:05   ` Nikolay Borisov
2020-01-17 12:51 ` [PATCH 4/5] btrfs: remove buffer_heads from btrfsic_process_written_block() Johannes Thumshirn
2020-01-17 12:51 ` [PATCH 5/5] btrfs: remove buffer_heads form superblock mirror integrity checking Johannes Thumshirn
2020-01-17 15:10   ` Nikolay Borisov
2020-01-17 15:13     ` David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.