All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
To: clm@fb.com, jbacik@fb.com, dsterba@suse.com
Cc: Chandan Rajendra <chandan@linux.vnet.ibm.com>,
	linux-btrfs@vger.kernel.org
Subject: [PATCH V21 07/19] Btrfs: subpage-blocksize: Use kmalloc()-ed memory to hold metadata blocks
Date: Sun,  2 Oct 2016 18:54:16 +0530	[thread overview]
Message-ID: <1475414668-25954-8-git-send-email-chandan@linux.vnet.ibm.com> (raw)
In-Reply-To: <1475414668-25954-1-git-send-email-chandan@linux.vnet.ibm.com>

For the subpage-blocksize scenario, this commit uses kmalloc()-ed memory
to buffer metadata blocks.

When reading/writing metadata blocks, we now track the first extent
buffer using bio->bi_private, since with kmalloc()-ed memory we cannot
use page->private. Hence, when writing dirty extent buffers in the
subpage-blocksize scenario, this commit forces each bio to contain a
single extent buffer. For the non subpage-blocksize scenario we continue
to track the corresponding extent buffer using page->private, and hence
a single write bio will continue to carry more than one dirty extent
buffer.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/btrfs/ctree.h                 |   6 +-
 fs/btrfs/disk-io.c               |  27 +++---
 fs/btrfs/extent_io.c             | 204 +++++++++++++++++++++++++--------------
 fs/btrfs/extent_io.h             |   8 +-
 fs/btrfs/tests/extent-io-tests.c |   4 +-
 5 files changed, 158 insertions(+), 91 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ee7cf..745284c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1491,14 +1491,16 @@ static inline void btrfs_set_token_##name(struct extent_buffer *eb,	\
 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
 static inline u##bits btrfs_##name(struct extent_buffer *eb)		\
 {									\
-	type *p = page_address(eb->pages[0]);				\
+	type *p = (type *)((u8 *)page_address(eb->pages[0])		\
+			+ eb->pg_offset);				\
 	u##bits res = le##bits##_to_cpu(p->member);			\
 	return res;							\
 }									\
 static inline void btrfs_set_##name(struct extent_buffer *eb,		\
 				    u##bits val)			\
 {									\
-	type *p = page_address(eb->pages[0]);				\
+	type *p = (type *)((u8 *)page_address(eb->pages[0])		\
+			+ eb->pg_offset);				\
 	p->member = cpu_to_le##bits(val);				\
 }
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9ff48a7..5663481 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -448,13 +448,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
  * we only fill in the checksum field in the first page of a multi-page block
  */
 
-static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
+static int csum_dirty_buffer(struct btrfs_fs_info *fs_info,
+			struct extent_buffer *eb)
 {
-	struct extent_buffer *eb;
 
-	eb = (struct extent_buffer *)page->private;
-	if (page != eb->pages[0])
-		return 0;
 	ASSERT(memcmp_extent_buffer(eb, fs_info->fsid,
 			btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
 
@@ -557,11 +554,10 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	int ret = 0;
 	int reads_done;
 
-	if (!page->private)
+	eb = (io_bio->bio).bi_private;
+	if (!eb)
 		goto out;
 
-	eb = (struct extent_buffer *)page->private;
-
 	/* the pending IO might have been the only thing that kept this buffer
 	 * in memory.  Make sure we have a ref for all this other checks
 	 */
@@ -646,11 +642,11 @@ out:
 	return ret;
 }
 
-static int btree_io_failed_hook(struct page *page, int failed_mirror)
+static int btree_io_failed_hook(struct page *page, void *private,
+				int failed_mirror)
 {
-	struct extent_buffer *eb;
+	struct extent_buffer *eb = private;
 
-	eb = (struct extent_buffer *)page->private;
 	set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
 	eb->read_mirror = failed_mirror;
 	atomic_dec(&eb->io_pages);
@@ -829,11 +825,18 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 
 static int btree_csum_one_bio(struct btrfs_fs_info *fs_info, struct bio *bio)
 {
+	struct extent_buffer *eb = bio->bi_private;
 	struct bio_vec *bvec;
 	int i, ret = 0;
 
 	bio_for_each_segment_all(bvec, bio, i) {
-		ret = csum_dirty_buffer(fs_info, bvec->bv_page);
+		if (eb->len >= PAGE_SIZE)
+			eb = (struct extent_buffer *)(bvec->bv_page->private);
+
+		if (bvec->bv_page != eb->pages[0])
+			continue;
+
+		ret = csum_dirty_buffer(fs_info, eb);
 		if (ret)
 			break;
 	}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6cac61f..8ace367 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2817,18 +2817,17 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 }
 
 
-static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
-				       unsigned long bio_flags)
+static int __must_check submit_one_bio(struct bio *bio,
+				struct extent_io_tree *tree, int mirror_num,
+				unsigned long bio_flags)
 {
 	int ret = 0;
 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct page *page = bvec->bv_page;
-	struct extent_io_tree *tree = bio->bi_private;
 	u64 start;
 
 	start = page_offset(page) + bvec->bv_offset;
 
-	bio->bi_private = NULL;
 	bio_get(bio);
 
 	if (tree->ops && tree->ops->submit_bio_hook)
@@ -2864,7 +2863,8 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
 			      int mirror_num,
 			      unsigned long prev_bio_flags,
 			      unsigned long bio_flags,
-			      bool force_bio_submit)
+			      bool force_bio_submit,
+			      void *private)
 {
 	int ret = 0;
 	struct bio *bio;
@@ -2883,7 +2883,8 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
 		    force_bio_submit ||
 		    merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
 		    bio_add_page(bio, page, page_size, offset) < page_size) {
-			ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
+			ret = submit_one_bio(bio, tree, mirror_num,
+					prev_bio_flags);
 			if (ret < 0) {
 				*bio_ret = NULL;
 				return ret;
@@ -2903,7 +2904,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
 
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
-	bio->bi_private = tree;
+	bio->bi_private = private;
 	bio_set_op_attrs(bio, op, op_flags);
 	if (wbc) {
 		wbc_init_bio(wbc, bio);
@@ -2913,7 +2914,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
 	if (bio_ret)
 		*bio_ret = bio;
 	else
-		ret = submit_one_bio(bio, mirror_num, bio_flags);
+		ret = submit_one_bio(bio, tree, mirror_num, bio_flags);
 
 	return ret;
 }
@@ -3211,7 +3212,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 					 end_bio_extent_readpage, mirror_num,
 					 *bio_flags,
 					 this_bio_flag,
-					 force_bio_submit);
+					 force_bio_submit, NULL);
 		if (!ret) {
 			nr++;
 			*bio_flags = this_bio_flag;
@@ -3346,7 +3347,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
 				      &bio_flags, 0);
 	if (bio)
-		ret = submit_one_bio(bio, mirror_num, bio_flags);
+		ret = submit_one_bio(bio, tree, mirror_num, bio_flags);
 	return ret;
 }
 
@@ -3591,7 +3592,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 					 page, sector, iosize, pg_offset,
 					 bdev, &epd->bio, max_nr,
 					 end_bio_extent_writepage,
-					 0, 0, 0, false);
+					 0, 0, 0, false, NULL);
 		if (ret)
 			SetPageError(page);
 
@@ -3774,9 +3775,8 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
 	}
 }
 
-static void set_btree_ioerr(struct page *page)
+static void set_btree_ioerr(struct extent_buffer *eb)
 {
-	struct extent_buffer *eb = (struct extent_buffer *)page->private;
 	struct btrfs_fs_info *fs_info = eb->eb_info->fs_info;
 
 	if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
@@ -3837,19 +3837,23 @@ static void set_btree_ioerr(struct page *page)
 static void end_bio_extent_buffer_writepage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	struct extent_buffer *eb;
+	struct extent_buffer *eb = bio->bi_private;
+	u32 nodesize = eb->len;
 	int i, done;
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		eb = (struct extent_buffer *)page->private;
-		BUG_ON(!eb);
+		if (nodesize >= PAGE_SIZE) {
+			eb = (struct extent_buffer *)page->private;
+			BUG_ON(!eb);
+		}
+
 		done = atomic_dec_and_test(&eb->io_pages);
 
 		if (bio->bi_error ||
 		    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
-			set_btree_ioerr(page);
+			set_btree_ioerr(eb);
 
 		account_metadata_end_writeback(page,
 					       &eb->eb_info->fs_info->bdi);
@@ -3871,6 +3875,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
 	unsigned long bio_flags = 0;
+	size_t len;
 	int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
 	int ret = 0;
 
@@ -3880,27 +3885,33 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
 		bio_flags = EXTENT_BIO_TREE_LOG;
 
+	len = min_t(size_t, eb->len, PAGE_SIZE);
+
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = eb->pages[i];
 
 		ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
-					 p, offset >> 9, PAGE_SIZE, 0, bdev,
-					 &epd->bio, -1,
+					 p, offset >> 9, len, eb->pg_offset,
+					 bdev, &epd->bio, -1,
 					 end_bio_extent_buffer_writepage,
-					 0, epd->bio_flags, bio_flags, false);
+					 0, epd->bio_flags, bio_flags, false,
+					 eb);
 		epd->bio_flags = bio_flags;
 		if (ret) {
-			set_btree_ioerr(p);
+			set_btree_ioerr(eb);
 			if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
 				end_extent_buffer_writeback(eb);
 			ret = -EIO;
 			break;
 		}
 		account_metadata_writeback(p, &fs_info->bdi);
-		offset += PAGE_SIZE;
+		offset += len;
 		update_nr_written(p, wbc, 1);
 	}
 
+	if (!ret && len < PAGE_SIZE)
+		flush_write_bio(epd);
+
 	return ret;
 }
 
@@ -3964,7 +3975,7 @@ repeat:
 	}
 	rcu_read_unlock();
 	if (ret)
-		*index = (ebs[ret - 1]->start >> PAGE_SHIFT) + 1;
+		*index = ebs[ret - 1]->start + 1;
 	return ret;
 }
 
@@ -3997,8 +4008,8 @@ static int btree_write_cache_pages(struct btrfs_fs_info *fs_info,
 		index = eb_info->writeback_index; /* Start from prev offset */
 		end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_SHIFT;
-		end = wbc->range_end >> PAGE_SHIFT;
+		index = wbc->range_start;
+		end = wbc->range_end;
 		scanned = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -4097,19 +4108,18 @@ int btree_write_range(struct btrfs_fs_info *fs_info, u64 start, u64 end)
 int btree_wait_range(struct btrfs_fs_info *fs_info, u64 start, u64 end)
 {
 	struct extent_buffer *ebs[EBVEC_SIZE];
-	pgoff_t index = start >> PAGE_SHIFT;
-	pgoff_t end_index = end >> PAGE_SHIFT;
 	unsigned nr_ebs;
 	int ret = 0;
 
 	if (end < start)
 		return ret;
 
-	while ((index <= end) &&
-	       (nr_ebs = eb_lookup_tag(fs_info->eb_info, ebs, &index,
+	while ((start <= end) &&
+		(nr_ebs = eb_lookup_tag(fs_info->eb_info, ebs,
+				       (pgoff_t *)&start,
 				       PAGECACHE_TAG_WRITEBACK,
-				       min(end_index - index,
-					   (pgoff_t)EBVEC_SIZE-1) + 1)) != 0) {
+				       min_t(u64, end - start,
+					     EBVEC_SIZE-1) + 1)) != 0) {
 		unsigned i;
 
 		for (i = 0; i < nr_ebs; i++) {
@@ -4296,7 +4306,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 		bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
 				 epd->sync_io ? WRITE_SYNC : 0);
 
-		ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
+		ret = submit_one_bio(epd->bio, epd->tree, 0, epd->bio_flags);
 		BUG_ON(ret < 0); /* -ENOMEM */
 		epd->bio = NULL;
 	}
@@ -4436,7 +4446,7 @@ int extent_readpages(struct extent_io_tree *tree,
 
 	BUG_ON(!list_empty(pages));
 	if (bio)
-		return submit_one_bio(bio, 0, bio_flags);
+		return submit_one_bio(bio, tree, 0, bio_flags);
 	return 0;
 }
 
@@ -4818,6 +4828,12 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
 		return;
 
 	ASSERT(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+
+	if (test_bit(EXTENT_BUFFER_MEM, &eb->bflags)) {
+		kfree(eb->addr);
+		return;
+	}
+
 	do {
 		index--;
 		page = eb->pages[index];
@@ -4925,12 +4941,35 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_eb_info *eb_info,
 	if (!eb)
 		return NULL;
 
+	if (len < PAGE_SIZE) {
+		eb->addr = kmalloc(len, GFP_NOFS);
+		if (!eb->addr)
+			goto err;
+
+		if (((unsigned long)(eb->addr + len - 1) & PAGE_MASK) !=
+		    ((unsigned long)eb->addr & PAGE_MASK)) {
+			/* eb->addr spans two pages - use alloc_page instead */
+			kfree(eb->addr);
+			eb->addr = NULL;
+			goto use_alloc_page;
+		}
+
+		set_bit(EXTENT_BUFFER_MEM, &eb->bflags);
+		eb->pg_offset = offset_in_page(eb->addr);
+		eb->pages[0] = virt_to_page(eb->addr);
+		goto init_eb;
+	}
+
+use_alloc_page:
+
 	for (i = 0; i < num_pages; i++) {
 		eb->pages[i] = alloc_page(GFP_NOFS);
 		if (!eb->pages[i])
 			goto err;
 		attach_extent_buffer_page(eb, eb->pages[i]);
 	}
+
+init_eb:
 	set_extent_buffer_uptodate(eb);
 	btrfs_set_header_nritems(eb, 0);
 	set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
@@ -4996,8 +5035,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_eb_info *eb_info,
 	struct extent_buffer *eb;
 
 	rcu_read_lock();
-	eb = radix_tree_lookup(&eb_info->buffer_radix,
-			       start >> PAGE_SHIFT);
+	eb = radix_tree_lookup(&eb_info->buffer_radix, start);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 		/*
@@ -5046,8 +5084,7 @@ again:
 	if (ret)
 		goto free_eb;
 	spin_lock_irq(&eb_info->buffer_lock);
-	ret = radix_tree_insert(&eb_info->buffer_radix,
-				start >> PAGE_SHIFT, eb);
+	ret = radix_tree_insert(&eb_info->buffer_radix, start, eb);
 	spin_unlock_irq(&eb_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -5102,6 +5139,29 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (!eb)
 		return ERR_PTR(-ENOMEM);
 
+	if (len < PAGE_SIZE) {
+		eb->addr = kmalloc(len, GFP_NOFS);
+		if (!eb->addr) {
+			exists = ERR_PTR(-ENOMEM);
+			goto free_eb;
+		}
+
+		if (((unsigned long)(eb->addr + len - 1) & PAGE_MASK) !=
+		    ((unsigned long)eb->addr & PAGE_MASK)) {
+			/* eb->addr spans two pages - use alloc_page instead */
+			kfree(eb->addr);
+			eb->addr = NULL;
+			goto use_alloc_page;
+		}
+
+		set_bit(EXTENT_BUFFER_MEM, &eb->bflags);
+		eb->pg_offset = offset_in_page(eb->addr);
+		eb->pages[0] = virt_to_page(eb->addr);
+		goto insert_into_tree;
+	}
+
+use_alloc_page:
+
 	for (i = 0; i < num_pages; i++) {
 		p = alloc_page(GFP_NOFS|__GFP_NOFAIL);
 		if (!p) {
@@ -5124,7 +5184,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		attach_extent_buffer_page(eb, p);
 		eb->pages[i] = p;
 	}
-again:
+insert_into_tree:
 	ret = radix_tree_preload(GFP_NOFS);
 	if (ret) {
 		exists = ERR_PTR(ret);
@@ -5132,8 +5192,7 @@ again:
 	}
 
 	spin_lock_irq(&eb_info->buffer_lock);
-	ret = radix_tree_insert(&eb_info->buffer_radix,
-				start >> PAGE_SHIFT, eb);
+	ret = radix_tree_insert(&eb_info->buffer_radix, start, eb);
 	spin_unlock_irq(&eb_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -5141,7 +5200,7 @@ again:
 		if (exists)
 			goto free_eb;
 		else
-			goto again;
+			goto insert_into_tree;
 	}
 	/* add one reference for the tree */
 	check_buffer_tree_ref(eb);
@@ -5412,7 +5471,9 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
 static void end_bio_extent_buffer_readpage(struct bio *bio)
 {
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-	struct extent_io_tree *tree = NULL;
+	struct extent_buffer *eb = bio->bi_private;
+	struct btrfs_eb_info *eb_info = eb->eb_info;
+	struct extent_io_tree *tree = &eb_info->io_tree;
 	struct bio_vec *bvec;
 	u64 unlock_start = 0, unlock_len = 0;
 	int mirror_num = io_bio->mirror_num;
@@ -5421,16 +5482,7 @@ static void end_bio_extent_buffer_readpage(struct bio *bio)
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
-		struct btrfs_eb_info *eb_info;
-		struct extent_buffer *eb;
-
-		eb = (struct extent_buffer *)page->private;
-		if (WARN_ON(!eb))
-			continue;
 
-		eb_info = eb->eb_info;
-		if (!tree)
-			tree = &eb_info->io_tree;
 		if (uptodate) {
 			/*
 			 * btree_readpage_end_io_hook doesn't care about
@@ -5454,7 +5506,8 @@ static void end_bio_extent_buffer_readpage(struct bio *bio)
 				}
 				clean_io_failure(eb_info->fs_info,
 						 &eb_info->io_failure_tree,
-						 tree, start, page, 0, 0);
+						 tree, start, page, 0,
+						 eb->pg_offset);
 			}
 		}
 		/*
@@ -5464,11 +5517,12 @@ static void end_bio_extent_buffer_readpage(struct bio *bio)
 		 * anything.
 		 */
 		if (!uptodate)
-			tree->ops->readpage_io_failed_hook(page, mirror_num);
+			tree->ops->readpage_io_failed_hook(page, eb,
+							mirror_num);
 
 		if (unlock_start == 0) {
 			unlock_start = eb->start;
-			unlock_len = PAGE_SIZE;
+			unlock_len = min(eb->len, PAGE_SIZE);
 		} else {
 			unlock_len += PAGE_SIZE;
 		}
@@ -5493,6 +5547,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
 	u64 unlock_start = 0, unlock_len = 0;
 	unsigned long i;
 	struct page *page;
+	size_t len;
 	int err;
 	int ret = 0;
 	unsigned long num_pages;
@@ -5515,10 +5570,13 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
 	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
 	eb->read_mirror = 0;
 	atomic_set(&eb->io_pages, num_pages);
+
+	len = min_t(size_t, eb->len, PAGE_SIZE);
+
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
 		if (ret) {
-			unlock_len += PAGE_SIZE;
+			unlock_len += len;
 			if (atomic_dec_and_test(&eb->io_pages)) {
 				clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
 				smp_mb__after_atomic();
@@ -5528,10 +5586,10 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
 		}
 
 		err = submit_extent_page(REQ_OP_READ, REQ_META, io_tree, NULL,
-					 page, offset >> 9, PAGE_SIZE, 0, bdev,
-					 &bio, -1,
+					 page, offset >> 9, len, eb->pg_offset,
+					 bdev, &bio, -1,
 					 end_bio_extent_buffer_readpage,
-					 mirror_num, 0, 0, false);
+					 mirror_num, 0, 0, false, eb);
 		if (err) {
 			ret = err;
 			/*
@@ -5548,13 +5606,13 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
 				wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
 			}
 			unlock_start = offset;
-			unlock_len = PAGE_SIZE;
+			unlock_len = len;
 		}
-		offset += PAGE_SIZE;
+		offset += len;
 	}
 
 	if (bio) {
-		err = submit_one_bio(bio, mirror_num, 0);
+		err = submit_one_bio(bio, io_tree, mirror_num, 0);
 		if (err)
 			return err;
 	}
@@ -5581,7 +5639,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 	struct page *page;
 	char *kaddr;
 	char *dst = (char *)dstv;
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 
 	WARN_ON(start > eb->len);
@@ -5612,7 +5670,7 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
 	struct page *page;
 	char *kaddr;
 	char __user *dst = (char __user *)dstv;
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	int ret = 0;
 
@@ -5650,10 +5708,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			       unsigned long *map_start,
 			       unsigned long *map_len)
 {
-	size_t offset = start & (PAGE_SIZE - 1);
+	size_t offset;
 	char *kaddr;
 	struct page *p;
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	unsigned long end_i = (start_offset + start + min_len - 1) >>
 		PAGE_SHIFT;
@@ -5679,7 +5737,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 	p = eb->pages[i];
 	kaddr = page_address(p);
 	*map = kaddr + offset;
-	*map_len = PAGE_SIZE - offset;
+	*map_len = (eb->len >= PAGE_SIZE) ? PAGE_SIZE - offset : eb->len;
 	return 0;
 }
 
@@ -5692,7 +5750,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 	struct page *page;
 	char *kaddr;
 	char *ptr = (char *)ptrv;
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 	int ret = 0;
 
@@ -5727,7 +5785,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
 	struct page *page;
 	char *kaddr;
 	char *src = (char *)srcv;
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 
 	WARN_ON(start > eb->len);
@@ -5756,7 +5814,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
 	size_t offset;
 	struct page *page;
 	char *kaddr;
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
 
 	WARN_ON(start > eb->len);
@@ -5786,7 +5844,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 	size_t offset;
 	struct page *page;
 	char *kaddr;
-	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = dst->pg_offset;
 	unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
 
 	WARN_ON(src->len != dst_len);
@@ -5839,7 +5897,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 				    unsigned long *page_index,
 				    size_t *page_offset)
 {
-	size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = eb->pg_offset;
 	size_t byte_offset = BIT_BYTE(nr);
 	size_t offset;
 
@@ -5987,7 +6045,7 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	size_t cur;
 	size_t dst_off_in_page;
 	size_t src_off_in_page;
-	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = dst->pg_offset;
 	unsigned long dst_i;
 	unsigned long src_i;
 
@@ -6035,7 +6093,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	size_t src_off_in_page;
 	unsigned long dst_end = dst_offset + len - 1;
 	unsigned long src_end = src_offset + len - 1;
-	size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+	size_t start_offset = dst->pg_offset;
 	unsigned long dst_i;
 	unsigned long src_i;
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e7a0462..6a02343 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -45,6 +45,7 @@
 #define EXTENT_BUFFER_WRITE_ERR 11    /* write IO error */
 #define EXTENT_BUFFER_MIXED_PAGES 12	/* the pages span multiple zones or numa nodes. */
 #define EXTENT_BUFFER_READING 13 /* currently reading this eb. */
+#define EXTENT_BUFFER_MEM 14
 
 /* these are flags for extent_clear_unlock_delalloc */
 #define PAGE_UNLOCK		(1 << 0)
@@ -138,7 +139,8 @@ struct extent_io_ops {
 	int (*merge_bio_hook)(struct page *page, unsigned long offset,
 			      size_t size, struct bio *bio,
 			      unsigned long bio_flags);
-	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
+	int (*readpage_io_failed_hook)(struct page *page, void *private,
+				int failed_mirror);
 	int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
 				    struct page *page, u64 start, u64 end,
 				    int mirror);
@@ -234,6 +236,8 @@ struct extent_buffer {
 	 */
 	wait_queue_head_t read_lock_wq;
 	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
+	void *addr;
+	unsigned int pg_offset;
 #ifdef CONFIG_BTRFS_DEBUG
 	struct list_head leak_list;
 #endif
@@ -454,7 +458,7 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
 
 static inline unsigned long eb_index(struct extent_buffer *eb)
 {
-	return eb->start >> PAGE_SHIFT;
+	return eb->start;
 }
 
 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 45524f1..b85a57e 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -379,7 +379,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 	 * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
 	 * BTRFS_MAX_METADATA_BLOCKSIZE.
 	 */
-	len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
+	len = ((sectorsize * 4) <= BTRFS_MAX_METADATA_BLOCKSIZE)
 		? sectorsize * 4 : sectorsize;
 
 	bitmap = kmalloc(len, GFP_KERNEL);
@@ -401,7 +401,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 
 	/* Do it over again with an extent buffer which isn't page-aligned. */
 	free_extent_buffer(eb);
-	eb = alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
+	eb = alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len);
 	if (!eb) {
 		test_msg("Couldn't allocate test extent buffer\n");
 		kfree(bitmap);
-- 
2.5.5


  parent reply	other threads:[~2016-10-02 13:25 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-02 13:24 [PATCH V21 00/19] Allow I/O on blocks whose size is less than page size Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 01/19] Btrfs: subpage-blocksize: extent_clear_unlock_delalloc: Prevent page from being unlocked more than once Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 02/19] Btrfs: subpage-blocksize: Make sure delalloc range intersects with the locked page's range Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 03/19] Btrfs: subpage-blocksize: Use PG_Uptodate flag to track block uptodate status Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 04/19] Btrfs: Remove extent_io_tree's track_uptodate member Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 05/19] Btrfs: subpage-blocksize: Fix whole page read Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 06/19] Btrfs: subpage-blocksize: Fix whole page write Chandan Rajendra
2016-10-02 13:24 ` Chandan Rajendra [this message]
2016-10-02 13:24 ` [PATCH V21 08/19] Btrfs: subpage-blocksize: Execute sanity tests on all possible block sizes Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 09/19] Btrfs: subpage-blocksize: Compute free space tree BITMAP_RANGE based on sectorsize Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 10/19] Btrfs: subpage-blocksize: Allow mounting filesystems where sectorsize < PAGE_SIZE Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 11/19] Btrfs: subpage-blocksize: Deal with partial ordered extent allocations Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 12/19] Btrfs: subpage-blocksize: Explicitly track I/O status of blocks of an ordered extent Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 13/19] Btrfs: subpage-blocksize: btrfs_punch_hole: Fix uptodate blocks check Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 14/19] Btrfs: subpage-blocksize: Fix file defragmentation code Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 15/19] Btrfs: subpage-blocksize: Enable dedupe ioctl Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 16/19] Btrfs: subpage-blocksize: btrfs_clone: Flush dirty blocks of a page that do not map the clone range Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 17/19] Btrfs: subpage-blocksize: Make file extent relocate code subpage blocksize aware Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 18/19] Btrfs: subpage-blocksize: __btrfs_lookup_bio_sums: Set offset when moving to a new bio_vec Chandan Rajendra
2016-10-02 13:24 ` [PATCH V21 19/19] Btrfs: subpage-blocksize: Disable compression Chandan Rajendra
2017-06-19 10:19 ` [PATCH V21 00/19] Allow I/O on blocks whose size is less than page size Chandan Rajendra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1475414668-25954-8-git-send-email-chandan@linux.vnet.ibm.com \
    --to=chandan@linux.vnet.ibm.com \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=jbacik@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.