All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
To: clm@fb.com, jbacik@fb.com, bo.li.liu@oracle.com, dsterba@suse.cz
Cc: Chandan Rajendra <chandan@linux.vnet.ibm.com>,
	aneesh.kumar@linux.vnet.ibm.com, linux-btrfs@vger.kernel.org
Subject: [RFC PATCH V7 16/16] Btrfs: subpagesize-blocksize: Track blocks of ordered extent submitted for write I/O.
Date: Mon, 22 Sep 2014 00:25:30 +0530	[thread overview]
Message-ID: <1411325730-21817-17-git-send-email-chandan@linux.vnet.ibm.com> (raw)
In-Reply-To: <1411325730-21817-1-git-send-email-chandan@linux.vnet.ibm.com>

In the subpagesize-blocksize scenario, the following command (with a PAGE_SIZE
of 4k and a block size of 2k) can cause incorrect accounting of the blocks of
an ordered extent that have been written to disk:

$ xfs_io -f -c "pwrite 0 10240" \
-c "sync_range 0 4096" \
-c "sync_range 8192 2048" \
-c "pwrite 10240 2048" \
-c "sync_range 10240 2048" \
/mnt/btrfs/file.bin

To fix this, explicitly track the blocks of an ordered extent that have already
been submitted for write I/O, using a new per-ordered-extent "blocks_submitted"
bitmap.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/btrfs/extent_io.c    | 24 ++++++++++++++++++++++--
 fs/btrfs/ordered-data.c |  4 +++-
 fs/btrfs/ordered-data.h |  4 ++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ccd9e1c..2cf9e59 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3201,6 +3201,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 	u64 extent_offset;
 	u64 extent_end;
 	u64 iosize;
+	u64 blk, nr_blks;
+	u64 blk_submitted;
 	sector_t sector;
 	struct extent_state *cached_state = NULL;
 	struct block_device *bdev;
@@ -3267,11 +3269,26 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 		iosize = min(extent_end - cur, end - cur + 1);
 		iosize = ALIGN(iosize, blocksize);
 
+		blk = extent_offset >> inode->i_sb->s_blocksize_bits;
+		nr_blks = iosize >> inode->i_sb->s_blocksize_bits;
+
+		blk_submitted = find_next_bit(ordered->blocks_submitted,
+					ordered->len >> inode->i_sb->s_blocksize_bits,
+					blk);
+		if (blk_submitted < blk + nr_blks) {
+			if (blk_submitted == blk) {
+				cur += blocksize;
+				btrfs_put_ordered_extent(ordered);
+				continue;
+			}
+			iosize = (blk_submitted - blk)
+				<< inode->i_sb->s_blocksize_bits;
+			nr_blks = iosize >> inode->i_sb->s_blocksize_bits;
+		}
+
 		sector = (ordered->start + extent_offset) >> 9;
 		bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 		compressed = test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags);
-		btrfs_put_ordered_extent(ordered);
-		ordered = NULL;
 
 		/*
 		 * compressed and inline extents are written through other
@@ -3284,6 +3301,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
  			 */
 			nr++;
 			cur += iosize;
+			btrfs_put_ordered_extent(ordered);
 			continue;
 		}
 
@@ -3298,6 +3316,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 		} else {
 			unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
 
+			bitmap_set(ordered->blocks_submitted, blk, nr_blks);
+			btrfs_put_ordered_extent(ordered);
 			set_range_writeback(tree, cur, cur + iosize - 1);
 			if (!PageWriteback(page)) {
 				btrfs_err(BTRFS_I(inode)->root->fs_info,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 4d9832f..59b2544 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -199,13 +199,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 	nr_longs = BITS_TO_LONGS(len >> inode->i_sb->s_blocksize_bits);
 	if (nr_longs == 1) {
 		entry->blocks_done = &entry->blocks_bitmap;
+		entry->blocks_submitted = &entry->blocks_submitted_bitmap;
 	} else {
-		entry->blocks_done = kzalloc(nr_longs * sizeof(unsigned long),
+		entry->blocks_done = kzalloc(2 * nr_longs * sizeof(unsigned long),
 					GFP_NOFS);
 		if (!entry->blocks_done) {
 			kmem_cache_free(btrfs_ordered_extent_cache, entry);
 			return -ENOMEM;
 		}
+		entry->blocks_submitted = entry->blocks_done + nr_longs;
 	}
 
 	entry->file_offset = file_offset;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 7de3b1e..851914c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -139,6 +139,10 @@ struct btrfs_ordered_extent {
 	/* bitmap to track the blocks that have been written to disk */
 	unsigned long *blocks_done;
 	unsigned long blocks_bitmap;
+
+	/* bitmap to track the blocks that have been submitted for write i/o */
+	unsigned long *blocks_submitted;
+	unsigned long blocks_submitted_bitmap;
 };
 
 /*
-- 
2.1.0


      parent reply	other threads:[~2014-09-21 18:56 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-21 18:55 [RFC PATCH V7 00/16] Btrfs: Subpagesize-blocksize: Get rid of whole page I/O Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 01/16] Btrfs: subpagesize-blocksize: Get rid of whole page reads Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 02/16] Btrfs: subpagesize-blocksize: Get rid of whole page writes Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 03/16] Btrfs: subpagesize-blocksize: __btrfs_buffered_write: Reserve/release extents aligned to block size Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 04/16] Btrfs: subpagesize-blocksize: Define extent_buffer_head Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 05/16] Btrfs: subpagesize-blocksize: Read tree blocks whose size is <PAGE_CACHE_SIZE Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 06/16] Btrfs: subpagesize-blocksize: Write only dirty extent buffers belonging to a page Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 07/16] Btrfs: subpagesize-blocksize: Allow mounting filesystems where sectorsize != PAGE_SIZE Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 08/16] Btrfs: subpagesize-blocksize: Compute and look up csums based on sectorsized blocks Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 09/16] Btrfs: subpagesize-blocksize: __extent_writepage: Write only dirty blocks of a page Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 10/16] Btrfs: subpagesize-blocksize: fallocate: Work with sectorsized units Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 11/16] Btrfs: subpagesize-blocksize: btrfs_page_mkwrite: Reserve space in " Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 12/16] Btrfs: subpagesize-blocksize: Search for all ordered extents that could span across a page Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 13/16] Btrfs: subpagesize-blocksize: Deal with partial ordered extent allocations Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 14/16] Btrfs: subpagesize-blocksize: Explicitly Track I/O status of blocks of an ordered extent Chandan Rajendra
2014-09-21 18:55 ` [RFC PATCH V7 15/16] Btrfs: subpagesize-blocksize: Revert commit fc4adbff823f76577ece26dcb88bf6f8392dbd43 Chandan Rajendra
2014-09-21 18:55 ` Chandan Rajendra [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1411325730-21817-17-git-send-email-chandan@linux.vnet.ibm.com \
    --to=chandan@linux.vnet.ibm.com \
    --cc=aneesh.kumar@linux.vnet.ibm.com \
    --cc=bo.li.liu@oracle.com \
    --cc=clm@fb.com \
    --cc=dsterba@suse.cz \
    --cc=jbacik@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.