* [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions
@ 2016-12-02  2:03 Qu Wenruo
  2016-12-02  2:03 ` [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges Qu Wenruo
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Qu Wenruo @ 2016-12-02  2:03 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, chandan

Introduce a new parameter, struct extent_changeset, to
btrfs_qgroup_reserve_data() and its callers.

The extent_changeset records which ranges btrfs_qgroup_reserve_data()
reserved in the current call, so the error path can free exactly those
ranges.

We need to expose it to the callers because, in the buffered write
error path, without knowing exactly which ranges the current call
reserved, we can free space that was not reserved by us.

This leads to qgroup reserved space underflow.
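
To illustrate the new caller pattern, here is a minimal sketch
(illustrative only, not part of the diff; variable names follow the
buffered write path touched below):

	struct extent_changeset data_reserved = EMPTY_CHANGESET;
	int ret;

	/* Records exactly which ranges this call reserves */
	ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
					  write_bytes);
	if (ret == 0) {
		/* ... copy pages, set delalloc, etc. ... */
	}
	/* Drop the record (and its internal ulist) when done */
	extent_changeset_release(&data_reserved);

Patch 2 of this series then passes the same changeset to the free side,
so an error path frees only the ranges recorded here.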

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 fs/btrfs/ctree.h       |  6 ++++--
 fs/btrfs/extent-tree.c | 18 ++++++++++++------
 fs/btrfs/extent_io.h   | 22 ++++++++++++++++++++++
 fs/btrfs/file.c        | 13 ++++++++++---
 fs/btrfs/inode-map.c   |  4 +++-
 fs/btrfs/inode.c       | 19 ++++++++++++++-----
 fs/btrfs/ioctl.c       |  5 ++++-
 fs/btrfs/qgroup.c      | 25 +++++++++++++++----------
 fs/btrfs/qgroup.h      |  3 ++-
 fs/btrfs/relocation.c  |  4 +++-
 10 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0db037c..9f7e109 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2694,7 +2694,8 @@ enum btrfs_flush_state {
 	COMMIT_TRANS		=	6,
 };
 
-int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len);
+int btrfs_check_data_free_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len);
 int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
 void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
@@ -2716,7 +2717,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes,
 			enum btrfs_metadata_reserve_type reserve_type);
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes,
 			enum btrfs_metadata_reserve_type reserve_type);
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len,
+int btrfs_delalloc_reserve_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len,
 			enum btrfs_metadata_reserve_type reserve_type);
 void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len,
 			enum btrfs_metadata_reserve_type reserve_type);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index dae287d..f116bcf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3357,6 +3357,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 {
 	struct btrfs_root *root = block_group->fs_info->tree_root;
 	struct inode *inode = NULL;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	u64 alloc_hint = 0;
 	int dcs = BTRFS_DC_ERROR;
 	u64 num_pages = 0;
@@ -3474,7 +3475,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 	num_pages *= 16;
 	num_pages *= PAGE_SIZE;
 
-	ret = btrfs_check_data_free_space(inode, 0, num_pages);
+	ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
 	if (ret)
 		goto out_put;
 
@@ -3505,6 +3506,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 	block_group->disk_cache_state = dcs;
 	spin_unlock(&block_group->lock);
 
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
@@ -4302,7 +4304,8 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
  * Will replace old btrfs_check_data_free_space(), but for patch split,
  * add a new function first and then replace it.
  */
-int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
+int btrfs_check_data_free_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
@@ -4317,9 +4320,11 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
 		return ret;
 
 	/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
-	ret = btrfs_qgroup_reserve_data(inode, start, len);
+	ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
 	if (ret < 0)
 		btrfs_free_reserved_data_space_noquota(inode, start, len);
+	else
+		ret = 0;
 	return ret;
 }
 
@@ -6254,12 +6259,13 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes,
  * Return 0 for success
  * Return <0 for error(-ENOSPC or -EQUOT)
  */
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len,
-				 enum btrfs_metadata_reserve_type reserve_type)
+int btrfs_delalloc_reserve_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len,
+			enum btrfs_metadata_reserve_type reserve_type)
 {
 	int ret;
 
-	ret = btrfs_check_data_free_space(inode, start, len);
+	ret = btrfs_check_data_free_space(inode, reserved, start, len);
 	if (ret < 0)
 		return ret;
 	ret = btrfs_delalloc_reserve_metadata(inode, len, reserve_type);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 13edb86..43784b4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -196,6 +196,28 @@ struct extent_changeset {
 	struct ulist *range_changed;
 };
 
+#define EMPTY_CHANGESET (struct extent_changeset) { 0, }
+
+static inline void extent_changeset_init(struct extent_changeset *changeset)
+{
+	changeset->bytes_changed = 0;
+	changeset->range_changed = NULL;
+}
+
+static inline void extent_changeset_reinit(struct extent_changeset *changeset)
+{
+	changeset->bytes_changed = 0;
+	if (changeset->range_changed)
+		ulist_reinit(changeset->range_changed);
+}
+
+static inline void extent_changeset_release(struct extent_changeset *changeset)
+{
+	changeset->bytes_changed = 0;
+	ulist_free(changeset->range_changed);
+	changeset->range_changed = NULL;
+}
+
 static inline void extent_set_compress_type(unsigned long *bio_flags,
 					    int compress_type)
 {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 861ff47..b5d0f79 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1521,6 +1521,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct page **pages = NULL;
 	struct extent_state *cached_state = NULL;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	u64 release_bytes = 0;
 	u64 lockstart;
 	u64 lockend;
@@ -1572,7 +1573,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		reserve_bytes = round_up(write_bytes + sector_offset,
 				root->sectorsize);
 
-		ret = btrfs_check_data_free_space(inode, pos, write_bytes);
+		extent_changeset_reinit(&data_reserved);
+		ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
+						  write_bytes);
 		if (ret < 0) {
 			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
 						      BTRFS_INODE_PREALLOC)) &&
@@ -1749,6 +1752,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		}
 	}
 
+	extent_changeset_release(&data_reserved);
 	return num_written ? num_written : ret;
 }
 
@@ -2716,6 +2720,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 {
 	struct inode *inode = file_inode(file);
 	struct extent_state *cached_state = NULL;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	struct falloc_range *range;
 	struct falloc_range *tmp;
 	struct list_head reserve_list;
@@ -2847,10 +2852,11 @@ static long btrfs_fallocate(struct file *file, int mode,
 				free_extent_map(em);
 				break;
 			}
-			ret = btrfs_qgroup_reserve_data(inode, cur_offset,
-					last_byte - cur_offset);
+			ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
+					cur_offset, last_byte - cur_offset);
 			if (ret < 0)
 				break;
+			ret = 0;
 		} else {
 			/*
 			 * Do not need to reserve unwritten extent for this
@@ -2918,6 +2924,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	if (ret != 0)
 		btrfs_free_reserved_data_space(inode, alloc_start,
 				       alloc_end - cur_offset);
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 0335862..72f7d4e 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -399,6 +399,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 	struct btrfs_path *path;
 	struct inode *inode;
 	struct btrfs_block_rsv *rsv;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	u64 num_bytes;
 	u64 alloc_hint = 0;
 	int ret;
@@ -491,7 +492,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 	/* Just to make sure we have enough space */
 	prealloc += 8 * PAGE_SIZE;
 
-	ret = btrfs_delalloc_reserve_space(inode, 0, prealloc,
+	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 0, prealloc,
 					   BTRFS_RESERVE_NORMAL);
 	if (ret)
 		goto out_put;
@@ -516,6 +517,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
 	trans->bytes_reserved = num_bytes;
 
 	btrfs_free_path(path);
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3db58d9..9477424 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2145,6 +2145,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 	struct btrfs_writepage_fixup *fixup;
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	struct page *page;
 	struct inode *inode;
 	u64 page_start;
@@ -2185,7 +2186,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 
 	if (inode_need_compress(inode))
 		reserve_type = BTRFS_RESERVE_COMPRESS;
-	ret = btrfs_delalloc_reserve_space(inode, page_start,
+	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
 					   PAGE_SIZE, reserve_type);
 	if (ret) {
 		mapping_set_error(page->mapping, ret);
@@ -2205,6 +2206,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 	unlock_page(page);
 	put_page(page);
 	kfree(fixup);
+	extent_changeset_release(&data_reserved);
 }
 
 /*
@@ -4843,6 +4845,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	char *kaddr;
 	u32 blocksize = root->sectorsize;
 	pgoff_t index = from >> PAGE_SHIFT;
@@ -4861,7 +4864,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 	    (!len || ((len & (blocksize - 1)) == 0)))
 		goto out;
 
-	ret = btrfs_delalloc_reserve_space(inode,
+	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
 			round_down(from, blocksize), blocksize, reserve_type);
 	if (ret)
 		goto out;
@@ -4946,6 +4949,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 	unlock_page(page);
 	put_page(page);
 out:
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
@@ -8743,6 +8747,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	struct inode *inode = file->f_mapping->host;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_dio_data dio_data = { 0 };
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	loff_t offset = iocb->ki_pos;
 	size_t count = 0;
 	int flags = 0;
@@ -8778,8 +8783,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			inode_unlock(inode);
 			relock = true;
 		}
-		ret = btrfs_delalloc_reserve_space(inode, offset, count,
-						   BTRFS_RESERVE_NORMAL);
+		ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+					offset, count, BTRFS_RESERVE_NORMAL);
 		if (ret)
 			goto out;
 		dio_data.outstanding_extents = div64_u64(count +
@@ -8836,6 +8841,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (relock)
 		inode_lock(inode);
 
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
@@ -9067,6 +9073,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	enum btrfs_metadata_reserve_type reserve_type = BTRFS_RESERVE_NORMAL;
 	char *kaddr;
 	unsigned long zero_start;
@@ -9095,7 +9102,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * end up waiting indefinitely to get a lock on the page currently
 	 * being processed by btrfs_page_mkwrite() function.
 	 */
-	ret = btrfs_delalloc_reserve_space(inode, page_start,
+	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
 					   reserved_space, reserve_type);
 	if (!ret) {
 		ret = file_update_time(vma->vm_file);
@@ -9200,6 +9207,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 out_unlock:
 	if (!ret) {
 		sb_end_pagefault(inode->i_sb);
+		extent_changeset_release(&data_reserved);
 		return VM_FAULT_LOCKED;
 	}
 	unlock_page(page);
@@ -9208,6 +9216,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 				     reserve_type);
 out_noreserve:
 	sb_end_pagefault(inode->i_sb);
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b799d91..8678104 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1128,6 +1128,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
 	struct extent_io_tree *tree;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 	enum btrfs_metadata_reserve_type reserve_type = BTRFS_RESERVE_NORMAL;
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 
@@ -1139,7 +1140,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 
 	if (inode_need_compress(inode))
 		reserve_type = BTRFS_RESERVE_COMPRESS;
-	ret = btrfs_delalloc_reserve_space(inode,
+	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
 			start_index << PAGE_SHIFT,
 			page_cnt << PAGE_SHIFT, reserve_type);
 	if (ret)
@@ -1250,6 +1251,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 		unlock_page(pages[i]);
 		put_page(pages[i]);
 	}
+	extent_changeset_release(&data_reserved);
 	return i_done;
 out:
 	for (i = 0; i < i_done; i++) {
@@ -1259,6 +1261,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 	btrfs_delalloc_release_space(inode,
 			start_index << PAGE_SHIFT,
 			page_cnt << PAGE_SHIFT, reserve_type);
+	extent_changeset_release(&data_reserved);
 	return ret;
 
 }
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 8a6b07d..05d210f 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2820,10 +2820,10 @@ btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
  *
  * NOTE: this function may sleep for memory allocation.
  */
-int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
+int btrfs_qgroup_reserve_data(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct extent_changeset changeset;
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 	int ret;
@@ -2832,30 +2832,35 @@ int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
 	    !is_fstree(root->objectid) || len == 0)
 		return 0;
 
-	changeset.bytes_changed = 0;
-	changeset.range_changed = ulist_alloc(GFP_NOFS);
+	/* @reserved parameter is mandatory for qgroup */
+	if (WARN_ON(!reserved))
+		return -EINVAL;
+	if (!reserved->range_changed) {
+		reserved->range_changed = ulist_alloc(GFP_NOFS);
+		if (!reserved->range_changed)
+			return -ENOMEM;
+	}
+
 	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
-			start + len -1, EXTENT_QGROUP_RESERVED, &changeset);
+			start + len -1, EXTENT_QGROUP_RESERVED, reserved);
 	trace_btrfs_qgroup_reserve_data(inode, start, len,
-					changeset.bytes_changed,
+					reserved->bytes_changed,
 					QGROUP_RESERVE);
 	if (ret < 0)
 		goto cleanup;
-	ret = qgroup_reserve(root, changeset.bytes_changed);
+	ret = qgroup_reserve(root, reserved->bytes_changed);
 	if (ret < 0)
 		goto cleanup;
 
-	ulist_free(changeset.range_changed);
 	return ret;
 
 cleanup:
 	/* cleanup already reserved ranges */
 	ULIST_ITER_INIT(&uiter);
-	while ((unode = ulist_next(changeset.range_changed, &uiter)))
+	while ((unode = ulist_next(reserved->range_changed, &uiter)))
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
 				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
 				 GFP_NOFS);
-	ulist_free(changeset.range_changed);
 	return ret;
 }
 
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 99c879d..a15baf5 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -177,7 +177,8 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
 #endif
 
 /* New io_tree based accurate qgroup reserve API */
-int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len);
+int btrfs_qgroup_reserve_data(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len);
 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
 int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 77ca5b5..72ac7d7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3077,11 +3077,12 @@ int prealloc_file_extent_cluster(struct inode *inode,
 	u64 prealloc_start = cluster->start - offset;
 	u64 prealloc_end = cluster->end - offset;
 	u64 cur_offset;
+	struct extent_changeset data_reserved = EMPTY_CHANGESET;
 
 	BUG_ON(cluster->start != cluster->boundary[0]);
 	inode_lock(inode);
 
-	ret = btrfs_check_data_free_space(inode, prealloc_start,
+	ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
 					  prealloc_end + 1 - prealloc_start);
 	if (ret)
 		goto out;
@@ -3113,6 +3114,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
 				       prealloc_end + 1 - cur_offset);
 out:
 	inode_unlock(inode);
+	extent_changeset_release(&data_reserved);
 	return ret;
 }
 
-- 
2.7.4





* [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
  2016-12-02  2:03 [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Qu Wenruo
@ 2016-12-02  2:03 ` Qu Wenruo
  2016-12-13 12:56   ` Chandan Rajendra
  2016-12-08 14:29 ` [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Chandan Rajendra
  2016-12-13 12:54 ` Chandan Rajendra
  2 siblings, 1 reply; 6+ messages in thread
From: Qu Wenruo @ 2016-12-02  2:03 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, chandan

[BUG]
In the following case, btrfs can underflow qgroup reserved space in
the error path:
(Page size 4K, function names shown without the "btrfs_" prefix)

         Task A                  |             Task B
----------------------------------------------------------------------
Buffered_write [0, 2K)           |
|- check_data_free_space()       |
|  |- qgroup_reserve_data()      |
|     Range aligned to page      |
|     range [0, 4K)          <<< |
|     4K bytes reserved      <<< |
|- copy pages to page cache      |
                                 | Buffered_write [2K, 4K)
                                 | |- check_data_free_space()
                                 | |  |- qgroup_reserve_data()
                                 | |     Range aligned to page
                                 | |     range [0, 4K)
                                 | |     Already reserved by A <<<
                                 | |     0 bytes reserved      <<<
                                 | |- delalloc_reserve_metadata()
                                 | |  And it *FAILED* (Maybe -EDQUOT)
                                 | |- free_reserved_data_space()
                                      |- qgroup_free_data()
                                         Range aligned to page range
                                         [0, 4K)
                                         Freeing 4K
(Special thanks to Chandan for the detailed report and analysis)

[CAUSE]
Task B above frees the reserved data range [0, 4K), which was actually
reserved by Task A.

Later, when the page dirtied by Task A goes through the writeback
routine, the same 4K of reserved data space is freed again at file
extent insert time, causing the qgroup underflow.

[FIX]
Add a @reserved parameter to btrfs_qgroup_free_data() so that it only
frees data ranges recorded by a previous btrfs_qgroup_reserve_data()
call.  In the above case, Task B then frees 0 bytes, so no underflow
occurs.
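
To make the mechanism concrete, here is a simplified sketch of the
overlap clipping done by the new qgroup_free_reserved_data() helper
below (illustrative only; the real code walks the ulist recorded in
@reserved and clears EXTENT_QGROUP_RESERVED on the overlap, and the
kernel max()/min() helpers are assumed):

	/*
	 * Clip one recorded range [r_start, r_end] (inclusive end, as
	 * stored in the ulist) against the range being freed
	 * [start, start + len) and return how many bytes may be freed.
	 */
	static u64 clip_reserved_range(u64 r_start, u64 r_end,
				       u64 start, u64 len)
	{
		u64 r_len = r_end - r_start + 1;
		u64 free_start, free_len;

		/* No overlap: this range was not reserved by us */
		if (r_start >= start + len || r_start + r_len <= start)
			return 0;
		free_start = max(r_start, start);
		free_len = min(start + len, r_start + r_len) - free_start;
		return free_len;
	}

Since Task B's reserve call recorded no ranges, the walk frees 0 bytes
and the underflow is gone.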

Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 fs/btrfs/ctree.h       |  8 ++++---
 fs/btrfs/extent-tree.c | 12 ++++++----
 fs/btrfs/file.c        | 32 +++++++++++++++----------
 fs/btrfs/inode.c       | 35 +++++++++++++--------------
 fs/btrfs/ioctl.c       |  4 ++--
 fs/btrfs/qgroup.c      | 64 ++++++++++++++++++++++++++++++++++++++++++++++----
 fs/btrfs/qgroup.h      |  3 ++-
 fs/btrfs/relocation.c  |  8 +++----
 8 files changed, 117 insertions(+), 49 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9f7e109..1d5eaf3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2697,7 +2697,11 @@ enum btrfs_flush_state {
 int btrfs_check_data_free_space(struct inode *inode,
 			struct extent_changeset *reserved, u64 start, u64 len);
 int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes);
-void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len);
+void btrfs_free_reserved_data_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len);
+void btrfs_delalloc_release_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len,
+			enum btrfs_metadata_reserve_type reserve_type);
 void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
 					    u64 len);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
@@ -2720,8 +2724,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes,
 int btrfs_delalloc_reserve_space(struct inode *inode,
 			struct extent_changeset *reserved, u64 start, u64 len,
 			enum btrfs_metadata_reserve_type reserve_type);
-void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len,
-			enum btrfs_metadata_reserve_type reserve_type);
 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
 					      unsigned short type);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f116bcf..a1e9c7b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4365,7 +4365,8 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
  * This one will handle the per-inode data rsv map for accurate reserved
  * space framework.
  */
-void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
+void btrfs_free_reserved_data_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
@@ -4375,7 +4376,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
 	start = round_down(start, root->sectorsize);
 
 	btrfs_free_reserved_data_space_noquota(inode, start, len);
-	btrfs_qgroup_free_data(inode, start, len);
+	btrfs_qgroup_free_data(inode, reserved, start, len);
 }
 
 static void force_metadata_allocation(struct btrfs_fs_info *info)
@@ -6270,7 +6271,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
 		return ret;
 	ret = btrfs_delalloc_reserve_metadata(inode, len, reserve_type);
 	if (ret < 0)
-		btrfs_free_reserved_data_space(inode, start, len);
+		btrfs_free_reserved_data_space(inode, reserved, start, len);
 	return ret;
 }
 
@@ -6291,11 +6292,12 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
  * list if there are no delalloc bytes left.
  * Also it will handle the qgroup reserved space.
  */
-void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len,
+void btrfs_delalloc_release_space(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len,
 			enum btrfs_metadata_reserve_type reserve_type)
 {
 	btrfs_delalloc_release_metadata(inode, len, reserve_type);
-	btrfs_free_reserved_data_space(inode, start, len);
+	btrfs_free_reserved_data_space(inode, reserved, start, len);
 }
 
 static int update_block_group(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5d0f79..5cfa065 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1603,8 +1603,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 						      reserve_type);
 		if (ret) {
 			if (!only_release_metadata)
-				btrfs_free_reserved_data_space(inode, pos,
-							       write_bytes);
+				btrfs_free_reserved_data_space(inode,
+						&data_reserved, pos,
+						write_bytes);
 			else
 				btrfs_end_write_no_snapshoting(root);
 			break;
@@ -1690,9 +1691,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
 				__pos = round_down(pos, root->sectorsize) +
 					(dirty_pages << PAGE_SHIFT);
-				btrfs_delalloc_release_space(inode, __pos,
-							     release_bytes,
-							     reserve_type);
+				btrfs_delalloc_release_space(inode,
+						&data_reserved, __pos,
+						release_bytes, reserve_type);
 			}
 		}
 
@@ -1746,7 +1747,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 			btrfs_delalloc_release_metadata(inode, release_bytes,
 							reserve_type);
 		} else {
-			btrfs_delalloc_release_space(inode,
+			btrfs_delalloc_release_space(inode, &data_reserved,
 						round_down(pos, root->sectorsize),
 						release_bytes, reserve_type);
 		}
@@ -2831,6 +2832,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 	/* First, check if we exceed the qgroup limit */
 	INIT_LIST_HEAD(&reserve_list);
 	while (1) {
+		struct extent_changeset *changeset;
+
 		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
 				      alloc_end - cur_offset, 0);
 		if (IS_ERR_OR_NULL(em)) {
@@ -2863,8 +2866,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 			 * range, free reserved data space first, otherwise
 			 * it'll result in false ENOSPC error.
 			 */
-			btrfs_free_reserved_data_space(inode, cur_offset,
-				last_byte - cur_offset);
+			if (data_reserved.range_changed)
+				changeset = &data_reserved;
+			else
+				changeset = NULL;
+			btrfs_free_reserved_data_space(inode, changeset,
+					cur_offset, last_byte - cur_offset);
 		}
 		free_extent_map(em);
 		cur_offset = last_byte;
@@ -2883,8 +2890,9 @@ static long btrfs_fallocate(struct file *file, int mode,
 					range->len, 1 << inode->i_blkbits,
 					offset + len, &alloc_hint);
 		else
-			btrfs_free_reserved_data_space(inode, range->start,
-						       range->len);
+			btrfs_free_reserved_data_space(inode,
+					&data_reserved, range->start,
+					range->len);
 		list_del(&range->list);
 		kfree(range);
 	}
@@ -2922,8 +2930,8 @@ static long btrfs_fallocate(struct file *file, int mode,
 	inode_unlock(inode);
 	/* Let go of our reservation. */
 	if (ret != 0)
-		btrfs_free_reserved_data_space(inode, alloc_start,
-				       alloc_end - cur_offset);
+		btrfs_free_reserved_data_space(inode, &data_reserved,
+				alloc_start, alloc_end - cur_offset);
 	extent_changeset_release(&data_reserved);
 	return ret;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9477424..5ca88f0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,7 +324,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
 	 * And at reserve time, it's always aligned to page size, so
 	 * just free one page here.
 	 */
-	btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
+	btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
 	btrfs_free_path(path);
 	btrfs_end_transaction(trans, root);
 	return ret;
@@ -3041,7 +3041,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		 * space for NOCOW range.
 		 * As NOCOW won't cause a new delayed ref, just free the space
 		 */
-		btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
+		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
 				       ordered_extent->len);
 		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 		if (nolock)
@@ -4872,7 +4872,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 again:
 	page = find_or_create_page(mapping, index, mask);
 	if (!page) {
-		btrfs_delalloc_release_space(inode,
+		btrfs_delalloc_release_space(inode, &data_reserved,
 				round_down(from, blocksize),
 				blocksize, reserve_type);
 		ret = -ENOMEM;
@@ -4944,7 +4944,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 
 out_unlock:
 	if (ret)
-		btrfs_delalloc_release_space(inode, block_start,
+		btrfs_delalloc_release_space(inode, &data_reserved, block_start,
 					     blocksize, reserve_type);
 	unlock_page(page);
 	put_page(page);
@@ -5334,7 +5334,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
 		 * Note, end is the bytenr of last byte, so we need + 1 here.
 		 */
 		if (state->state & EXTENT_DELALLOC)
-			btrfs_qgroup_free_data(inode, start, end - start + 1);
+			btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
 
 		clear_extent_bit(io_tree, start, end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
@@ -8815,8 +8815,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		current->journal_info = NULL;
 		if (ret < 0 && ret != -EIOCBQUEUED) {
 			if (dio_data.reserve)
-				btrfs_delalloc_release_space(inode, offset,
-					dio_data.reserve, BTRFS_RESERVE_NORMAL);
+				btrfs_delalloc_release_space(inode, &data_reserved,
+					offset, dio_data.reserve,
+					BTRFS_RESERVE_NORMAL);
 			/*
 			 * On error we might have left some ordered extents
 			 * without submitting corresponding bios for them, so
@@ -8831,9 +8832,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 					dio_data.unsubmitted_oe_range_start,
 					0);
 		} else if (ret >= 0 && (size_t)ret < count)
-			btrfs_delalloc_release_space(inode, offset,
-						     count - (size_t)ret,
-						     BTRFS_RESERVE_NORMAL);
+			btrfs_delalloc_release_space(inode, &data_reserved,
+					offset, count - (size_t)ret,
+					BTRFS_RESERVE_NORMAL);
 	}
 out:
 	if (wakeup)
@@ -9031,7 +9032,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 	 *    free the entire extent.
 	 */
 	if (PageDirty(page))
-		btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
+		btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
 	if (!inode_evicting) {
 		clear_extent_bit(tree, page_start, page_end,
 				 EXTENT_LOCKED | EXTENT_DIRTY |
@@ -9154,9 +9155,9 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 			spin_lock(&BTRFS_I(inode)->lock);
 			BTRFS_I(inode)->outstanding_extents++;
 			spin_unlock(&BTRFS_I(inode)->lock);
-			btrfs_delalloc_release_space(inode, page_start,
-						PAGE_SIZE - reserved_space,
-						reserve_type);
+			btrfs_delalloc_release_space(inode, &data_reserved,
+					page_start, PAGE_SIZE - reserved_space,
+					reserve_type);
 		}
 	}
 
@@ -9212,8 +9213,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	unlock_page(page);
 out:
-	btrfs_delalloc_release_space(inode, page_start, reserved_space,
-				     reserve_type);
+	btrfs_delalloc_release_space(inode, &data_reserved, page_start,
+			reserved_space, reserve_type);
 out_noreserve:
 	sb_end_pagefault(inode->i_sb);
 	extent_changeset_release(&data_reserved);
@@ -10553,7 +10554,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 			btrfs_end_transaction(trans, root);
 	}
 	if (cur_offset < end)
-		btrfs_free_reserved_data_space(inode, cur_offset,
+		btrfs_free_reserved_data_space(inode, NULL, cur_offset,
 			end - cur_offset + 1);
 	return ret;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8678104..adf5111 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1231,7 +1231,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 		spin_lock(&BTRFS_I(inode)->lock);
 		BTRFS_I(inode)->outstanding_extents++;
 		spin_unlock(&BTRFS_I(inode)->lock);
-		btrfs_delalloc_release_space(inode,
+		btrfs_delalloc_release_space(inode, &data_reserved,
 				start_index << PAGE_SHIFT,
 				(page_cnt - i_done) << PAGE_SHIFT,
 				reserve_type);
@@ -1258,7 +1258,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
 		unlock_page(pages[i]);
 		put_page(pages[i]);
 	}
-	btrfs_delalloc_release_space(inode,
+	btrfs_delalloc_release_space(inode, &data_reserved,
 			start_index << PAGE_SHIFT,
 			page_cnt << PAGE_SHIFT, reserve_type);
 	extent_changeset_release(&data_reserved);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 05d210f..df4261e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2864,13 +2864,64 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
 	return ret;
 }
 
-static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
-				       int free)
+/* Free ranges specified by @reserved, normally in error path */
+static int qgroup_free_reserved_data(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct extent_changeset changeset = { 0 };
+	int freed = 0;
+	int ret;
+
+	len = round_up(start + len, root->sectorsize);
+	start = round_down(start, root->sectorsize);
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(reserved->range_changed, &uiter))) {
+		u64 range_start = unode->val;
+		/* unode->aux is the inclusive end */
+		u64 range_len = unode->aux - range_start + 1;
+		u64 free_start;
+		u64 free_len;
+
+		changeset.bytes_changed = 0;
+		if (changeset.range_changed)
+			ulist_reinit(changeset.range_changed);
+
+		/* Only free range in range [start, start + len) */
+		if (range_start >= start + len ||
+		    range_start + range_len <= start)
+			continue;
+		free_start = max(range_start, start);
+		free_len = min(start + len, range_start + range_len) -
+			   free_start;
+		ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
+				free_start, free_start + free_len - 1,
+				EXTENT_QGROUP_RESERVED, &changeset);
+		if (ret < 0)
+			goto out;
+		freed += changeset.bytes_changed;
+	}
+	ret = freed;
+out:
+	ulist_free(changeset.range_changed);
+	return ret;
+}
+
+static int __btrfs_qgroup_release_data(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len,
+			int free)
 {
 	struct extent_changeset changeset;
 	int trace_op = QGROUP_RELEASE;
 	int ret;
 
+	/* In release case, we shouldn't have @reserved */
+	WARN_ON(!free && reserved);
+	if (reserved && reserved->range_changed)
+		return qgroup_free_reserved_data(inode, reserved, start, len);
 	changeset.bytes_changed = 0;
 	changeset.range_changed = ulist_alloc(GFP_NOFS);
 	if (!changeset.range_changed)
@@ -2898,14 +2949,17 @@ static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
  *
  * Should be called when a range of pages get invalidated before reaching disk.
  * Or for error cleanup case.
+ * if @reserved is given, only reserved range in [@start, @start + @len) will
+ * be freed.
  *
  * For data written to disk, use btrfs_qgroup_release_data().
  *
  * NOTE: This function may sleep for memory allocation.
  */
-int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
+int btrfs_qgroup_free_data(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len)
 {
-	return __btrfs_qgroup_release_data(inode, start, len, 1);
+	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
 }
 
 /*
@@ -2925,7 +2979,7 @@ int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
  */
 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
 {
-	return __btrfs_qgroup_release_data(inode, start, len, 0);
+	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
 }
 
 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index a15baf5..cc81997 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -180,7 +180,8 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
 int btrfs_qgroup_reserve_data(struct inode *inode,
 			struct extent_changeset *reserved, u64 start, u64 len);
 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
-int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len);
+int btrfs_qgroup_free_data(struct inode *inode,
+			struct extent_changeset *reserved, u64 start, u64 len);
 
 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes);
 void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 72ac7d7..f57180a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3098,8 +3098,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
 		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
 		num_bytes = end + 1 - start;
 		if (cur_offset < start)
-			btrfs_free_reserved_data_space(inode, cur_offset,
-					start - cur_offset);
+			btrfs_free_reserved_data_space(inode, &data_reserved,
+					cur_offset, start - cur_offset);
 		ret = btrfs_prealloc_file_range(inode, 0, start,
 						num_bytes, num_bytes,
 						end + 1, &alloc_hint);
@@ -3110,8 +3110,8 @@ int prealloc_file_extent_cluster(struct inode *inode,
 		nr++;
 	}
 	if (cur_offset < prealloc_end)
-		btrfs_free_reserved_data_space(inode, cur_offset,
-				       prealloc_end + 1 - cur_offset);
+		btrfs_free_reserved_data_space(inode, &data_reserved,
+				cur_offset, prealloc_end + 1 - cur_offset);
 out:
 	inode_unlock(inode);
 	extent_changeset_release(&data_reserved);
-- 
2.7.4





* Re: [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions
  2016-12-02  2:03 [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Qu Wenruo
  2016-12-02  2:03 ` [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges Qu Wenruo
@ 2016-12-08 14:29 ` Chandan Rajendra
  2016-12-09  0:39   ` Qu Wenruo
  2016-12-13 12:54 ` Chandan Rajendra
  2 siblings, 1 reply; 6+ messages in thread
From: Chandan Rajendra @ 2016-12-08 14:29 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, dsterba

On Friday, December 02, 2016 10:03:06 AM Qu Wenruo wrote:
> Introduce a new parameter, struct extent_changeset, to
> btrfs_qgroup_reserve_data() and its callers.
> 
> The extent_changeset records which ranges btrfs_qgroup_reserve_data()
> reserved in the current call, so the error path can free exactly those
> ranges.
> 
> We need to expose it to the callers because, in the buffered write
> error path, without knowing exactly which ranges the current call
> reserved, we can free space that was not reserved by us.
> 
> This leads to qgroup reserved space underflow.
>

Hi Qu,

Which git tree are these patches based on? This patch fails to apply
cleanly on the kdave/for-next branch available as of today.

-- 
chandan



* Re: [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions
  2016-12-08 14:29 ` [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Chandan Rajendra
@ 2016-12-09  0:39   ` Qu Wenruo
  0 siblings, 0 replies; 6+ messages in thread
From: Qu Wenruo @ 2016-12-09  0:39 UTC (permalink / raw)
  To: Chandan Rajendra; +Cc: linux-btrfs, dsterba



At 12/08/2016 10:29 PM, Chandan Rajendra wrote:
> On Friday, December 02, 2016 10:03:06 AM Qu Wenruo wrote:
>> Introduce a new parameter, struct extent_changeset, to
>> btrfs_qgroup_reserve_data() and its callers.
>>
>> The extent_changeset records which ranges btrfs_qgroup_reserve_data()
>> reserved in the current call, so the error path can free exactly those
>> ranges.
>>
>> We need to expose it to the callers because, in the buffered write
>> error path, without knowing exactly which ranges the current call
>> reserved, we can free space that was not reserved by us.
>>
>> This leads to qgroup reserved space underflow.
>>
>
> Hi Qu,
>
> On which git tree are these patches based off? This patch fails to apply
> cleanly on kdave/for-next branch that is available as of today.
>
It's based on David's for-next-20161125 branch, with several other
qgroup fixes applied as prerequisites:
btrfs: qgroup: Fix qgroup reserved space underflow caused by buffered 
write and quota enable
btrfs: qgroup: Return actually freed bytes for qgroup release or free data
btrfs: Add trace point for qgroup reserved space
btrfs: Add WARN_ON for qgroup reserved underflow

Or you can get the branch from my github repo:
https://github.com/adam900710/linux.git qgroup_fixes

Thanks,
Qu




* Re: [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions
  2016-12-02  2:03 [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Qu Wenruo
  2016-12-02  2:03 ` [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges Qu Wenruo
  2016-12-08 14:29 ` [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Chandan Rajendra
@ 2016-12-13 12:54 ` Chandan Rajendra
  2 siblings, 0 replies; 6+ messages in thread
From: Chandan Rajendra @ 2016-12-13 12:54 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, dsterba

On Friday, December 02, 2016 10:03:06 AM Qu Wenruo wrote:
> Introduce a new parameter, struct extent_changeset, to
> btrfs_qgroup_reserve_data() and its callers.
> 
> The extent_changeset records which ranges btrfs_qgroup_reserve_data()
> reserved in the current call, so the error path can free exactly those
> ranges.
> 
> We need to expose it to the callers because, in the buffered write
> error path, without knowing exactly which ranges the current call
> reserved, we can free space that was not reserved by us.
> 
> This leads to qgroup reserved space underflow.

The changes look good to me.

Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>

-- 
chandan



* Re: [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
  2016-12-02  2:03 ` [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges Qu Wenruo
@ 2016-12-13 12:56   ` Chandan Rajendra
  0 siblings, 0 replies; 6+ messages in thread
From: Chandan Rajendra @ 2016-12-13 12:56 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, dsterba

On Friday, December 02, 2016 10:03:07 AM Qu Wenruo wrote:
> [BUG]
> In the following case, btrfs can underflow qgroup reserved space in
> the error path:
> (Page size 4K, function names shown without the "btrfs_" prefix)
> 
>          Task A                  |             Task B
> ----------------------------------------------------------------------
> Buffered_write [0, 2K)           |
> |- check_data_free_space()       |
> |  |- qgroup_reserve_data()      |
> |     Range aligned to page      |
> |     range [0, 4K)          <<< |
> |     4K bytes reserved      <<< |
> |- copy pages to page cache      |
>                                  | Buffered_write [2K, 4K)
>                                  | |- check_data_free_space()
>                                  | |  |- qgroup_reserve_data()
>                                  | |     Range aligned to page
>                                  | |     range [0, 4K)
>                                  | |     Already reserved by A <<<
>                                  | |     0 bytes reserved      <<<
>                                  | |- delalloc_reserve_metadata()
>                                  | |  And it *FAILED* (Maybe -EDQUOT)
>                                  | |- free_reserved_data_space()
>                                       |- qgroup_free_data()
>                                          Range aligned to page range
>                                          [0, 4K)
>                                          Freeing 4K
> (Special thanks to Chandan for the detailed report and analysis)
> 
> [CAUSE]
> Task B above frees the reserved data range [0, 4K), which was actually
> reserved by Task A.
> 
> Later, when the page dirtied by Task A goes through the writeback
> routine, the same 4K of reserved data space is freed again at file
> extent insert time, causing the qgroup underflow.
> 
> [FIX]
> Add a @reserved parameter to btrfs_qgroup_free_data() so that it only
> frees data ranges recorded by a previous btrfs_qgroup_reserve_data()
> call.  In the above case, Task B then frees 0 bytes, so no underflow
> occurs.
>

The changes look good to me. Also, I did not notice any regressions when
executing fstests with the patch applied.

Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>

-- 
chandan



Thread overview: 6+ messages
2016-12-02  2:03 [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Qu Wenruo
2016-12-02  2:03 ` [PATCH 2/2] btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges Qu Wenruo
2016-12-13 12:56   ` Chandan Rajendra
2016-12-08 14:29 ` [PATCH 1/2] btrfs: qgroup: Introduce extent changeset for qgroup reserve functions Chandan Rajendra
2016-12-09  0:39   ` Qu Wenruo
2016-12-13 12:54 ` Chandan Rajendra
