* [PATCH 1/7] Btrfs: create a helper for getting chunk map
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 3:20 ` Qu Wenruo
2017-02-18 1:28 ` [PATCH 2/7] Btrfs: separate DISCARD from __btrfs_map_block Liu Bo
` (8 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
We have similar code here and there, this merges them into a helper.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/extent_io.c | 3 +-
fs/btrfs/volumes.c | 163 +++++++++++++++++----------------------------------
fs/btrfs/volumes.h | 2 +-
3 files changed, 57 insertions(+), 111 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ac383a..609ece1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2007,14 +2007,13 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
u64 map_length = 0;
u64 sector;
struct btrfs_bio *bbio = NULL;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int ret;
ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
BUG_ON(!mirror_num);
/* we can't repair anything in raid56 yet */
- if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
+ if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num))
return 0;
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3c3c69c..c52b0fe 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2794,10 +2794,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
return ret;
}
+static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length)
+{
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ em_tree = &fs_info->mapping_tree.map_tree;
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, logical, length);
+ read_unlock(&em_tree->lock);
+
+ if (!em) {
+ btrfs_crit(fs_info, "unable to find logical %llu len %llu",
+ logical, length);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (em->start > logical || em->start + em->len < logical) {
+ btrfs_crit(fs_info,
+ "found a bad mapping, wanted %llu, found %llu-%llu",
+ logical, em->start, em->start + em->len);
+ free_extent_map(em);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* callers are responsible for dropping em's ref. */
+ return em;
+}
+
int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
- struct extent_map_tree *em_tree;
struct extent_map *em;
struct map_lookup *map;
u64 dev_extent_len = 0;
@@ -2805,23 +2833,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
int i, ret = 0;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- em_tree = &fs_info->mapping_tree.map_tree;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, chunk_offset, 1);
- read_unlock(&em_tree->lock);
-
- if (!em || em->start > chunk_offset ||
- em->start + em->len < chunk_offset) {
+ em = get_chunk_map(fs_info, chunk_offset, 1);
+ if (IS_ERR(em)) {
/*
* This is a logic error, but we don't want to just rely on the
* user having built with ASSERT enabled, so if ASSERT doesn't
* do anything we still error out.
*/
ASSERT(0);
- if (em)
- free_extent_map(em);
- return -EINVAL;
+ return PTR_ERR(em);
}
map = em->map_lookup;
mutex_lock(&fs_info->chunk_mutex);
@@ -4888,7 +4908,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_device *device;
struct btrfs_chunk *chunk;
struct btrfs_stripe *stripe;
- struct extent_map_tree *em_tree;
struct extent_map *em;
struct map_lookup *map;
size_t item_size;
@@ -4897,24 +4916,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
int i = 0;
int ret = 0;
- em_tree = &fs_info->mapping_tree.map_tree;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
- read_unlock(&em_tree->lock);
-
- if (!em) {
- btrfs_crit(fs_info, "unable to find logical %Lu len %Lu",
- chunk_offset, chunk_size);
- return -EINVAL;
- }
-
- if (em->start != chunk_offset || em->len != chunk_size) {
- btrfs_crit(fs_info,
- "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu",
- chunk_offset, chunk_size, em->start, em->len);
- free_extent_map(em);
- return -EINVAL;
- }
+ em = get_chunk_map(fs_info, chunk_offset, chunk_size);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
@@ -5057,15 +5061,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
struct extent_map *em;
struct map_lookup *map;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
int readonly = 0;
int miss_ndevs = 0;
int i;
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
- read_unlock(&map_tree->map_tree.lock);
- if (!em)
+ em = get_chunk_map(fs_info, chunk_offset, 1);
+ if (IS_ERR(em))
return 1;
map = em->map_lookup;
@@ -5119,34 +5120,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
{
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct map_lookup *map;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
int ret;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, logical, len);
- read_unlock(&em_tree->lock);
-
- /*
- * We could return errors for these cases, but that could get ugly and
- * we'd probably do the same thing which is just not do anything else
- * and exit, so return 1 so the callers don't try to use other copies.
- */
- if (!em) {
- btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
- logical+len);
- return 1;
- }
-
- if (em->start > logical || em->start + em->len < logical) {
- btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu",
- logical, logical+len, em->start,
- em->start + em->len);
- free_extent_map(em);
+ em = get_chunk_map(fs_info, logical, len);
+ if (IS_ERR(em))
+ /*
+ * We could return errors for these cases, but that could get
+ * ugly and we'd probably do the same thing which is just not do
+ * anything else and exit, so return 1 so the callers don't try
+ * to use other copies.
+ */
return 1;
- }
map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
@@ -5175,15 +5161,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
{
struct extent_map *em;
struct map_lookup *map;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
unsigned long len = fs_info->sectorsize;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, logical, len);
- read_unlock(&em_tree->lock);
- BUG_ON(!em);
+ em = get_chunk_map(fs_info, logical, len);
+ BUG_ON(IS_ERR(em));
- BUG_ON(em->start > logical || em->start + em->len < logical);
map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
@@ -5191,20 +5173,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
return len;
}
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len, int mirror_num)
{
struct extent_map *em;
struct map_lookup *map;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
int ret = 0;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, logical, len);
- read_unlock(&em_tree->lock);
- BUG_ON(!em);
+ em = get_chunk_map(fs_info, logical, len);
+ BUG_ON(IS_ERR(em));
- BUG_ON(em->start > logical || em->start + em->len < logical);
map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
@@ -5324,8 +5302,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
{
struct extent_map *em;
struct map_lookup *map;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
u64 offset;
u64 stripe_offset;
u64 stripe_end_offset;
@@ -5347,23 +5323,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
u64 physical_to_patch_in_first_stripe = 0;
u64 raid56_full_stripe_start = (u64)-1;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, logical, *length);
- read_unlock(&em_tree->lock);
-
- if (!em) {
- btrfs_crit(fs_info, "unable to find logical %llu len %llu",
- logical, *length);
- return -EINVAL;
- }
-
- if (em->start > logical || em->start + em->len < logical) {
- btrfs_crit(fs_info,
- "found a bad mapping, wanted %Lu, found %Lu-%Lu",
- logical, em->start, em->start + em->len);
- free_extent_map(em);
- return -EINVAL;
- }
+ em = get_chunk_map(fs_info, logical, *length);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
map = em->map_lookup;
offset = logical - em->start;
@@ -5899,8 +5861,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
u64 chunk_start, u64 physical, u64 devid,
u64 **logical, int *naddrs, int *stripe_len)
{
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
struct extent_map *em;
struct map_lookup *map;
u64 *buf;
@@ -5910,24 +5870,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
u64 rmap_len;
int i, j, nr = 0;
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, chunk_start, 1);
- read_unlock(&em_tree->lock);
-
- if (!em) {
- btrfs_err(fs_info, "couldn't find em for chunk %Lu",
- chunk_start);
+ em = get_chunk_map(fs_info, chunk_start, 1);
+ if (IS_ERR(em))
return -EIO;
- }
- if (em->start != chunk_start) {
- btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu",
- em->start, chunk_start);
- free_extent_map(em);
- return -EIO;
- }
map = em->map_lookup;
-
length = em->len;
rmap_len = map->stripe_len;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 24ba6bc..bd5f6cd 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -475,7 +475,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path);
-int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
+int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
u64 logical, u64 len, int mirror_num);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
struct btrfs_mapping_tree *map_tree,
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 1/7] Btrfs: create a helper for getting chunk map
2017-02-18 1:28 ` [PATCH 1/7] Btrfs: create a helper for getting chunk map Liu Bo
@ 2017-02-20 3:20 ` Qu Wenruo
2017-03-01 2:19 ` Liu Bo
0 siblings, 1 reply; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 3:20 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> We have similar code here and there, this merges them into a helper.
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Looks good overall.
Although small nitpick inlined below.
> ---
> fs/btrfs/extent_io.c | 3 +-
> fs/btrfs/volumes.c | 163 +++++++++++++++++----------------------------------
> fs/btrfs/volumes.h | 2 +-
> 3 files changed, 57 insertions(+), 111 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 4ac383a..609ece1 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2007,14 +2007,13 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
> u64 map_length = 0;
> u64 sector;
> struct btrfs_bio *bbio = NULL;
> - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> int ret;
>
> ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
> BUG_ON(!mirror_num);
>
> /* we can't repair anything in raid56 yet */
> - if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
> + if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num))
Not sure if such small parameter cleanup can be split into a separate patch.
At least it's less related to the get_chunk_map() helper.
> return 0;
>
> bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 3c3c69c..c52b0fe 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -2794,10 +2794,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
> return ret;
> }
>
> +static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
> + u64 logical, u64 length)
> +{
> + struct extent_map_tree *em_tree;
> + struct extent_map *em;
> +
> + em_tree = &fs_info->mapping_tree.map_tree;
> + read_lock(&em_tree->lock);
> + em = lookup_extent_mapping(em_tree, logical, length);
> + read_unlock(&em_tree->lock);
> +
> + if (!em) {
> + btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> + logical, length);
Nice error message, would be quite helpful when we hit some bug later.
> + return ERR_PTR(-EINVAL);
Normally I'd return -ENOENT, not sure what's the correct return here though.
> + }
> +
> + if (em->start > logical || em->start + em->len < logical) {
> + btrfs_crit(fs_info,
> + "found a bad mapping, wanted %llu, found %llu-%llu",
> + logical, em->start, em->start + em->len);
Better outputting @length also.
Thanks,
Qu
> + free_extent_map(em);
> + return ERR_PTR(-EINVAL);
> + }
> +
> + /* callers are responsible for dropping em's ref. */
> + return em;
> +}
> +
> int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> struct btrfs_fs_info *fs_info, u64 chunk_offset)
> {
> - struct extent_map_tree *em_tree;
> struct extent_map *em;
> struct map_lookup *map;
> u64 dev_extent_len = 0;
> @@ -2805,23 +2833,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> int i, ret = 0;
> struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>
> - em_tree = &fs_info->mapping_tree.map_tree;
> -
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, chunk_offset, 1);
> - read_unlock(&em_tree->lock);
> -
> - if (!em || em->start > chunk_offset ||
> - em->start + em->len < chunk_offset) {
> + em = get_chunk_map(fs_info, chunk_offset, 1);
> + if (IS_ERR(em)) {
> /*
> * This is a logic error, but we don't want to just rely on the
> * user having built with ASSERT enabled, so if ASSERT doesn't
> * do anything we still error out.
> */
> ASSERT(0);
> - if (em)
> - free_extent_map(em);
> - return -EINVAL;
> + return PTR_ERR(em);
> }
> map = em->map_lookup;
> mutex_lock(&fs_info->chunk_mutex);
> @@ -4888,7 +4908,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
> struct btrfs_device *device;
> struct btrfs_chunk *chunk;
> struct btrfs_stripe *stripe;
> - struct extent_map_tree *em_tree;
> struct extent_map *em;
> struct map_lookup *map;
> size_t item_size;
> @@ -4897,24 +4916,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
> int i = 0;
> int ret = 0;
>
> - em_tree = &fs_info->mapping_tree.map_tree;
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
> - read_unlock(&em_tree->lock);
> -
> - if (!em) {
> - btrfs_crit(fs_info, "unable to find logical %Lu len %Lu",
> - chunk_offset, chunk_size);
> - return -EINVAL;
> - }
> -
> - if (em->start != chunk_offset || em->len != chunk_size) {
> - btrfs_crit(fs_info,
> - "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu",
> - chunk_offset, chunk_size, em->start, em->len);
> - free_extent_map(em);
> - return -EINVAL;
> - }
> + em = get_chunk_map(fs_info, chunk_offset, chunk_size);
> + if (IS_ERR(em))
> + return PTR_ERR(em);
>
> map = em->map_lookup;
> item_size = btrfs_chunk_item_size(map->num_stripes);
> @@ -5057,15 +5061,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
> {
> struct extent_map *em;
> struct map_lookup *map;
> - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> int readonly = 0;
> int miss_ndevs = 0;
> int i;
>
> - read_lock(&map_tree->map_tree.lock);
> - em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
> - read_unlock(&map_tree->map_tree.lock);
> - if (!em)
> + em = get_chunk_map(fs_info, chunk_offset, 1);
> + if (IS_ERR(em))
> return 1;
>
> map = em->map_lookup;
> @@ -5119,34 +5120,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
>
> int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
> {
> - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> struct extent_map *em;
> struct map_lookup *map;
> - struct extent_map_tree *em_tree = &map_tree->map_tree;
> int ret;
>
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, logical, len);
> - read_unlock(&em_tree->lock);
> -
> - /*
> - * We could return errors for these cases, but that could get ugly and
> - * we'd probably do the same thing which is just not do anything else
> - * and exit, so return 1 so the callers don't try to use other copies.
> - */
> - if (!em) {
> - btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
> - logical+len);
> - return 1;
> - }
> -
> - if (em->start > logical || em->start + em->len < logical) {
> - btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu",
> - logical, logical+len, em->start,
> - em->start + em->len);
> - free_extent_map(em);
> + em = get_chunk_map(fs_info, logical, len);
> + if (IS_ERR(em))
> + /*
> + * We could return errors for these cases, but that could get
> + * ugly and we'd probably do the same thing which is just not do
> + * anything else and exit, so return 1 so the callers don't try
> + * to use other copies.
> + */
> return 1;
> - }
>
> map = em->map_lookup;
> if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
> @@ -5175,15 +5161,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> {
> struct extent_map *em;
> struct map_lookup *map;
> - struct extent_map_tree *em_tree = &map_tree->map_tree;
> unsigned long len = fs_info->sectorsize;
>
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, logical, len);
> - read_unlock(&em_tree->lock);
> - BUG_ON(!em);
> + em = get_chunk_map(fs_info, logical, len);
> + BUG_ON(IS_ERR(em));
>
> - BUG_ON(em->start > logical || em->start + em->len < logical);
> map = em->map_lookup;
> if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
> len = map->stripe_len * nr_data_stripes(map);
> @@ -5191,20 +5173,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> return len;
> }
>
> -int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
> +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
> u64 logical, u64 len, int mirror_num)
> {
> struct extent_map *em;
> struct map_lookup *map;
> - struct extent_map_tree *em_tree = &map_tree->map_tree;
> int ret = 0;
>
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, logical, len);
> - read_unlock(&em_tree->lock);
> - BUG_ON(!em);
> + em = get_chunk_map(fs_info, logical, len);
> + BUG_ON(IS_ERR(em));
>
> - BUG_ON(em->start > logical || em->start + em->len < logical);
> map = em->map_lookup;
> if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
> ret = 1;
> @@ -5324,8 +5302,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> {
> struct extent_map *em;
> struct map_lookup *map;
> - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> - struct extent_map_tree *em_tree = &map_tree->map_tree;
> u64 offset;
> u64 stripe_offset;
> u64 stripe_end_offset;
> @@ -5347,23 +5323,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> u64 physical_to_patch_in_first_stripe = 0;
> u64 raid56_full_stripe_start = (u64)-1;
>
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, logical, *length);
> - read_unlock(&em_tree->lock);
> -
> - if (!em) {
> - btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> - logical, *length);
> - return -EINVAL;
> - }
> -
> - if (em->start > logical || em->start + em->len < logical) {
> - btrfs_crit(fs_info,
> - "found a bad mapping, wanted %Lu, found %Lu-%Lu",
> - logical, em->start, em->start + em->len);
> - free_extent_map(em);
> - return -EINVAL;
> - }
> + em = get_chunk_map(fs_info, logical, *length);
> + if (IS_ERR(em))
> + return PTR_ERR(em);
>
> map = em->map_lookup;
> offset = logical - em->start;
> @@ -5899,8 +5861,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
> u64 chunk_start, u64 physical, u64 devid,
> u64 **logical, int *naddrs, int *stripe_len)
> {
> - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> - struct extent_map_tree *em_tree = &map_tree->map_tree;
> struct extent_map *em;
> struct map_lookup *map;
> u64 *buf;
> @@ -5910,24 +5870,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
> u64 rmap_len;
> int i, j, nr = 0;
>
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, chunk_start, 1);
> - read_unlock(&em_tree->lock);
> -
> - if (!em) {
> - btrfs_err(fs_info, "couldn't find em for chunk %Lu",
> - chunk_start);
> + em = get_chunk_map(fs_info, chunk_start, 1);
> + if (IS_ERR(em))
> return -EIO;
> - }
>
> - if (em->start != chunk_start) {
> - btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu",
> - em->start, chunk_start);
> - free_extent_map(em);
> - return -EIO;
> - }
> map = em->map_lookup;
> -
> length = em->len;
> rmap_len = map->stripe_len;
>
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 24ba6bc..bd5f6cd 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -475,7 +475,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
> void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
> struct btrfs_device *tgtdev);
> void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path);
> -int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
> +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
> u64 logical, u64 len, int mirror_num);
> unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> struct btrfs_mapping_tree *map_tree,
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 1/7] Btrfs: create a helper for getting chunk map
2017-02-20 3:20 ` Qu Wenruo
@ 2017-03-01 2:19 ` Liu Bo
0 siblings, 0 replies; 20+ messages in thread
From: Liu Bo @ 2017-03-01 2:19 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs, David Sterba
On Mon, Feb 20, 2017 at 11:20:33AM +0800, Qu Wenruo wrote:
>
>
> At 02/18/2017 09:28 AM, Liu Bo wrote:
> > We have similar code here and there, this merges them into a helper.
> >
> > Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>
> Looks good overall.
>
> Although small nitpick inlined below.
Thank you for going through this.
> > ---
> > fs/btrfs/extent_io.c | 3 +-
> > fs/btrfs/volumes.c | 163 +++++++++++++++++----------------------------------
> > fs/btrfs/volumes.h | 2 +-
> > 3 files changed, 57 insertions(+), 111 deletions(-)
> >
> > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > index 4ac383a..609ece1 100644
> > --- a/fs/btrfs/extent_io.c
> > +++ b/fs/btrfs/extent_io.c
> > @@ -2007,14 +2007,13 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
> > u64 map_length = 0;
> > u64 sector;
> > struct btrfs_bio *bbio = NULL;
> > - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> > int ret;
> >
> > ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
> > BUG_ON(!mirror_num);
> >
> > /* we can't repair anything in raid56 yet */
> > - if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
> > + if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num))
>
> Not sure if such small parameter cleanup can be split into a separate patch.
> At least it's less related to the get_chunk_map() helper.
>
But it's not a cleanup, it is get_chunk_map() that needs @fs_info.
> > return 0;
> >
> > bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 3c3c69c..c52b0fe 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -2794,10 +2794,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info,
> > return ret;
> > }
> >
> > +static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
> > + u64 logical, u64 length)
> > +{
> > + struct extent_map_tree *em_tree;
> > + struct extent_map *em;
> > +
> > + em_tree = &fs_info->mapping_tree.map_tree;
> > + read_lock(&em_tree->lock);
> > + em = lookup_extent_mapping(em_tree, logical, length);
> > + read_unlock(&em_tree->lock);
> > +
> > + if (!em) {
> > + btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> > + logical, length);
>
> Nice error message, would be quite helpful when we hit some bug later.
>
> > + return ERR_PTR(-EINVAL);
>
> Normally I'd return -ENOENT, not sure what's the correct return here though.
>
So I tried to be consistent with the error handling of other places of searching
chunk mapping tree.
I think EINVAL makes sense here, either @logical or @length is not valid.
> > + }
> > +
> > + if (em->start > logical || em->start + em->len < logical) {
> > + btrfs_crit(fs_info,
> > + "found a bad mapping, wanted %llu, found %llu-%llu",
> > + logical, em->start, em->start + em->len);
>
> Better outputting @length also.
>
OK, I'll update it with @length.
Thanks,
-liubo
> Thanks,
> Qu
>
> > + free_extent_map(em);
> > + return ERR_PTR(-EINVAL);
> > + }
> > +
> > + /* callers are responsible for dropping em's ref. */
> > + return em;
> > +}
> > +
> > int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> > struct btrfs_fs_info *fs_info, u64 chunk_offset)
> > {
> > - struct extent_map_tree *em_tree;
> > struct extent_map *em;
> > struct map_lookup *map;
> > u64 dev_extent_len = 0;
> > @@ -2805,23 +2833,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> > int i, ret = 0;
> > struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> >
> > - em_tree = &fs_info->mapping_tree.map_tree;
> > -
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, chunk_offset, 1);
> > - read_unlock(&em_tree->lock);
> > -
> > - if (!em || em->start > chunk_offset ||
> > - em->start + em->len < chunk_offset) {
> > + em = get_chunk_map(fs_info, chunk_offset, 1);
> > + if (IS_ERR(em)) {
> > /*
> > * This is a logic error, but we don't want to just rely on the
> > * user having built with ASSERT enabled, so if ASSERT doesn't
> > * do anything we still error out.
> > */
> > ASSERT(0);
> > - if (em)
> > - free_extent_map(em);
> > - return -EINVAL;
> > + return PTR_ERR(em);
> > }
> > map = em->map_lookup;
> > mutex_lock(&fs_info->chunk_mutex);
> > @@ -4888,7 +4908,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
> > struct btrfs_device *device;
> > struct btrfs_chunk *chunk;
> > struct btrfs_stripe *stripe;
> > - struct extent_map_tree *em_tree;
> > struct extent_map *em;
> > struct map_lookup *map;
> > size_t item_size;
> > @@ -4897,24 +4916,9 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
> > int i = 0;
> > int ret = 0;
> >
> > - em_tree = &fs_info->mapping_tree.map_tree;
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, chunk_offset, chunk_size);
> > - read_unlock(&em_tree->lock);
> > -
> > - if (!em) {
> > - btrfs_crit(fs_info, "unable to find logical %Lu len %Lu",
> > - chunk_offset, chunk_size);
> > - return -EINVAL;
> > - }
> > -
> > - if (em->start != chunk_offset || em->len != chunk_size) {
> > - btrfs_crit(fs_info,
> > - "found a bad mapping, wanted %Lu-%Lu, found %Lu-%Lu",
> > - chunk_offset, chunk_size, em->start, em->len);
> > - free_extent_map(em);
> > - return -EINVAL;
> > - }
> > + em = get_chunk_map(fs_info, chunk_offset, chunk_size);
> > + if (IS_ERR(em))
> > + return PTR_ERR(em);
> >
> > map = em->map_lookup;
> > item_size = btrfs_chunk_item_size(map->num_stripes);
> > @@ -5057,15 +5061,12 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
> > {
> > struct extent_map *em;
> > struct map_lookup *map;
> > - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> > int readonly = 0;
> > int miss_ndevs = 0;
> > int i;
> >
> > - read_lock(&map_tree->map_tree.lock);
> > - em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
> > - read_unlock(&map_tree->map_tree.lock);
> > - if (!em)
> > + em = get_chunk_map(fs_info, chunk_offset, 1);
> > + if (IS_ERR(em))
> > return 1;
> >
> > map = em->map_lookup;
> > @@ -5119,34 +5120,19 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
> >
> > int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
> > {
> > - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> > struct extent_map *em;
> > struct map_lookup *map;
> > - struct extent_map_tree *em_tree = &map_tree->map_tree;
> > int ret;
> >
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, logical, len);
> > - read_unlock(&em_tree->lock);
> > -
> > - /*
> > - * We could return errors for these cases, but that could get ugly and
> > - * we'd probably do the same thing which is just not do anything else
> > - * and exit, so return 1 so the callers don't try to use other copies.
> > - */
> > - if (!em) {
> > - btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical,
> > - logical+len);
> > - return 1;
> > - }
> > -
> > - if (em->start > logical || em->start + em->len < logical) {
> > - btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got %Lu-%Lu",
> > - logical, logical+len, em->start,
> > - em->start + em->len);
> > - free_extent_map(em);
> > + em = get_chunk_map(fs_info, logical, len);
> > + if (IS_ERR(em))
> > + /*
> > + * We could return errors for these cases, but that could get
> > + * ugly and we'd probably do the same thing which is just not do
> > + * anything else and exit, so return 1 so the callers don't try
> > + * to use other copies.
> > + */
> > return 1;
> > - }
> >
> > map = em->map_lookup;
> > if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
> > @@ -5175,15 +5161,11 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> > {
> > struct extent_map *em;
> > struct map_lookup *map;
> > - struct extent_map_tree *em_tree = &map_tree->map_tree;
> > unsigned long len = fs_info->sectorsize;
> >
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, logical, len);
> > - read_unlock(&em_tree->lock);
> > - BUG_ON(!em);
> > + em = get_chunk_map(fs_info, logical, len);
> > + BUG_ON(IS_ERR(em));
> >
> > - BUG_ON(em->start > logical || em->start + em->len < logical);
> > map = em->map_lookup;
> > if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
> > len = map->stripe_len * nr_data_stripes(map);
> > @@ -5191,20 +5173,16 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> > return len;
> > }
> >
> > -int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
> > +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
> > u64 logical, u64 len, int mirror_num)
> > {
> > struct extent_map *em;
> > struct map_lookup *map;
> > - struct extent_map_tree *em_tree = &map_tree->map_tree;
> > int ret = 0;
> >
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, logical, len);
> > - read_unlock(&em_tree->lock);
> > - BUG_ON(!em);
> > + em = get_chunk_map(fs_info, logical, len);
> > + BUG_ON(IS_ERR(em));
> >
> > - BUG_ON(em->start > logical || em->start + em->len < logical);
> > map = em->map_lookup;
> > if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
> > ret = 1;
> > @@ -5324,8 +5302,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> > {
> > struct extent_map *em;
> > struct map_lookup *map;
> > - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> > - struct extent_map_tree *em_tree = &map_tree->map_tree;
> > u64 offset;
> > u64 stripe_offset;
> > u64 stripe_end_offset;
> > @@ -5347,23 +5323,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> > u64 physical_to_patch_in_first_stripe = 0;
> > u64 raid56_full_stripe_start = (u64)-1;
> >
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, logical, *length);
> > - read_unlock(&em_tree->lock);
> > -
> > - if (!em) {
> > - btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> > - logical, *length);
> > - return -EINVAL;
> > - }
> > -
> > - if (em->start > logical || em->start + em->len < logical) {
> > - btrfs_crit(fs_info,
> > - "found a bad mapping, wanted %Lu, found %Lu-%Lu",
> > - logical, em->start, em->start + em->len);
> > - free_extent_map(em);
> > - return -EINVAL;
> > - }
> > + em = get_chunk_map(fs_info, logical, *length);
> > + if (IS_ERR(em))
> > + return PTR_ERR(em);
> >
> > map = em->map_lookup;
> > offset = logical - em->start;
> > @@ -5899,8 +5861,6 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
> > u64 chunk_start, u64 physical, u64 devid,
> > u64 **logical, int *naddrs, int *stripe_len)
> > {
> > - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> > - struct extent_map_tree *em_tree = &map_tree->map_tree;
> > struct extent_map *em;
> > struct map_lookup *map;
> > u64 *buf;
> > @@ -5910,24 +5870,11 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
> > u64 rmap_len;
> > int i, j, nr = 0;
> >
> > - read_lock(&em_tree->lock);
> > - em = lookup_extent_mapping(em_tree, chunk_start, 1);
> > - read_unlock(&em_tree->lock);
> > -
> > - if (!em) {
> > - btrfs_err(fs_info, "couldn't find em for chunk %Lu",
> > - chunk_start);
> > + em = get_chunk_map(fs_info, chunk_start, 1);
> > + if (IS_ERR(em))
> > return -EIO;
> > - }
> >
> > - if (em->start != chunk_start) {
> > - btrfs_err(fs_info, "bad chunk start, em=%Lu, wanted=%Lu",
> > - em->start, chunk_start);
> > - free_extent_map(em);
> > - return -EIO;
> > - }
> > map = em->map_lookup;
> > -
> > length = em->len;
> > rmap_len = map->stripe_len;
> >
> > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> > index 24ba6bc..bd5f6cd 100644
> > --- a/fs/btrfs/volumes.h
> > +++ b/fs/btrfs/volumes.h
> > @@ -475,7 +475,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
> > void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
> > struct btrfs_device *tgtdev);
> > void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path);
> > -int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
> > +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
> > u64 logical, u64 len, int mirror_num);
> > unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> > struct btrfs_mapping_tree *map_tree,
> >
>
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 2/7] Btrfs: separate DISCARD from __btrfs_map_block
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
2017-02-18 1:28 ` [PATCH 1/7] Btrfs: create a helper for getting chunk map Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 3:54 ` Qu Wenruo
2017-02-18 1:28 ` [PATCH 3/7] Btrfs: introduce a function to get extra mirror from replace Liu Bo
` (7 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
Since DISCARD is not as important as an operation like write, we don't
copy it to target device during replace, and it makes __btrfs_map_block
less complex.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/volumes.c | 306 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 192 insertions(+), 114 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c52b0fe..96228f3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5294,6 +5294,175 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
kfree(bbio);
}
+/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
+/*
+ * Please note that, discard won't be sent to target device of device
+ * replace.
+ */
+static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length,
+ struct btrfs_bio **bbio_ret)
+{
+ struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+ struct extent_map *em;
+ struct map_lookup *map;
+ struct btrfs_bio *bbio;
+ u64 offset;
+ u64 stripe_nr;
+ u64 stripe_nr_end;
+ u64 stripe_end_offset;
+ u64 stripe_cnt;
+ u64 stripe_len;
+ u64 stripe_offset;
+ u64 num_stripes;
+ u32 stripe_index;
+ u32 factor = 0;
+ u32 sub_stripes = 0;
+ u64 stripes_per_dev = 0;
+ u32 remaining_stripes = 0;
+ u32 last_stripe = 0;
+ int ret = 0;
+ int i;
+
+ /* discard always return a bbio */
+ ASSERT(bbio_ret);
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, logical, length);
+ read_unlock(&em_tree->lock);
+
+ if (!em) {
+ btrfs_crit(fs_info, "unable to find logical %llu len %llu",
+ logical, length);
+ return -EINVAL;
+ }
+
+ if (em->start > logical || em->start + em->len < logical) {
+ btrfs_crit(fs_info,
+ "found a bad mapping, wanted %Lu, found %Lu-%Lu",
+ logical, em->start, em->start + em->len);
+ free_extent_map(em);
+ return -EINVAL;
+ }
+
+ map = em->map_lookup;
+ /* we don't discard raid56 yet */
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ offset = logical - em->start;
+ length = min_t(u64, em->len - offset, length);
+
+ stripe_len = map->stripe_len;
+ /*
+ * stripe_nr counts the total number of stripes we have to stride
+ * to get to this block
+ */
+ stripe_nr = div64_u64(offset, stripe_len);
+ stripe_offset = stripe_nr * stripe_len;
+ ASSERT(offset >= stripe_offset);
+
+ /* stripe_offset is the offset of this block in its stripe */
+ stripe_offset = offset - stripe_offset;
+
+ stripe_nr_end = ALIGN(offset + length, map->stripe_len);
+ stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
+ stripe_cnt = stripe_nr_end - stripe_nr;
+ stripe_end_offset = stripe_nr_end * map->stripe_len -
+ (offset + length);
+ /*
+ * after this, stripe_nr is the number of stripes on this
+ * device we have to walk to find the data, and stripe_index is
+ * the number of our device in the stripe array
+ */
+ num_stripes = 1;
+ stripe_index = 0;
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+ if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+ sub_stripes = 1;
+ else
+ sub_stripes = map->sub_stripes;
+
+ factor = map->num_stripes / sub_stripes;
+ num_stripes = min_t(u64, map->num_stripes,
+ sub_stripes * stripe_cnt);
+ stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
+ stripe_index *= sub_stripes;
+ stripes_per_dev = div_u64_rem(stripe_cnt, factor,
+ &remaining_stripes);
+ div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
+ last_stripe *= sub_stripes;
+ } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ num_stripes = map->num_stripes;
+ } else {
+ stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+ &stripe_index);
+ }
+
+ bbio = alloc_btrfs_bio(num_stripes, 0);
+ if (!bbio) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_stripes; i++) {
+ bbio->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset + stripe_nr * map->stripe_len;
+ bbio->stripes[i].dev = map->stripes[stripe_index].dev;
+
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+ bbio->stripes[i].length = stripes_per_dev *
+ map->stripe_len;
+
+ if (i / sub_stripes < remaining_stripes)
+ bbio->stripes[i].length +=
+ map->stripe_len;
+
+ /*
+ * Special for the first stripe and
+ * the last stripe:
+ *
+ * |-------|...|-------|
+ * |----------|
+ * off end_off
+ */
+ if (i < sub_stripes)
+ bbio->stripes[i].length -=
+ stripe_offset;
+
+ if (stripe_index >= last_stripe &&
+ stripe_index <= (last_stripe +
+ sub_stripes - 1))
+ bbio->stripes[i].length -=
+ stripe_end_offset;
+
+ if (i == sub_stripes - 1)
+ stripe_offset = 0;
+ } else {
+ bbio->stripes[i].length = length;
+ }
+
+ stripe_index++;
+ if (stripe_index == map->num_stripes) {
+ stripe_index = 0;
+ stripe_nr++;
+ }
+ }
+
+ *bbio_ret = bbio;
+ bbio->map_type = map->type;
+ bbio->num_stripes = num_stripes;
+out:
+ free_extent_map(em);
+ return ret;
+}
+
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
@@ -5304,10 +5473,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
struct map_lookup *map;
u64 offset;
u64 stripe_offset;
- u64 stripe_end_offset;
u64 stripe_nr;
- u64 stripe_nr_orig;
- u64 stripe_nr_end;
u64 stripe_len;
u32 stripe_index;
int i;
@@ -5323,6 +5489,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
u64 physical_to_patch_in_first_stripe = 0;
u64 raid56_full_stripe_start = (u64)-1;
+ if (op == BTRFS_MAP_DISCARD)
+ return __btrfs_map_block_for_discard(fs_info, logical,
+ *length, bbio_ret);
+
em = get_chunk_map(fs_info, logical, *length);
if (IS_ERR(em))
return PTR_ERR(em);
@@ -5364,14 +5534,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
raid56_full_stripe_start *= full_stripe_len;
}
- if (op == BTRFS_MAP_DISCARD) {
- /* we don't discard raid56 yet */
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- ret = -EOPNOTSUPP;
- goto out;
- }
- *length = min_t(u64, em->len - offset, *length);
- } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
u64 max_len;
/* For writes to RAID[56], allow a full stripeset across all disks.
For other RAID types and for RAID[56] reads, just allow a single
@@ -5402,8 +5565,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
- op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
- op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
+ op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
+ dev_replace->tgtdev != NULL) {
/*
* in dev-replace case, for repair case (that's the only
* case where the mirror is selected explicitly when
@@ -5483,24 +5646,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
num_stripes = 1;
stripe_index = 0;
- stripe_nr_orig = stripe_nr;
- stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
- stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
- stripe_end_offset = stripe_nr_end * map->stripe_len -
- (offset + *length);
-
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- if (op == BTRFS_MAP_DISCARD)
- num_stripes = min_t(u64, map->num_stripes,
- stripe_nr_end - stripe_nr_orig);
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
&stripe_index);
- if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
- op != BTRFS_MAP_GET_READ_MIRRORS)
+ if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
- op == BTRFS_MAP_GET_READ_MIRRORS)
+ if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -5513,8 +5665,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
- op == BTRFS_MAP_GET_READ_MIRRORS) {
+ if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
num_stripes = map->num_stripes;
} else if (mirror_num) {
stripe_index = mirror_num - 1;
@@ -5530,10 +5681,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
num_stripes = map->sub_stripes;
- else if (op == BTRFS_MAP_DISCARD)
- num_stripes = min_t(u64, map->sub_stripes *
- (stripe_nr_end - stripe_nr_orig),
- map->num_stripes);
else if (mirror_num)
stripe_index += mirror_num - 1;
else {
@@ -5576,8 +5723,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
/* We distribute the parity blocks across stripes */
div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
&stripe_index);
- if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
- op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1)
+ if ((op != BTRFS_MAP_WRITE &&
+ op != BTRFS_MAP_GET_READ_MIRRORS) &&
+ mirror_num <= 1)
mirror_num = 1;
}
} else {
@@ -5600,7 +5748,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
num_alloc_stripes = num_stripes;
if (dev_replace_is_ongoing) {
- if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD)
+ if (op == BTRFS_MAP_WRITE)
num_alloc_stripes <<= 1;
if (op == BTRFS_MAP_GET_READ_MIRRORS)
num_alloc_stripes++;
@@ -5643,84 +5791,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
RAID6_Q_STRIPE;
}
- if (op == BTRFS_MAP_DISCARD) {
- u32 factor = 0;
- u32 sub_stripes = 0;
- u64 stripes_per_dev = 0;
- u32 remaining_stripes = 0;
- u32 last_stripe = 0;
- if (map->type &
- (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
- if (map->type & BTRFS_BLOCK_GROUP_RAID0)
- sub_stripes = 1;
- else
- sub_stripes = map->sub_stripes;
-
- factor = map->num_stripes / sub_stripes;
- stripes_per_dev = div_u64_rem(stripe_nr_end -
- stripe_nr_orig,
- factor,
- &remaining_stripes);
- div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
- last_stripe *= sub_stripes;
- }
-
- for (i = 0; i < num_stripes; i++) {
- bbio->stripes[i].physical =
- map->stripes[stripe_index].physical +
- stripe_offset + stripe_nr * map->stripe_len;
- bbio->stripes[i].dev = map->stripes[stripe_index].dev;
-
- if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID10)) {
- bbio->stripes[i].length = stripes_per_dev *
- map->stripe_len;
-
- if (i / sub_stripes < remaining_stripes)
- bbio->stripes[i].length +=
- map->stripe_len;
-
- /*
- * Special for the first stripe and
- * the last stripe:
- *
- * |-------|...|-------|
- * |----------|
- * off end_off
- */
- if (i < sub_stripes)
- bbio->stripes[i].length -=
- stripe_offset;
-
- if (stripe_index >= last_stripe &&
- stripe_index <= (last_stripe +
- sub_stripes - 1))
- bbio->stripes[i].length -=
- stripe_end_offset;
-
- if (i == sub_stripes - 1)
- stripe_offset = 0;
- } else
- bbio->stripes[i].length = *length;
-
- stripe_index++;
- if (stripe_index == map->num_stripes) {
- /* This could only happen for RAID0/10 */
- stripe_index = 0;
- stripe_nr++;
- }
- }
- } else {
- for (i = 0; i < num_stripes; i++) {
- bbio->stripes[i].physical =
- map->stripes[stripe_index].physical +
- stripe_offset +
- stripe_nr * map->stripe_len;
- bbio->stripes[i].dev =
- map->stripes[stripe_index].dev;
- stripe_index++;
- }
+ for (i = 0; i < num_stripes; i++) {
+ bbio->stripes[i].physical =
+ map->stripes[stripe_index].physical +
+ stripe_offset +
+ stripe_nr * map->stripe_len;
+ bbio->stripes[i].dev =
+ map->stripes[stripe_index].dev;
+ stripe_index++;
}
if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
@@ -5730,8 +5809,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
sort_parity_stripes(bbio, num_stripes);
tgtdev_indexes = 0;
- if (dev_replace_is_ongoing &&
- (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) &&
+ if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE &&
dev_replace->tgtdev != NULL) {
int index_where_to_add;
u64 srcdev_devid = dev_replace->srcdev->devid;
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 2/7] Btrfs: separate DISCARD from __btrfs_map_block
2017-02-18 1:28 ` [PATCH 2/7] Btrfs: separate DISCARD from __btrfs_map_block Liu Bo
@ 2017-02-20 3:54 ` Qu Wenruo
2017-03-06 19:49 ` Liu Bo
0 siblings, 1 reply; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 3:54 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> Since DISCARD is not as important as an operation like write, we don't
> copy it to target device during replace, and it makes __btrfs_map_block
> less complex.
Makes sense to me.
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> fs/btrfs/volumes.c | 306 +++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 192 insertions(+), 114 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index c52b0fe..96228f3 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5294,6 +5294,175 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
> kfree(bbio);
> }
>
> +/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
> +/*
> + * Please note that, discard won't be sent to target device of device
> + * replace.
> + */
> +static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
> + u64 logical, u64 length,
> + struct btrfs_bio **bbio_ret)
> +{
> + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
> + struct extent_map *em;
> + struct map_lookup *map;
> + struct btrfs_bio *bbio;
> + u64 offset;
> + u64 stripe_nr;
> + u64 stripe_nr_end;
> + u64 stripe_end_offset;
> + u64 stripe_cnt;
> + u64 stripe_len;
> + u64 stripe_offset;
> + u64 num_stripes;
> + u32 stripe_index;
> + u32 factor = 0;
> + u32 sub_stripes = 0;
> + u64 stripes_per_dev = 0;
> + u32 remaining_stripes = 0;
> + u32 last_stripe = 0;
> + int ret = 0;
> + int i;
> +
> + /* discard always return a bbio */
> + ASSERT(bbio_ret);
> +
> + read_lock(&em_tree->lock);
> + em = lookup_extent_mapping(em_tree, logical, length);
> + read_unlock(&em_tree->lock);
It seems that get_chunk_map() in previous patch can replace such
searching and error message.
> +
> + if (!em) {
> + btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> + logical, length);
> + return -EINVAL;
> + }
> +
> + if (em->start > logical || em->start + em->len < logical) {
> + btrfs_crit(fs_info,
> + "found a bad mapping, wanted %Lu, found %Lu-%Lu",
> + logical, em->start, em->start + em->len);
> + free_extent_map(em);
> + return -EINVAL;
> + }
> +
> + map = em->map_lookup;
> + /* we don't discard raid56 yet */
> + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
> + ret = -EOPNOTSUPP;
> + goto out;
> + }
> +
> + offset = logical - em->start;
> + length = min_t(u64, em->len - offset, length);
> +
> + stripe_len = map->stripe_len;
> + /*
> + * stripe_nr counts the total number of stripes we have to stride
> + * to get to this block
> + */
> + stripe_nr = div64_u64(offset, stripe_len);
> + stripe_offset = stripe_nr * stripe_len;
> + ASSERT(offset >= stripe_offset);
What about a DIV_ROUND_DOWN helper?
Surprisingly we only have DIR_ROUND_UP, not not DIV_ROUND_DOWN.
And if we're only going to support 64K stripe len, then round_down() is
good for current usage.
> +
> + /* stripe_offset is the offset of this block in its stripe */
> + stripe_offset = offset - stripe_offset;
This is a little confusing.
What about using another variable called @stripe_start instead of using
the same variable @stripe_offset to temporarily store stripe start bytenr.
I prefer to do it in one run without resuing @stripe_offset variable to
avoid confusion.
> +
> + stripe_nr_end = ALIGN(offset + length, map->stripe_len);
round_up() causes less confusion.
And IIRC, ALIGN/round_up can only handle power of 2, this implies the
stripe_len must be power of 2, which is OK for now.
If using ALIGN here, we can also use round_down() in previous stripe_nr.
Thanks,
Qu
> + stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
> + stripe_cnt = stripe_nr_end - stripe_nr;
> + stripe_end_offset = stripe_nr_end * map->stripe_len -
> + (offset + length);
> + /*
> + * after this, stripe_nr is the number of stripes on this
> + * device we have to walk to find the data, and stripe_index is
> + * the number of our device in the stripe array
> + */
> + num_stripes = 1;
> + stripe_index = 0;
> + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
> + BTRFS_BLOCK_GROUP_RAID10)) {
> + if (map->type & BTRFS_BLOCK_GROUP_RAID0)
> + sub_stripes = 1;
> + else
> + sub_stripes = map->sub_stripes;
> +
> + factor = map->num_stripes / sub_stripes;
> + num_stripes = min_t(u64, map->num_stripes,
> + sub_stripes * stripe_cnt);
> + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
> + stripe_index *= sub_stripes;
> + stripes_per_dev = div_u64_rem(stripe_cnt, factor,
> + &remaining_stripes);
> + div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
> + last_stripe *= sub_stripes;
> + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
> + BTRFS_BLOCK_GROUP_DUP)) {
> + num_stripes = map->num_stripes;
> + } else {
> + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
> + &stripe_index);
> + }
> +
> + bbio = alloc_btrfs_bio(num_stripes, 0);
> + if (!bbio) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < num_stripes; i++) {
> + bbio->stripes[i].physical =
> + map->stripes[stripe_index].physical +
> + stripe_offset + stripe_nr * map->stripe_len;
> + bbio->stripes[i].dev = map->stripes[stripe_index].dev;
> +
> + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
> + BTRFS_BLOCK_GROUP_RAID10)) {
> + bbio->stripes[i].length = stripes_per_dev *
> + map->stripe_len;
> +
> + if (i / sub_stripes < remaining_stripes)
> + bbio->stripes[i].length +=
> + map->stripe_len;
> +
> + /*
> + * Special for the first stripe and
> + * the last stripe:
> + *
> + * |-------|...|-------|
> + * |----------|
> + * off end_off
> + */
> + if (i < sub_stripes)
> + bbio->stripes[i].length -=
> + stripe_offset;
> +
> + if (stripe_index >= last_stripe &&
> + stripe_index <= (last_stripe +
> + sub_stripes - 1))
> + bbio->stripes[i].length -=
> + stripe_end_offset;
> +
> + if (i == sub_stripes - 1)
> + stripe_offset = 0;
> + } else {
> + bbio->stripes[i].length = length;
> + }
> +
> + stripe_index++;
> + if (stripe_index == map->num_stripes) {
> + stripe_index = 0;
> + stripe_nr++;
> + }
> + }
> +
> + *bbio_ret = bbio;
> + bbio->map_type = map->type;
> + bbio->num_stripes = num_stripes;
> +out:
> + free_extent_map(em);
> + return ret;
> +}
> +
> static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> enum btrfs_map_op op,
> u64 logical, u64 *length,
> @@ -5304,10 +5473,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> struct map_lookup *map;
> u64 offset;
> u64 stripe_offset;
> - u64 stripe_end_offset;
> u64 stripe_nr;
> - u64 stripe_nr_orig;
> - u64 stripe_nr_end;
> u64 stripe_len;
> u32 stripe_index;
> int i;
> @@ -5323,6 +5489,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> u64 physical_to_patch_in_first_stripe = 0;
> u64 raid56_full_stripe_start = (u64)-1;
>
> + if (op == BTRFS_MAP_DISCARD)
> + return __btrfs_map_block_for_discard(fs_info, logical,
> + *length, bbio_ret);
> +
> em = get_chunk_map(fs_info, logical, *length);
> if (IS_ERR(em))
> return PTR_ERR(em);
> @@ -5364,14 +5534,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> raid56_full_stripe_start *= full_stripe_len;
> }
>
> - if (op == BTRFS_MAP_DISCARD) {
> - /* we don't discard raid56 yet */
> - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
> - ret = -EOPNOTSUPP;
> - goto out;
> - }
> - *length = min_t(u64, em->len - offset, *length);
> - } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> + if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> u64 max_len;
> /* For writes to RAID[56], allow a full stripeset across all disks.
> For other RAID types and for RAID[56] reads, just allow a single
> @@ -5402,8 +5565,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> btrfs_dev_replace_set_lock_blocking(dev_replace);
>
> if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
> - op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
> - op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
> + op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
> + dev_replace->tgtdev != NULL) {
> /*
> * in dev-replace case, for repair case (that's the only
> * case where the mirror is selected explicitly when
> @@ -5483,24 +5646,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
>
> num_stripes = 1;
> stripe_index = 0;
> - stripe_nr_orig = stripe_nr;
> - stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
> - stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
> - stripe_end_offset = stripe_nr_end * map->stripe_len -
> - (offset + *length);
> -
> if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
> - if (op == BTRFS_MAP_DISCARD)
> - num_stripes = min_t(u64, map->num_stripes,
> - stripe_nr_end - stripe_nr_orig);
> stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
> &stripe_index);
> - if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
> - op != BTRFS_MAP_GET_READ_MIRRORS)
> + if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS)
> mirror_num = 1;
> } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
> - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
> - op == BTRFS_MAP_GET_READ_MIRRORS)
> + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
> num_stripes = map->num_stripes;
> else if (mirror_num)
> stripe_index = mirror_num - 1;
> @@ -5513,8 +5665,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> }
>
> } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
> - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD ||
> - op == BTRFS_MAP_GET_READ_MIRRORS) {
> + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) {
> num_stripes = map->num_stripes;
> } else if (mirror_num) {
> stripe_index = mirror_num - 1;
> @@ -5530,10 +5681,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
>
> if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
> num_stripes = map->sub_stripes;
> - else if (op == BTRFS_MAP_DISCARD)
> - num_stripes = min_t(u64, map->sub_stripes *
> - (stripe_nr_end - stripe_nr_orig),
> - map->num_stripes);
> else if (mirror_num)
> stripe_index += mirror_num - 1;
> else {
> @@ -5576,8 +5723,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> /* We distribute the parity blocks across stripes */
> div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
> &stripe_index);
> - if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD &&
> - op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1)
> + if ((op != BTRFS_MAP_WRITE &&
> + op != BTRFS_MAP_GET_READ_MIRRORS) &&
> + mirror_num <= 1)
> mirror_num = 1;
> }
> } else {
> @@ -5600,7 +5748,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
>
> num_alloc_stripes = num_stripes;
> if (dev_replace_is_ongoing) {
> - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD)
> + if (op == BTRFS_MAP_WRITE)
> num_alloc_stripes <<= 1;
> if (op == BTRFS_MAP_GET_READ_MIRRORS)
> num_alloc_stripes++;
> @@ -5643,84 +5791,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> RAID6_Q_STRIPE;
> }
>
> - if (op == BTRFS_MAP_DISCARD) {
> - u32 factor = 0;
> - u32 sub_stripes = 0;
> - u64 stripes_per_dev = 0;
> - u32 remaining_stripes = 0;
> - u32 last_stripe = 0;
>
> - if (map->type &
> - (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
> - if (map->type & BTRFS_BLOCK_GROUP_RAID0)
> - sub_stripes = 1;
> - else
> - sub_stripes = map->sub_stripes;
> -
> - factor = map->num_stripes / sub_stripes;
> - stripes_per_dev = div_u64_rem(stripe_nr_end -
> - stripe_nr_orig,
> - factor,
> - &remaining_stripes);
> - div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
> - last_stripe *= sub_stripes;
> - }
> -
> - for (i = 0; i < num_stripes; i++) {
> - bbio->stripes[i].physical =
> - map->stripes[stripe_index].physical +
> - stripe_offset + stripe_nr * map->stripe_len;
> - bbio->stripes[i].dev = map->stripes[stripe_index].dev;
> -
> - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
> - BTRFS_BLOCK_GROUP_RAID10)) {
> - bbio->stripes[i].length = stripes_per_dev *
> - map->stripe_len;
> -
> - if (i / sub_stripes < remaining_stripes)
> - bbio->stripes[i].length +=
> - map->stripe_len;
> -
> - /*
> - * Special for the first stripe and
> - * the last stripe:
> - *
> - * |-------|...|-------|
> - * |----------|
> - * off end_off
> - */
> - if (i < sub_stripes)
> - bbio->stripes[i].length -=
> - stripe_offset;
> -
> - if (stripe_index >= last_stripe &&
> - stripe_index <= (last_stripe +
> - sub_stripes - 1))
> - bbio->stripes[i].length -=
> - stripe_end_offset;
> -
> - if (i == sub_stripes - 1)
> - stripe_offset = 0;
> - } else
> - bbio->stripes[i].length = *length;
> -
> - stripe_index++;
> - if (stripe_index == map->num_stripes) {
> - /* This could only happen for RAID0/10 */
> - stripe_index = 0;
> - stripe_nr++;
> - }
> - }
> - } else {
> - for (i = 0; i < num_stripes; i++) {
> - bbio->stripes[i].physical =
> - map->stripes[stripe_index].physical +
> - stripe_offset +
> - stripe_nr * map->stripe_len;
> - bbio->stripes[i].dev =
> - map->stripes[stripe_index].dev;
> - stripe_index++;
> - }
> + for (i = 0; i < num_stripes; i++) {
> + bbio->stripes[i].physical =
> + map->stripes[stripe_index].physical +
> + stripe_offset +
> + stripe_nr * map->stripe_len;
> + bbio->stripes[i].dev =
> + map->stripes[stripe_index].dev;
> + stripe_index++;
> }
>
> if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
> @@ -5730,8 +5809,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> sort_parity_stripes(bbio, num_stripes);
>
> tgtdev_indexes = 0;
> - if (dev_replace_is_ongoing &&
> - (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) &&
> + if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE &&
> dev_replace->tgtdev != NULL) {
> int index_where_to_add;
> u64 srcdev_devid = dev_replace->srcdev->devid;
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/7] Btrfs: separate DISCARD from __btrfs_map_block
2017-02-20 3:54 ` Qu Wenruo
@ 2017-03-06 19:49 ` Liu Bo
0 siblings, 0 replies; 20+ messages in thread
From: Liu Bo @ 2017-03-06 19:49 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs, David Sterba
On Mon, Feb 20, 2017 at 11:54:31AM +0800, Qu Wenruo wrote:
>
>
> At 02/18/2017 09:28 AM, Liu Bo wrote:
> > Since DISCARD is not as important as an operation like write, we don't
> > copy it to target device during replace, and it makes __btrfs_map_block
> > less complex.
>
> Makes sense to me.
>
> >
> > Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> > ---
> > fs/btrfs/volumes.c | 306 +++++++++++++++++++++++++++++++++--------------------
> > 1 file changed, 192 insertions(+), 114 deletions(-)
> >
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index c52b0fe..96228f3 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -5294,6 +5294,175 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
> > kfree(bbio);
> > }
> >
> > +/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
> > +/*
> > + * Please note that, discard won't be sent to target device of device
> > + * replace.
> > + */
> > +static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
> > + u64 logical, u64 length,
> > + struct btrfs_bio **bbio_ret)
> > +{
> > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
> > + struct extent_map *em;
> > + struct map_lookup *map;
> > + struct btrfs_bio *bbio;
> > + u64 offset;
> > + u64 stripe_nr;
> > + u64 stripe_nr_end;
> > + u64 stripe_end_offset;
> > + u64 stripe_cnt;
> > + u64 stripe_len;
> > + u64 stripe_offset;
> > + u64 num_stripes;
> > + u32 stripe_index;
> > + u32 factor = 0;
> > + u32 sub_stripes = 0;
> > + u64 stripes_per_dev = 0;
> > + u32 remaining_stripes = 0;
> > + u32 last_stripe = 0;
> > + int ret = 0;
> > + int i;
> > +
> > + /* discard always return a bbio */
> > + ASSERT(bbio_ret);
> > +
> > + read_lock(&em_tree->lock);
> > + em = lookup_extent_mapping(em_tree, logical, length);
> > + read_unlock(&em_tree->lock);
>
> It seems that get_chunk_map() in previous patch can replace such searching
> and error message.
>
Yeah, I forgot to update with it.
> > +
> > + if (!em) {
> > + btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> > + logical, length);
> > + return -EINVAL;
> > + }
> > +
> > + if (em->start > logical || em->start + em->len < logical) {
> > + btrfs_crit(fs_info,
> > + "found a bad mapping, wanted %Lu, found %Lu-%Lu",
> > + logical, em->start, em->start + em->len);
> > + free_extent_map(em);
> > + return -EINVAL;
> > + }
> > +
> > + map = em->map_lookup;
> > + /* we don't discard raid56 yet */
> > + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
> > + ret = -EOPNOTSUPP;
> > + goto out;
> > + }
> > +
> > + offset = logical - em->start;
> > + length = min_t(u64, em->len - offset, length);
> > +
> > + stripe_len = map->stripe_len;
> > + /*
> > + * stripe_nr counts the total number of stripes we have to stride
> > + * to get to this block
> > + */
> > + stripe_nr = div64_u64(offset, stripe_len);
> > + stripe_offset = stripe_nr * stripe_len;
> > + ASSERT(offset >= stripe_offset);
>
> What about a DIV_ROUND_DOWN helper?
> Surprisingly we only have DIR_ROUND_UP, not not DIV_ROUND_DOWN.
>
> And if we're only going to support 64K stripe len, then round_down() is good
> for current usage.
>
> > +
> > + /* stripe_offset is the offset of this block in its stripe */
> > + stripe_offset = offset - stripe_offset;
>
> This is a little confusing.
> What about using another variable called @stripe_start instead of using the
> same variable @stripe_offset to temporarily store stripe start bytenr.
>
> I prefer to do it in one run without resuing @stripe_offset variable to
> avoid confusion.
Right, I was trying to keep the check of (offset >= stripe_offset), but it's not
necessary.
>
> > +
> > + stripe_nr_end = ALIGN(offset + length, map->stripe_len);
>
> round_up() causes less confusion.
>
> And IIRC, ALIGN/round_up can only handle power of 2, this implies the
> stripe_len must be power of 2, which is OK for now.
> If using ALIGN here, we can also use round_down() in previous stripe_nr.
>
Good point.
Thanks,
-liubo
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 3/7] Btrfs: introduce a function to get extra mirror from replace
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
2017-02-18 1:28 ` [PATCH 1/7] Btrfs: create a helper for getting chunk map Liu Bo
2017-02-18 1:28 ` [PATCH 2/7] Btrfs: separate DISCARD from __btrfs_map_block Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 6:26 ` Qu Wenruo
2017-02-18 1:28 ` [PATCH 4/7] Btrfs: handle operations for device replace separately Liu Bo
` (6 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
As the part of getting extra mirror in __btrfs_map_block is
self-independent, this puts it into a separate function.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/volumes.c | 161 +++++++++++++++++++++++++++++------------------------
1 file changed, 89 insertions(+), 72 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 96228f3..f62efc7 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -140,6 +140,11 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ enum btrfs_map_op op,
+ u64 logical, u64 *length,
+ struct btrfs_bio **bbio_ret,
+ int mirror_num, int need_raid_map);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@@ -5463,6 +5468,83 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
return ret;
}
+/*
+ * In dev-replace case, for repair case (that's the only case where the mirror
+ * is selected explicitly when calling btrfs_map_block), blocks left of the
+ * left cursor can also be read from the target drive.
+ *
+ * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
+ * array of stripes.
+ * For READ, it also needs to be supported using the same mirror number.
+ *
+ * If the requested block is not left of the left cursor, EIO is returned. This
+ * can happen because btrfs_num_copies() returns one more in the dev-replace
+ * case.
+ */
+static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length,
+ u64 srcdev_devid, int *mirror_num,
+ u64 *physical)
+{
+ struct btrfs_bio *bbio = NULL;
+ int num_stripes;
+ int index_srcdev = 0;
+ int found = 0;
+ u64 physical_of_found = 0;
+ int i;
+ int ret = 0;
+
+ ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+ logical, &length, &bbio, 0, 0);
+ if (ret) {
+ ASSERT(bbio == NULL);
+ return ret;
+ }
+
+ num_stripes = bbio->num_stripes;
+ if (*mirror_num > num_stripes) {
+ /*
+ * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
+ * that means that the requested area is not left of the left
+ * cursor
+ */
+ btrfs_put_bbio(bbio);
+ return -EIO;
+ }
+
+ /*
+ * process the rest of the function using the mirror_num of the source
+ * drive. Therefore look it up first. At the end, patch the device
+ * pointer to the one of the target drive.
+ */
+ for (i = 0; i < num_stripes; i++) {
+ if (bbio->stripes[i].dev->devid != srcdev_devid)
+ continue;
+
+ /*
+ * In case of DUP, in order to keep it simple, only add the
+ * mirror with the lowest physical address
+ */
+ if (found &&
+ physical_of_found <= bbio->stripes[i].physical)
+ continue;
+
+ index_srcdev = i;
+ found = 1;
+ physical_of_found = bbio->stripes[i].physical;
+ }
+
+ btrfs_put_bbio(bbio);
+
+ ASSERT(found);
+ if (!found)
+ return -EIO;
+
+ *mirror_num = index_srcdev + 1;
+ *physical = physical_of_found;
+ return ret;
+}
+
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
@@ -5567,79 +5649,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
dev_replace->tgtdev != NULL) {
- /*
- * in dev-replace case, for repair case (that's the only
- * case where the mirror is selected explicitly when
- * calling btrfs_map_block), blocks left of the left cursor
- * can also be read from the target drive.
- * For REQ_GET_READ_MIRRORS, the target drive is added as
- * the last one to the array of stripes. For READ, it also
- * needs to be supported using the same mirror number.
- * If the requested block is not left of the left cursor,
- * EIO is returned. This can happen because btrfs_num_copies()
- * returns one more in the dev-replace case.
- */
- u64 tmp_length = *length;
- struct btrfs_bio *tmp_bbio = NULL;
- int tmp_num_stripes;
- u64 srcdev_devid = dev_replace->srcdev->devid;
- int index_srcdev = 0;
- int found = 0;
- u64 physical_of_found = 0;
-
- ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
- logical, &tmp_length, &tmp_bbio, 0, 0);
- if (ret) {
- WARN_ON(tmp_bbio != NULL);
- goto out;
- }
-
- tmp_num_stripes = tmp_bbio->num_stripes;
- if (mirror_num > tmp_num_stripes) {
- /*
- * BTRFS_MAP_GET_READ_MIRRORS does not contain this
- * mirror, that means that the requested area
- * is not left of the left cursor
- */
- ret = -EIO;
- btrfs_put_bbio(tmp_bbio);
- goto out;
- }
-
- /*
- * process the rest of the function using the mirror_num
- * of the source drive. Therefore look it up first.
- * At the end, patch the device pointer to the one of the
- * target drive.
- */
- for (i = 0; i < tmp_num_stripes; i++) {
- if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
- continue;
-
- /*
- * In case of DUP, in order to keep it simple, only add
- * the mirror with the lowest physical address
- */
- if (found &&
- physical_of_found <= tmp_bbio->stripes[i].physical)
- continue;
-
- index_srcdev = i;
- found = 1;
- physical_of_found = tmp_bbio->stripes[i].physical;
- }
-
- btrfs_put_bbio(tmp_bbio);
-
- if (!found) {
- WARN_ON(1);
- ret = -EIO;
+ ret = get_extra_mirror_from_replace(fs_info, logical, *length,
+ dev_replace->srcdev->devid,
+ &mirror_num,
+ &physical_to_patch_in_first_stripe);
+ if (ret)
goto out;
- }
-
- mirror_num = index_srcdev + 1;
- patch_the_first_stripe_for_dev_replace = 1;
- physical_to_patch_in_first_stripe = physical_of_found;
+ else
+ patch_the_first_stripe_for_dev_replace = 1;
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 3/7] Btrfs: introduce a function to get extra mirror from replace
2017-02-18 1:28 ` [PATCH 3/7] Btrfs: introduce a function to get extra mirror from replace Liu Bo
@ 2017-02-20 6:26 ` Qu Wenruo
0 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 6:26 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> As the part of getting extra mirror in __btrfs_map_block is
> self-independent, this puts it into a separate function.
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Looks good to me.
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Thanks,
Qu
> ---
> fs/btrfs/volumes.c | 161 +++++++++++++++++++++++++++++------------------------
> 1 file changed, 89 insertions(+), 72 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 96228f3..f62efc7 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -140,6 +140,11 @@ static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
> static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
> static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
> static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
> +static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> + enum btrfs_map_op op,
> + u64 logical, u64 *length,
> + struct btrfs_bio **bbio_ret,
> + int mirror_num, int need_raid_map);
>
> DEFINE_MUTEX(uuid_mutex);
> static LIST_HEAD(fs_uuids);
> @@ -5463,6 +5468,83 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
> return ret;
> }
>
> +/*
> + * In dev-replace case, for repair case (that's the only case where the mirror
> + * is selected explicitly when calling btrfs_map_block), blocks left of the
> + * left cursor can also be read from the target drive.
> + *
> + * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
> + * array of stripes.
> + * For READ, it also needs to be supported using the same mirror number.
> + *
> + * If the requested block is not left of the left cursor, EIO is returned. This
> + * can happen because btrfs_num_copies() returns one more in the dev-replace
> + * case.
> + */
> +static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
> + u64 logical, u64 length,
> + u64 srcdev_devid, int *mirror_num,
> + u64 *physical)
> +{
> + struct btrfs_bio *bbio = NULL;
> + int num_stripes;
> + int index_srcdev = 0;
> + int found = 0;
> + u64 physical_of_found = 0;
> + int i;
> + int ret = 0;
> +
> + ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
> + logical, &length, &bbio, 0, 0);
> + if (ret) {
> + ASSERT(bbio == NULL);
> + return ret;
> + }
> +
> + num_stripes = bbio->num_stripes;
> + if (*mirror_num > num_stripes) {
> + /*
> + * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
> + * that means that the requested area is not left of the left
> + * cursor
> + */
> + btrfs_put_bbio(bbio);
> + return -EIO;
> + }
> +
> + /*
> + * process the rest of the function using the mirror_num of the source
> + * drive. Therefore look it up first. At the end, patch the device
> + * pointer to the one of the target drive.
> + */
> + for (i = 0; i < num_stripes; i++) {
> + if (bbio->stripes[i].dev->devid != srcdev_devid)
> + continue;
> +
> + /*
> + * In case of DUP, in order to keep it simple, only add the
> + * mirror with the lowest physical address
> + */
> + if (found &&
> + physical_of_found <= bbio->stripes[i].physical)
> + continue;
> +
> + index_srcdev = i;
> + found = 1;
> + physical_of_found = bbio->stripes[i].physical;
> + }
> +
> + btrfs_put_bbio(bbio);
> +
> + ASSERT(found);
> + if (!found)
> + return -EIO;
> +
> + *mirror_num = index_srcdev + 1;
> + *physical = physical_of_found;
> + return ret;
> +}
> +
> static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> enum btrfs_map_op op,
> u64 logical, u64 *length,
> @@ -5567,79 +5649,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
> op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
> dev_replace->tgtdev != NULL) {
> - /*
> - * in dev-replace case, for repair case (that's the only
> - * case where the mirror is selected explicitly when
> - * calling btrfs_map_block), blocks left of the left cursor
> - * can also be read from the target drive.
> - * For REQ_GET_READ_MIRRORS, the target drive is added as
> - * the last one to the array of stripes. For READ, it also
> - * needs to be supported using the same mirror number.
> - * If the requested block is not left of the left cursor,
> - * EIO is returned. This can happen because btrfs_num_copies()
> - * returns one more in the dev-replace case.
> - */
> - u64 tmp_length = *length;
> - struct btrfs_bio *tmp_bbio = NULL;
> - int tmp_num_stripes;
> - u64 srcdev_devid = dev_replace->srcdev->devid;
> - int index_srcdev = 0;
> - int found = 0;
> - u64 physical_of_found = 0;
> -
> - ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
> - logical, &tmp_length, &tmp_bbio, 0, 0);
> - if (ret) {
> - WARN_ON(tmp_bbio != NULL);
> - goto out;
> - }
> -
> - tmp_num_stripes = tmp_bbio->num_stripes;
> - if (mirror_num > tmp_num_stripes) {
> - /*
> - * BTRFS_MAP_GET_READ_MIRRORS does not contain this
> - * mirror, that means that the requested area
> - * is not left of the left cursor
> - */
> - ret = -EIO;
> - btrfs_put_bbio(tmp_bbio);
> - goto out;
> - }
> -
> - /*
> - * process the rest of the function using the mirror_num
> - * of the source drive. Therefore look it up first.
> - * At the end, patch the device pointer to the one of the
> - * target drive.
> - */
> - for (i = 0; i < tmp_num_stripes; i++) {
> - if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
> - continue;
> -
> - /*
> - * In case of DUP, in order to keep it simple, only add
> - * the mirror with the lowest physical address
> - */
> - if (found &&
> - physical_of_found <= tmp_bbio->stripes[i].physical)
> - continue;
> -
> - index_srcdev = i;
> - found = 1;
> - physical_of_found = tmp_bbio->stripes[i].physical;
> - }
> -
> - btrfs_put_bbio(tmp_bbio);
> -
> - if (!found) {
> - WARN_ON(1);
> - ret = -EIO;
> + ret = get_extra_mirror_from_replace(fs_info, logical, *length,
> + dev_replace->srcdev->devid,
> + &mirror_num,
> + &physical_to_patch_in_first_stripe);
> + if (ret)
> goto out;
> - }
> -
> - mirror_num = index_srcdev + 1;
> - patch_the_first_stripe_for_dev_replace = 1;
> - physical_to_patch_in_first_stripe = physical_of_found;
> + else
> + patch_the_first_stripe_for_dev_replace = 1;
> } else if (mirror_num > map->num_stripes) {
> mirror_num = 0;
> }
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 4/7] Btrfs: handle operations for device replace separately
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (2 preceding siblings ...)
2017-02-18 1:28 ` [PATCH 3/7] Btrfs: introduce a function to get extra mirror from replace Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 6:30 ` Qu Wenruo
2017-02-18 1:28 ` [PATCH 5/7] Btrfs: do not add extra mirror when dev_replace target dev is not available Liu Bo
` (5 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
Since this part is mostly self-independent, this moves it to a separate
function.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/volumes.c | 179 +++++++++++++++++++++++++++++------------------------
1 file changed, 98 insertions(+), 81 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f62efc7..c434489 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5545,6 +5545,100 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
return ret;
}
+static void handle_ops_on_dev_replace(enum btrfs_map_op op,
+ struct btrfs_bio **bbio_ret,
+ struct btrfs_dev_replace *dev_replace,
+ int *num_stripes_ret, int *max_errors_ret)
+{
+ struct btrfs_bio *bbio = *bbio_ret;
+ u64 srcdev_devid = dev_replace->srcdev->devid;
+ int tgtdev_indexes = 0;
+ int num_stripes = *num_stripes_ret;
+ int max_errors = *max_errors_ret;
+ int i;
+
+ if (op == BTRFS_MAP_WRITE) {
+ int index_where_to_add;
+
+ /*
+ * duplicate the write operations while the dev replace
+ * procedure is running. Since the copying of the old disk to
+ * the new disk takes place at run time while the filesystem is
+ * mounted writable, the regular write operations to the old
+ * disk have to be duplicated to go to the new disk as well.
+ *
+ * Note that device->missing is handled by the caller, and that
+ * the write to the old disk is already set up in the stripes
+ * array.
+ */
+ index_where_to_add = num_stripes;
+ for (i = 0; i < num_stripes; i++) {
+ if (bbio->stripes[i].dev->devid == srcdev_devid) {
+ /* write to new disk, too */
+ struct btrfs_bio_stripe *new =
+ bbio->stripes + index_where_to_add;
+ struct btrfs_bio_stripe *old =
+ bbio->stripes + i;
+
+ new->physical = old->physical;
+ new->length = old->length;
+ new->dev = dev_replace->tgtdev;
+ bbio->tgtdev_map[i] = index_where_to_add;
+ index_where_to_add++;
+ max_errors++;
+ tgtdev_indexes++;
+ }
+ }
+ num_stripes = index_where_to_add;
+ } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
+ int index_srcdev = 0;
+ int found = 0;
+ u64 physical_of_found = 0;
+
+ /*
+ * During the dev-replace procedure, the target drive can also
+ * be used to read data in case it is needed to repair a corrupt
+ * block elsewhere. This is possible if the requested area is
+ * left of the left cursor. In this area, the target drive is a
+ * full copy of the source drive.
+ */
+ for (i = 0; i < num_stripes; i++) {
+ if (bbio->stripes[i].dev->devid == srcdev_devid) {
+ /*
+ * In case of DUP, in order to keep it simple,
+ * only add the mirror with the lowest physical
+ * address
+ */
+ if (found &&
+ physical_of_found <=
+ bbio->stripes[i].physical)
+ continue;
+ index_srcdev = i;
+ found = 1;
+ physical_of_found = bbio->stripes[i].physical;
+ }
+ }
+ if (found) {
+ struct btrfs_bio_stripe *tgtdev_stripe =
+ bbio->stripes + num_stripes;
+
+ tgtdev_stripe->physical = physical_of_found;
+ tgtdev_stripe->length =
+ bbio->stripes[index_srcdev].length;
+ tgtdev_stripe->dev = dev_replace->tgtdev;
+ bbio->tgtdev_map[index_srcdev] = num_stripes;
+
+ tgtdev_indexes++;
+ num_stripes++;
+ }
+ }
+
+ *num_stripes_ret = num_stripes;
+ *max_errors_ret = max_errors;
+ bbio->num_tgtdevs = tgtdev_indexes;
+ *bbio_ret = bbio;
+}
+
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
@@ -5825,86 +5919,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (bbio->raid_map)
sort_parity_stripes(bbio, num_stripes);
- tgtdev_indexes = 0;
- if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE &&
- dev_replace->tgtdev != NULL) {
- int index_where_to_add;
- u64 srcdev_devid = dev_replace->srcdev->devid;
-
- /*
- * duplicate the write operations while the dev replace
- * procedure is running. Since the copying of the old disk
- * to the new disk takes place at run time while the
- * filesystem is mounted writable, the regular write
- * operations to the old disk have to be duplicated to go
- * to the new disk as well.
- * Note that device->missing is handled by the caller, and
- * that the write to the old disk is already set up in the
- * stripes array.
- */
- index_where_to_add = num_stripes;
- for (i = 0; i < num_stripes; i++) {
- if (bbio->stripes[i].dev->devid == srcdev_devid) {
- /* write to new disk, too */
- struct btrfs_bio_stripe *new =
- bbio->stripes + index_where_to_add;
- struct btrfs_bio_stripe *old =
- bbio->stripes + i;
-
- new->physical = old->physical;
- new->length = old->length;
- new->dev = dev_replace->tgtdev;
- bbio->tgtdev_map[i] = index_where_to_add;
- index_where_to_add++;
- max_errors++;
- tgtdev_indexes++;
- }
- }
- num_stripes = index_where_to_add;
- } else if (dev_replace_is_ongoing &&
- op == BTRFS_MAP_GET_READ_MIRRORS &&
- dev_replace->tgtdev != NULL) {
- u64 srcdev_devid = dev_replace->srcdev->devid;
- int index_srcdev = 0;
- int found = 0;
- u64 physical_of_found = 0;
-
- /*
- * During the dev-replace procedure, the target drive can
- * also be used to read data in case it is needed to repair
- * a corrupt block elsewhere. This is possible if the
- * requested area is left of the left cursor. In this area,
- * the target drive is a full copy of the source drive.
- */
- for (i = 0; i < num_stripes; i++) {
- if (bbio->stripes[i].dev->devid == srcdev_devid) {
- /*
- * In case of DUP, in order to keep it
- * simple, only add the mirror with the
- * lowest physical address
- */
- if (found &&
- physical_of_found <=
- bbio->stripes[i].physical)
- continue;
- index_srcdev = i;
- found = 1;
- physical_of_found = bbio->stripes[i].physical;
- }
- }
- if (found) {
- struct btrfs_bio_stripe *tgtdev_stripe =
- bbio->stripes + num_stripes;
-
- tgtdev_stripe->physical = physical_of_found;
- tgtdev_stripe->length =
- bbio->stripes[index_srcdev].length;
- tgtdev_stripe->dev = dev_replace->tgtdev;
- bbio->tgtdev_map[index_srcdev] = num_stripes;
-
- tgtdev_indexes++;
- num_stripes++;
- }
+ if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
+ (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)) {
+ handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
+ &max_errors);
}
*bbio_ret = bbio;
@@ -5912,7 +5930,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
bbio->num_stripes = num_stripes;
bbio->max_errors = max_errors;
bbio->mirror_num = mirror_num;
- bbio->num_tgtdevs = tgtdev_indexes;
/*
* this is the case that REQ_READ && dev_replace_is_ongoing &&
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 4/7] Btrfs: handle operations for device replace separately
2017-02-18 1:28 ` [PATCH 4/7] Btrfs: handle operations for device replace separately Liu Bo
@ 2017-02-20 6:30 ` Qu Wenruo
0 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 6:30 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> Since this part is mostly self-independent, this moves it to a separate
> function.
Just code move and modified part looks good to me.
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Thanks,
Qu
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> fs/btrfs/volumes.c | 179 +++++++++++++++++++++++++++++------------------------
> 1 file changed, 98 insertions(+), 81 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index f62efc7..c434489 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5545,6 +5545,100 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
> return ret;
> }
>
> +static void handle_ops_on_dev_replace(enum btrfs_map_op op,
> + struct btrfs_bio **bbio_ret,
> + struct btrfs_dev_replace *dev_replace,
> + int *num_stripes_ret, int *max_errors_ret)
> +{
> + struct btrfs_bio *bbio = *bbio_ret;
> + u64 srcdev_devid = dev_replace->srcdev->devid;
> + int tgtdev_indexes = 0;
> + int num_stripes = *num_stripes_ret;
> + int max_errors = *max_errors_ret;
> + int i;
> +
> + if (op == BTRFS_MAP_WRITE) {
> + int index_where_to_add;
> +
> + /*
> + * duplicate the write operations while the dev replace
> + * procedure is running. Since the copying of the old disk to
> + * the new disk takes place at run time while the filesystem is
> + * mounted writable, the regular write operations to the old
> + * disk have to be duplicated to go to the new disk as well.
> + *
> + * Note that device->missing is handled by the caller, and that
> + * the write to the old disk is already set up in the stripes
> + * array.
> + */
> + index_where_to_add = num_stripes;
> + for (i = 0; i < num_stripes; i++) {
> + if (bbio->stripes[i].dev->devid == srcdev_devid) {
> + /* write to new disk, too */
> + struct btrfs_bio_stripe *new =
> + bbio->stripes + index_where_to_add;
> + struct btrfs_bio_stripe *old =
> + bbio->stripes + i;
> +
> + new->physical = old->physical;
> + new->length = old->length;
> + new->dev = dev_replace->tgtdev;
> + bbio->tgtdev_map[i] = index_where_to_add;
> + index_where_to_add++;
> + max_errors++;
> + tgtdev_indexes++;
> + }
> + }
> + num_stripes = index_where_to_add;
> + } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
> + int index_srcdev = 0;
> + int found = 0;
> + u64 physical_of_found = 0;
> +
> + /*
> + * During the dev-replace procedure, the target drive can also
> + * be used to read data in case it is needed to repair a corrupt
> + * block elsewhere. This is possible if the requested area is
> + * left of the left cursor. In this area, the target drive is a
> + * full copy of the source drive.
> + */
> + for (i = 0; i < num_stripes; i++) {
> + if (bbio->stripes[i].dev->devid == srcdev_devid) {
> + /*
> + * In case of DUP, in order to keep it simple,
> + * only add the mirror with the lowest physical
> + * address
> + */
> + if (found &&
> + physical_of_found <=
> + bbio->stripes[i].physical)
> + continue;
> + index_srcdev = i;
> + found = 1;
> + physical_of_found = bbio->stripes[i].physical;
> + }
> + }
> + if (found) {
> + struct btrfs_bio_stripe *tgtdev_stripe =
> + bbio->stripes + num_stripes;
> +
> + tgtdev_stripe->physical = physical_of_found;
> + tgtdev_stripe->length =
> + bbio->stripes[index_srcdev].length;
> + tgtdev_stripe->dev = dev_replace->tgtdev;
> + bbio->tgtdev_map[index_srcdev] = num_stripes;
> +
> + tgtdev_indexes++;
> + num_stripes++;
> + }
> + }
> +
> + *num_stripes_ret = num_stripes;
> + *max_errors_ret = max_errors;
> + bbio->num_tgtdevs = tgtdev_indexes;
> + *bbio_ret = bbio;
> +}
> +
> static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> enum btrfs_map_op op,
> u64 logical, u64 *length,
> @@ -5825,86 +5919,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> if (bbio->raid_map)
> sort_parity_stripes(bbio, num_stripes);
>
> - tgtdev_indexes = 0;
> - if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE &&
> - dev_replace->tgtdev != NULL) {
> - int index_where_to_add;
> - u64 srcdev_devid = dev_replace->srcdev->devid;
> -
> - /*
> - * duplicate the write operations while the dev replace
> - * procedure is running. Since the copying of the old disk
> - * to the new disk takes place at run time while the
> - * filesystem is mounted writable, the regular write
> - * operations to the old disk have to be duplicated to go
> - * to the new disk as well.
> - * Note that device->missing is handled by the caller, and
> - * that the write to the old disk is already set up in the
> - * stripes array.
> - */
> - index_where_to_add = num_stripes;
> - for (i = 0; i < num_stripes; i++) {
> - if (bbio->stripes[i].dev->devid == srcdev_devid) {
> - /* write to new disk, too */
> - struct btrfs_bio_stripe *new =
> - bbio->stripes + index_where_to_add;
> - struct btrfs_bio_stripe *old =
> - bbio->stripes + i;
> -
> - new->physical = old->physical;
> - new->length = old->length;
> - new->dev = dev_replace->tgtdev;
> - bbio->tgtdev_map[i] = index_where_to_add;
> - index_where_to_add++;
> - max_errors++;
> - tgtdev_indexes++;
> - }
> - }
> - num_stripes = index_where_to_add;
> - } else if (dev_replace_is_ongoing &&
> - op == BTRFS_MAP_GET_READ_MIRRORS &&
> - dev_replace->tgtdev != NULL) {
> - u64 srcdev_devid = dev_replace->srcdev->devid;
> - int index_srcdev = 0;
> - int found = 0;
> - u64 physical_of_found = 0;
> -
> - /*
> - * During the dev-replace procedure, the target drive can
> - * also be used to read data in case it is needed to repair
> - * a corrupt block elsewhere. This is possible if the
> - * requested area is left of the left cursor. In this area,
> - * the target drive is a full copy of the source drive.
> - */
> - for (i = 0; i < num_stripes; i++) {
> - if (bbio->stripes[i].dev->devid == srcdev_devid) {
> - /*
> - * In case of DUP, in order to keep it
> - * simple, only add the mirror with the
> - * lowest physical address
> - */
> - if (found &&
> - physical_of_found <=
> - bbio->stripes[i].physical)
> - continue;
> - index_srcdev = i;
> - found = 1;
> - physical_of_found = bbio->stripes[i].physical;
> - }
> - }
> - if (found) {
> - struct btrfs_bio_stripe *tgtdev_stripe =
> - bbio->stripes + num_stripes;
> -
> - tgtdev_stripe->physical = physical_of_found;
> - tgtdev_stripe->length =
> - bbio->stripes[index_srcdev].length;
> - tgtdev_stripe->dev = dev_replace->tgtdev;
> - bbio->tgtdev_map[index_srcdev] = num_stripes;
> -
> - tgtdev_indexes++;
> - num_stripes++;
> - }
> + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
> + (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)) {
> + handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
> + &max_errors);
> }
>
> *bbio_ret = bbio;
> @@ -5912,7 +5930,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> bbio->num_stripes = num_stripes;
> bbio->max_errors = max_errors;
> bbio->mirror_num = mirror_num;
> - bbio->num_tgtdevs = tgtdev_indexes;
>
> /*
> * this is the case that REQ_READ && dev_replace_is_ongoing &&
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 5/7] Btrfs: do not add extra mirror when dev_replace target dev is not available
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (3 preceding siblings ...)
2017-02-18 1:28 ` [PATCH 4/7] Btrfs: handle operations for device replace separately Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 6:39 ` Qu Wenruo
2017-02-18 1:28 ` [PATCH 6/7] Btrfs: helper for ops that requires full stripe Liu Bo
` (4 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
With this, we can avoid allocating memory for dev replace copies if the
target dev is not available.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/volumes.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c434489..bcd44eb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5153,7 +5153,8 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
free_extent_map(em);
btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
- if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+ if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
+ fs_info->dev_replace.tgtdev)
ret++;
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
@@ -5858,7 +5859,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
}
num_alloc_stripes = num_stripes;
- if (dev_replace_is_ongoing) {
+ if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
if (op == BTRFS_MAP_WRITE)
num_alloc_stripes <<= 1;
if (op == BTRFS_MAP_GET_READ_MIRRORS)
@@ -5871,7 +5872,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
ret = -ENOMEM;
goto out;
}
- if (dev_replace_is_ongoing)
+ if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
/* build raid_map */
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 5/7] Btrfs: do not add extra mirror when dev_replace target dev is not available
2017-02-18 1:28 ` [PATCH 5/7] Btrfs: do not add extra mirror when dev_replace target dev is not available Liu Bo
@ 2017-02-20 6:39 ` Qu Wenruo
0 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 6:39 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> With this, we can avoid allocating memory for dev replace copies if the
> target dev is not available.
The patch itself looks good.
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
However I'm just curious about in which case we can have
btrfs_dev_replace_is_going() returning 1 while dev_replace.tgtdev is NULL.
Since we are under the protect of btrfs_dev_replace_lock(), I wonder if
dev_replace->tgt/srcdev can be modified.
Thanks,
Qu
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> fs/btrfs/volumes.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index c434489..bcd44eb 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5153,7 +5153,8 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
> free_extent_map(em);
>
> btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
> - if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
> + if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
> + fs_info->dev_replace.tgtdev)
> ret++;
> btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
>
> @@ -5858,7 +5859,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> }
>
> num_alloc_stripes = num_stripes;
> - if (dev_replace_is_ongoing) {
> + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
> if (op == BTRFS_MAP_WRITE)
> num_alloc_stripes <<= 1;
> if (op == BTRFS_MAP_GET_READ_MIRRORS)
> @@ -5871,7 +5872,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> ret = -ENOMEM;
> goto out;
> }
> - if (dev_replace_is_ongoing)
> + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
> bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
>
> /* build raid_map */
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 6/7] Btrfs: helper for ops that requires full stripe
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (4 preceding siblings ...)
2017-02-18 1:28 ` [PATCH 5/7] Btrfs: do not add extra mirror when dev_replace target dev is not available Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 6:40 ` Qu Wenruo
2017-02-18 1:28 ` [PATCH 7/7] Btrfs: convert BUG_ON to WARN_ON Liu Bo
` (3 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
This adds a helper to show directly whether ops require full stripe.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/volumes.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bcd44eb..4a334a9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5640,6 +5640,11 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
*bbio_ret = bbio;
}
+static bool need_full_stripe(enum btrfs_map_op op)
+{
+ return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
+}
+
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
@@ -5742,8 +5747,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
- op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
- dev_replace->tgtdev != NULL) {
+ !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
ret = get_extra_mirror_from_replace(fs_info, logical, *length,
dev_replace->srcdev->devid,
&mirror_num,
@@ -5876,10 +5880,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
/* build raid_map */
- if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
- need_raid_map &&
- ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) ||
- mirror_num > 1)) {
+ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
+ (need_full_stripe(op) || mirror_num > 1)) {
u64 tmp;
unsigned rot;
@@ -5914,14 +5916,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
stripe_index++;
}
- if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
+ if (need_full_stripe(op))
max_errors = btrfs_chunk_max_errors(map);
if (bbio->raid_map)
sort_parity_stripes(bbio, num_stripes);
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
- (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)) {
+ need_full_stripe(op)) {
handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
&max_errors);
}
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 6/7] Btrfs: helper for ops that requires full stripe
2017-02-18 1:28 ` [PATCH 6/7] Btrfs: helper for ops that requires full stripe Liu Bo
@ 2017-02-20 6:40 ` Qu Wenruo
0 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 6:40 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> This adds a helper to show directly whether ops require full stripe.
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Looks good to me.
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Thanks,
Qu
> ---
> fs/btrfs/volumes.c | 18 ++++++++++--------
> 1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index bcd44eb..4a334a9 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5640,6 +5640,11 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
> *bbio_ret = bbio;
> }
>
> +static bool need_full_stripe(enum btrfs_map_op op)
> +{
> + return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
> +}
> +
> static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> enum btrfs_map_op op,
> u64 logical, u64 *length,
> @@ -5742,8 +5747,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> btrfs_dev_replace_set_lock_blocking(dev_replace);
>
> if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
> - op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS &&
> - dev_replace->tgtdev != NULL) {
> + !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
> ret = get_extra_mirror_from_replace(fs_info, logical, *length,
> dev_replace->srcdev->devid,
> &mirror_num,
> @@ -5876,10 +5880,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
>
> /* build raid_map */
> - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
> - need_raid_map &&
> - ((op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) ||
> - mirror_num > 1)) {
> + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
> + (need_full_stripe(op) || mirror_num > 1)) {
> u64 tmp;
> unsigned rot;
>
> @@ -5914,14 +5916,14 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
> stripe_index++;
> }
>
> - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)
> + if (need_full_stripe(op))
> max_errors = btrfs_chunk_max_errors(map);
>
> if (bbio->raid_map)
> sort_parity_stripes(bbio, num_stripes);
>
> if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
> - (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS)) {
> + need_full_stripe(op)) {
> handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
> &max_errors);
> }
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 7/7] Btrfs: convert BUG_ON to WARN_ON
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (5 preceding siblings ...)
2017-02-18 1:28 ` [PATCH 6/7] Btrfs: helper for ops that requires full stripe Liu Bo
@ 2017-02-18 1:28 ` Liu Bo
2017-02-20 6:42 ` Qu Wenruo
2017-02-18 11:03 ` [PATCH 0/7] cleanup __btrfs_map_block Mike Fleetwood
` (2 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2017-02-18 1:28 UTC (permalink / raw)
To: linux-btrfs; +Cc: David Sterba
These two BUG_ON()s would never be true, ensured by callers' logic.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
fs/btrfs/volumes.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4a334a9..ce74b6c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5170,7 +5170,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
unsigned long len = fs_info->sectorsize;
em = get_chunk_map(fs_info, logical, len);
- BUG_ON(IS_ERR(em));
+ WARN_ON(IS_ERR(em));
map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
@@ -5187,7 +5187,7 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
int ret = 0;
em = get_chunk_map(fs_info, logical, len);
- BUG_ON(IS_ERR(em));
+ WARN_ON(IS_ERR(em));
map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
--
2.5.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH 7/7] Btrfs: convert BUG_ON to WARN_ON
2017-02-18 1:28 ` [PATCH 7/7] Btrfs: convert BUG_ON to WARN_ON Liu Bo
@ 2017-02-20 6:42 ` Qu Wenruo
0 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 6:42 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> These two BUG_ON()s would never be true, ensured by callers' logic.
Never a bad idea to remove BUG_ON.
Reviewed-by: Qu Wenruo <quwenruo@cn.fujits.com>
Thanks,
Qu
>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> fs/btrfs/volumes.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 4a334a9..ce74b6c 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -5170,7 +5170,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
> unsigned long len = fs_info->sectorsize;
>
> em = get_chunk_map(fs_info, logical, len);
> - BUG_ON(IS_ERR(em));
> + WARN_ON(IS_ERR(em));
>
> map = em->map_lookup;
> if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
> @@ -5187,7 +5187,7 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
> int ret = 0;
>
> em = get_chunk_map(fs_info, logical, len);
> - BUG_ON(IS_ERR(em));
> + WARN_ON(IS_ERR(em));
>
> map = em->map_lookup;
> if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 0/7] cleanup __btrfs_map_block
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (6 preceding siblings ...)
2017-02-18 1:28 ` [PATCH 7/7] Btrfs: convert BUG_ON to WARN_ON Liu Bo
@ 2017-02-18 11:03 ` Mike Fleetwood
2017-02-20 3:12 ` Qu Wenruo
2017-03-14 17:27 ` David Sterba
9 siblings, 0 replies; 20+ messages in thread
From: Mike Fleetwood @ 2017-02-18 11:03 UTC (permalink / raw)
To: Liu Bo; +Cc: linux-btrfs, David Sterba
On 18 February 2017 at 01:28, Liu Bo <bo.li.liu@oracle.com> wrote:
> This is attempting to make __btrfs_map_block less scary :)
>
> The major changes are
>
> 1) split operations for discard out of __btrfs_map_block and we don't copy
> discard operations for the target device of dev replace since they're not
> as important as writes.
>
> 2) put dev replace stuff into helpers since they're basically
> self-independant.
Just a note on English. Self-independant / self-independent is a made
up word. (Used in PATCH 0/7, 3/7 and 4/7). I assume you intended to
use either self-contained or independent.
Thanks,
Mike
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 0/7] cleanup __btrfs_map_block
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (7 preceding siblings ...)
2017-02-18 11:03 ` [PATCH 0/7] cleanup __btrfs_map_block Mike Fleetwood
@ 2017-02-20 3:12 ` Qu Wenruo
2017-03-14 17:27 ` David Sterba
9 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2017-02-20 3:12 UTC (permalink / raw)
To: Liu Bo, linux-btrfs; +Cc: David Sterba
At 02/18/2017 09:28 AM, Liu Bo wrote:
> This is attempting to make __btrfs_map_block less scary :)
>
Great!!
That's also what I want to do in btrfs-progs.
And it would be better if we can take a step further to have a better
API for both pure mirrored/striped profile and parity profile.
I tried to do it in btrfs-progs with the following patchset, while it
needs more work to do in kernel.
https://patchwork.kernel.org/patch/9488519/
> The major changes are
>
> 1) split operations for discard out of __btrfs_map_block and we don't copy
> discard operations for the target device of dev replace since they're not
> as important as writes.
>
> 2) put dev replace stuff into helpers since they're basically
> self-independant.
Quite good starting point.
Thanks,
Qu
>
>
> Liu Bo (7):
> Btrfs: create a helper for getting chunk map
> Btrfs: separate DISCARD from __btrfs_map_block
> Btrfs: introduce a function to get extra mirror from replace
> Btrfs: handle operations for device replace separately
> Btrfs: do not add extra mirror when dev_replace target dev is not
> available
> Btrfs: helper for ops that requires full stripe
> Btrfs: convert BUG_ON to WARN_ON
>
> fs/btrfs/extent_io.c | 3 +-
> fs/btrfs/volumes.c | 824 +++++++++++++++++++++++++++------------------------
> fs/btrfs/volumes.h | 2 +-
> 3 files changed, 445 insertions(+), 384 deletions(-)
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 0/7] cleanup __btrfs_map_block
2017-02-18 1:28 [PATCH 0/7] cleanup __btrfs_map_block Liu Bo
` (8 preceding siblings ...)
2017-02-20 3:12 ` Qu Wenruo
@ 2017-03-14 17:27 ` David Sterba
9 siblings, 0 replies; 20+ messages in thread
From: David Sterba @ 2017-03-14 17:27 UTC (permalink / raw)
To: Liu Bo; +Cc: linux-btrfs, David Sterba
On Fri, Feb 17, 2017 at 05:28:14PM -0800, Liu Bo wrote:
> This is attempting to make __btrfs_map_block less scary :)
>
> The major changes are
>
> 1) split operations for discard out of __btrfs_map_block and we don't copy
> discard operations for the target device of dev replace since they're not
> as important as writes.
>
> 2) put dev replace stuff into helpers since they're basically
> self-independant.
The cleanup looks good to me, still reviewable. There's some feedback
beyond trivial, please update the patchset and resend so I can add it to
for-next.
^ permalink raw reply [flat|nested] 20+ messages in thread