From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-yw0-f179.google.com ([209.85.161.179]:34437 "EHLO mail-yw0-f179.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752982AbcC2RbH (ORCPT ); Tue, 29 Mar 2016 13:31:07 -0400 Received: by mail-yw0-f179.google.com with SMTP id h129so27867521ywb.1 for ; Tue, 29 Mar 2016 10:31:06 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: <1458610552-9845-12-git-send-email-quwenruo@cn.fujitsu.com> References: <1458610552-9845-1-git-send-email-quwenruo@cn.fujitsu.com> <1458610552-9845-12-git-send-email-quwenruo@cn.fujitsu.com> Date: Tue, 29 Mar 2016 19:31:06 +0200 Message-ID: Subject: Re: [PATCH v8 11/27] btrfs: dedupe: Introduce interfaces to resume and cleanup dedupe info From: Alex Lyakas To: Qu Wenruo Cc: linux-btrfs , Wang Xiaoguang Content-Type: text/plain; charset=UTF-8 Sender: linux-btrfs-owner@vger.kernel.org List-ID: Hi Qu, Wang, On Tue, Mar 22, 2016 at 3:35 AM, Qu Wenruo wrote: > Since we will introduce a new on-disk based dedupe method, introduce new > interfaces to resume previous dedupe setup. > > And since we introduce a new tree for status, also add disable handler > for it. > > Signed-off-by: Wang Xiaoguang > Signed-off-by: Qu Wenruo > --- > fs/btrfs/dedupe.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++++---- > fs/btrfs/dedupe.h | 13 +++ > fs/btrfs/disk-io.c | 21 ++++- > fs/btrfs/disk-io.h | 1 + > 4 files changed, 283 insertions(+), 21 deletions(-) > > diff --git a/fs/btrfs/dedupe.c b/fs/btrfs/dedupe.c > index 7ef2c37..1112fec 100644 > --- a/fs/btrfs/dedupe.c > +++ b/fs/btrfs/dedupe.c > @@ -21,6 +21,8 @@ > #include "transaction.h" > #include "delayed-ref.h" > #include "qgroup.h" > +#include "disk-io.h" > +#include "locking.h" > > struct inmem_hash { > struct rb_node hash_node; > @@ -41,10 +43,103 @@ static inline struct inmem_hash *inmem_alloc_hash(u16 type) > GFP_NOFS); > } > > +static int init_dedupe_info(struct btrfs_dedupe_info **ret_info, u16 type, > + u16 backend, u64 blocksize, u64 limit) > +{ > + struct btrfs_dedupe_info *dedupe_info; > + > + dedupe_info = kzalloc(sizeof(*dedupe_info), GFP_NOFS); > + if (!dedupe_info) > + return -ENOMEM; > + > + dedupe_info->hash_type = type; > + dedupe_info->backend = backend; > + dedupe_info->blocksize = blocksize; > + dedupe_info->limit_nr = limit; > + > + /* only support SHA256 yet */ > + dedupe_info->dedupe_driver = crypto_alloc_shash("sha256", 0, 0); > + if (IS_ERR(dedupe_info->dedupe_driver)) { > + int ret; > + > + ret = PTR_ERR(dedupe_info->dedupe_driver); > + kfree(dedupe_info); > + return ret; > + } > + > + dedupe_info->hash_root = RB_ROOT; > + dedupe_info->bytenr_root = RB_ROOT; > + dedupe_info->current_nr = 0; > + INIT_LIST_HEAD(&dedupe_info->lru_list); > + mutex_init(&dedupe_info->lock); > + > + *ret_info = dedupe_info; > + return 0; > +} > + > +static int init_dedupe_tree(struct btrfs_fs_info *fs_info, > + struct btrfs_dedupe_info *dedupe_info) > +{ > + struct btrfs_root *dedupe_root; > + struct btrfs_key key; > + struct btrfs_path *path; > + struct btrfs_dedupe_status_item *status; > + struct btrfs_trans_handle *trans; > + int ret; > + > + path = btrfs_alloc_path(); > + if (!path) > + return -ENOMEM; > + > + trans = btrfs_start_transaction(fs_info->tree_root, 2); > + if (IS_ERR(trans)) { > + ret = PTR_ERR(trans); > + goto out; > + } > + dedupe_root = btrfs_create_tree(trans, fs_info, > + BTRFS_DEDUPE_TREE_OBJECTID); > + if (IS_ERR(dedupe_root)) { > + ret = PTR_ERR(dedupe_root); > + btrfs_abort_transaction(trans, fs_info->tree_root, ret); > + goto out; > + } > + dedupe_info->dedupe_root = dedupe_root; > + > + key.objectid = 0; > + key.type = BTRFS_DEDUPE_STATUS_ITEM_KEY; > + key.offset = 0; > + > + ret = btrfs_insert_empty_item(trans, dedupe_root, path, &key, > + sizeof(*status)); > + if (ret < 0) { > + btrfs_abort_transaction(trans, fs_info->tree_root, ret); > + goto out; > + } > + > + status = btrfs_item_ptr(path->nodes[0], path->slots[0], > + struct btrfs_dedupe_status_item); > + btrfs_set_dedupe_status_blocksize(path->nodes[0], status, > + dedupe_info->blocksize); > + btrfs_set_dedupe_status_limit(path->nodes[0], status, > + dedupe_info->limit_nr); > + btrfs_set_dedupe_status_hash_type(path->nodes[0], status, > + dedupe_info->hash_type); > + btrfs_set_dedupe_status_backend(path->nodes[0], status, > + dedupe_info->backend); > + btrfs_mark_buffer_dirty(path->nodes[0]); > +out: > + btrfs_free_path(path); > + if (ret == 0) > + btrfs_commit_transaction(trans, fs_info->tree_root); > + return ret; > +} > + > int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, > u64 blocksize, u64 limit_nr) > { > struct btrfs_dedupe_info *dedupe_info; > + int create_tree; > + u64 compat_ro_flag = btrfs_super_compat_ro_flags(fs_info->super_copy); > u64 limit = limit_nr; > int ret = 0; > > @@ -63,6 +158,14 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, > limit = BTRFS_DEDUPE_LIMIT_NR_DEFAULT; > if (backend == BTRFS_DEDUPE_BACKEND_ONDISK && limit_nr != 0) > limit = 0; > + /* Ondisk backend needs DEDUP RO compat feature */ > + if (!(compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUPE) && > + backend == BTRFS_DEDUPE_BACKEND_ONDISK) > + return -EOPNOTSUPP; > + > + /* Meaningless and unable to enable dedupe for RO fs */ > + if (fs_info->sb->s_flags & MS_RDONLY) > + return -EROFS; > > dedupe_info = fs_info->dedupe_info; > if (dedupe_info) { > @@ -81,29 +184,71 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, > return 0; > } > > + dedupe_info = NULL; > enable: > - dedupe_info = kzalloc(sizeof(*dedupe_info), GFP_NOFS); > - if (dedupe_info) > + create_tree = compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUPE; > + > + ret = init_dedupe_info(&dedupe_info, type, backend, blocksize, limit); > + if (ret < 0) > + return ret; > + if (create_tree) { > + ret = init_dedupe_tree(fs_info, dedupe_info); > + if (ret < 0) > + goto out; > + } > + > + fs_info->dedupe_info = dedupe_info; I think this leaks memory. If previously we had a valid fs_info->dedupe_info, it will remain allocated. > + /* We must ensure dedupe_enabled is modified after dedupe_info */ > + smp_wmb(); > + fs_info->dedupe_enabled = 1; > +out: > + if (ret < 0) { > + crypto_free_shash(dedupe_info->dedupe_driver); > + kfree(dedupe_info); > + } > + return ret; > +} > + > +int btrfs_dedupe_resume(struct btrfs_fs_info *fs_info, > + struct btrfs_root *dedupe_root) > +{ > + struct btrfs_dedupe_info *dedupe_info; > + struct btrfs_dedupe_status_item *status; > + struct btrfs_key key; > + struct btrfs_path *path; > + u64 blocksize; > + u64 limit; > + u16 type; > + u16 backend; > + int ret = 0; > + > + path = btrfs_alloc_path(); > + if (!path) > return -ENOMEM; > > - dedupe_info->hash_type = type; > - dedupe_info->backend = backend; > - dedupe_info->blocksize = blocksize; > - dedupe_info->limit_nr = limit; > + key.objectid = 0; > + key.type = BTRFS_DEDUPE_STATUS_ITEM_KEY; > + key.offset = 0; > > - /* Only support SHA256 yet */ > - dedupe_info->dedupe_driver = crypto_alloc_shash("sha256", 0, 0); > - if (IS_ERR(dedupe_info->dedupe_driver)) { > - btrfs_err(fs_info, "failed to init sha256 driver"); > - ret = PTR_ERR(dedupe_info->dedupe_driver); > + ret = btrfs_search_slot(NULL, dedupe_root, &key, path, 0, 0); > + if (ret > 0) { > + ret = -ENOENT; > + goto out; > + } else if (ret < 0) { > goto out; > } > > - dedupe_info->hash_root = RB_ROOT; > - dedupe_info->bytenr_root = RB_ROOT; > - dedupe_info->current_nr = 0; > - INIT_LIST_HEAD(&dedupe_info->lru_list); > - mutex_init(&dedupe_info->lock); > + status = btrfs_item_ptr(path->nodes[0], path->slots[0], > + struct btrfs_dedupe_status_item); > + blocksize = btrfs_dedupe_status_blocksize(path->nodes[0], status); > + limit = btrfs_dedupe_status_limit(path->nodes[0], status); > + type = btrfs_dedupe_status_hash_type(path->nodes[0], status); > + backend = btrfs_dedupe_status_backend(path->nodes[0], status); > + > + ret = init_dedupe_info(&dedupe_info, type, backend, blocksize, limit); > + if (ret < 0) > + goto out; > + dedupe_info->dedupe_root = dedupe_root; > > fs_info->dedupe_info = dedupe_info; > /* We must ensure dedupe_enabled is modified after dedupe_info */ > @@ -111,11 +256,36 @@ enable: > fs_info->dedupe_enabled = 1; > > out: > - if (ret < 0) > - kfree(dedupe_info); > + btrfs_free_path(path); > return ret; > } > > +static void inmem_destroy(struct btrfs_dedupe_info *dedupe_info); > +int btrfs_dedupe_cleanup(struct btrfs_fs_info *fs_info) > +{ > + struct btrfs_dedupe_info *dedupe_info; > + > + fs_info->dedupe_enabled = 0; > + > + /* same as disable */ > + smp_wmb(); > + dedupe_info = fs_info->dedupe_info; > + fs_info->dedupe_info = NULL; > + > + if (!dedupe_info) > + return 0; > + > + if (dedupe_info->backend == BTRFS_DEDUPE_BACKEND_INMEMORY) > + inmem_destroy(dedupe_info); > + if (dedupe_info->dedupe_root) { > + free_root_extent_buffers(dedupe_info->dedupe_root); > + kfree(dedupe_info->dedupe_root); > + } > + crypto_free_shash(dedupe_info->dedupe_driver); > + kfree(dedupe_info); > + return 0; > +} > + > static int inmem_insert_hash(struct rb_root *root, > struct inmem_hash *hash, int hash_len) > { > @@ -325,6 +495,65 @@ static void inmem_destroy(struct btrfs_dedupe_info *dedupe_info) > mutex_unlock(&dedupe_info->lock); > } > > +static int remove_dedupe_tree(struct btrfs_root *dedupe_root) > +{ > + struct btrfs_trans_handle *trans; > + struct btrfs_fs_info *fs_info = dedupe_root->fs_info; > + struct btrfs_path *path; > + struct btrfs_key key; > + struct extent_buffer *node; > + int ret; > + int nr; > + > + path = btrfs_alloc_path(); > + if (!path) > + return -ENOMEM; > + trans = btrfs_start_transaction(fs_info->tree_root, 2); > + if (IS_ERR(trans)) { > + ret = PTR_ERR(trans); > + goto out; > + } > + > + path->leave_spinning = 1; > + key.objectid = 0; > + key.offset = 0; > + key.type = 0; > + > + while (1) { > + ret = btrfs_search_slot(trans, dedupe_root, &key, path, -1, 1); > + if (ret < 0) > + goto out; > + node = path->nodes[0]; > + nr = btrfs_header_nritems(node); > + if (nr == 0) { > + btrfs_release_path(path); > + break; > + } > + path->slots[0] = 0; > + ret = btrfs_del_items(trans, dedupe_root, path, 0, nr); > + if (ret) > + goto out; > + btrfs_release_path(path); > + } > + > + ret = btrfs_del_root(trans, fs_info->tree_root, &dedupe_root->root_key); > + if (ret) > + goto out; > + > + list_del(&dedupe_root->dirty_list); > + btrfs_tree_lock(dedupe_root->node); > + clean_tree_block(trans, fs_info, dedupe_root->node); > + btrfs_tree_unlock(dedupe_root->node); > + btrfs_free_tree_block(trans, dedupe_root, dedupe_root->node, 0, 1); > + free_extent_buffer(dedupe_root->node); > + free_extent_buffer(dedupe_root->commit_root); > + kfree(dedupe_root); > + ret = btrfs_commit_transaction(trans, fs_info->tree_root); > +out: > + btrfs_free_path(path); > + return ret; > +} > + > int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info) > { > struct btrfs_dedupe_info *dedupe_info; > @@ -358,10 +587,12 @@ int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info) > /* now we are OK to clean up everything */ > if (dedupe_info->backend == BTRFS_DEDUPE_BACKEND_INMEMORY) > inmem_destroy(dedupe_info); > + if (dedupe_info->dedupe_root) > + ret = remove_dedupe_tree(dedupe_info->dedupe_root); > > crypto_free_shash(dedupe_info->dedupe_driver); > kfree(dedupe_info); > - return 0; > + return ret; > } > > /* > diff --git a/fs/btrfs/dedupe.h b/fs/btrfs/dedupe.h > index 537f0b8..120e630 100644 > --- a/fs/btrfs/dedupe.h > +++ b/fs/btrfs/dedupe.h > @@ -112,6 +112,19 @@ int btrfs_dedupe_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend, > */ > int btrfs_dedupe_disable(struct btrfs_fs_info *fs_info); > > + /* > + * Restore previous dedupe setup from disk > + * Called at mount time > + */ > +int btrfs_dedupe_resume(struct btrfs_fs_info *fs_info, > + struct btrfs_root *dedupe_root); > + > +/* > + * Cleanup current btrfs_dedupe_info > + * Called in umount time > + */ > +int btrfs_dedupe_cleanup(struct btrfs_fs_info *fs_info); > + > /* > * Calculate hash for dedup. > * Caller must ensure [start, start + dedupe_bs) has valid data. > diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c > index 57ae928..44d098d 100644 > --- a/fs/btrfs/disk-io.c > +++ b/fs/btrfs/disk-io.c > @@ -51,6 +51,7 @@ > #include "sysfs.h" > #include "qgroup.h" > #include "compression.h" > +#include "dedupe.h" > > #ifdef CONFIG_X86 > #include > @@ -2156,7 +2157,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) > btrfs_destroy_workqueue(fs_info->extent_workers); > } > > -static void free_root_extent_buffers(struct btrfs_root *root) > +void free_root_extent_buffers(struct btrfs_root *root) > { > if (root) { > free_extent_buffer(root->node); > @@ -2490,7 +2491,21 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info, > fs_info->free_space_root = root; > } > > - return 0; > + location.objectid = BTRFS_DEDUPE_TREE_OBJECTID; > + root = btrfs_read_tree_root(tree_root, &location); > + if (IS_ERR(root)) { > + ret = PTR_ERR(root); > + if (ret != -ENOENT) > + return ret; > + return 0; > + } > + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); > + ret = btrfs_dedupe_resume(fs_info, root); > + if (ret < 0) { > + free_root_extent_buffers(root); > + kfree(root); > + } > + return ret; > } > > int open_ctree(struct super_block *sb, > @@ -3885,6 +3900,8 @@ void close_ctree(struct btrfs_root *root) > > btrfs_free_qgroup_config(fs_info); > > + btrfs_dedupe_cleanup(fs_info); > + > if (percpu_counter_sum(&fs_info->delalloc_bytes)) { > btrfs_info(fs_info, "at unmount delalloc count %lld", > percpu_counter_sum(&fs_info->delalloc_bytes)); > diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h > index 8e79d00..42c4ff2 100644 > --- a/fs/btrfs/disk-io.h > +++ b/fs/btrfs/disk-io.h > @@ -70,6 +70,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, > int btrfs_init_fs_root(struct btrfs_root *root); > int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, > struct btrfs_root *root); > +void free_root_extent_buffers(struct btrfs_root *root); > void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); > > struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, > -- > 2.7.3 > > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html