* [PATCH v5 01/19] btrfs: dedup: Introduce dedup framework and its header
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 02/19] btrfs: dedup: Introduce function to initialize dedup info Qu Wenruo
` (17 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Introduce the header for the btrfs online (write time) de-duplication
framework.
The new de-duplication framework is going to support 2 different dedup
methods and 1 dedup hash algorithm.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/ctree.h | 5 ++
fs/btrfs/dedup.h | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/disk-io.c | 2 +
3 files changed, 164 insertions(+)
create mode 100644 fs/btrfs/dedup.h
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a949664..034216e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1860,6 +1860,11 @@ struct btrfs_fs_info {
struct list_head pinned_chunks;
int creating_free_space_tree;
+
+ /* reference to inband de-duplication info */
+ struct btrfs_dedup_info *dedup_info;
+ spinlock_t dedup_ref_lock;
+ struct mutex dedup_ioctl_lock;
};
struct btrfs_subvolume_writers {
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
new file mode 100644
index 0000000..d4f072d
--- /dev/null
+++ b/fs/btrfs/dedup.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2015 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_DEDUP__
+#define __BTRFS_DEDUP__
+
+#include <linux/btrfs.h>
+#include <linux/wait.h>
+#include <crypto/hash.h>
+
+/*
+ * Dedup storage backend
+ * On-disk is persistent storage but its overhead is large
+ * In-memory is fast but will lose all its hashes on unmount
+ */
+#define BTRFS_DEDUP_BACKEND_INMEMORY 0
+#define BTRFS_DEDUP_BACKEND_ONDISK 1
+#define BTRFS_DEDUP_BACKEND_LAST 2
+
+/* Dedup block size limit and default value */
+#define BTRFS_DEDUP_BLOCKSIZE_MAX (8 * 1024 * 1024)
+#define BTRFS_DEDUP_BLOCKSIZE_MIN (16 * 1024)
+#define BTRFS_DEDUP_BLOCKSIZE_DEFAULT (128 * 1024)
+
+/* Hash algorithm; only SHA256 is supported yet */
+#define BTRFS_DEDUP_HASH_SHA256 0
+
+static int btrfs_dedup_sizes[] = { 32 };
+
+/*
+ * For caller outside of dedup.c
+ *
+ * Different dedup backends should have their own hash structure
+ */
+struct btrfs_dedup_hash {
+ u64 bytenr;
+ u32 num_bytes;
+
+ /* last field is a variable length array of dedup hash */
+ u8 hash[];
+};
+
+struct btrfs_dedup_info {
+ /* dedup blocksize */
+ u64 blocksize;
+ u16 backend;
+ u16 hash_type;
+
+ struct crypto_shash *dedup_driver;
+ struct mutex lock;
+
+ /* To wait for all existing callers */
+ atomic_t refs;
+ wait_queue_head_t refs_wq;
+
+ /* following members are only used in in-memory dedup mode */
+ struct rb_root hash_root;
+ struct rb_root bytenr_root;
+ struct list_head lru_list;
+ u64 limit_nr;
+ u64 current_nr;
+};
+
+struct btrfs_trans_handle;
+
+int btrfs_dedup_hash_size(u16 type);
+struct btrfs_dedup_hash *btrfs_dedup_alloc_hash(u16 type);
+
+/*
+ * Initial inband dedup info
+ * Called at dedup enable time.
+ */
+int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
+ u64 blocksize, u64 limit_nr);
+
+/*
+ * Disable dedup and invalidate all its dedup data.
+ * Called at dedup disable time.
+ */
+int btrfs_dedup_disable(struct btrfs_fs_info *fs_info);
+
+/*
+ * Callers need to grab a valid dedup_info via this function,
+ * not grab it from fs_info directly.
+ */
+static inline struct btrfs_dedup_info *
+btrfs_dedup_get_info(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_dedup_info *dedup_info;
+
+ spin_lock(&fs_info->dedup_ref_lock);
+ dedup_info = fs_info->dedup_info;
+ if (dedup_info)
+ atomic_inc(&dedup_info->refs);
+ spin_unlock(&fs_info->dedup_ref_lock);
+
+ return dedup_info;
+}
+
+static inline void btrfs_dedup_put_info(struct btrfs_dedup_info *dedup_info)
+{
+ if (!dedup_info)
+ return;
+ atomic_dec(&dedup_info->refs);
+ wake_up(&dedup_info->refs_wq);
+}
+
+/*
+ * Calculate hash for dedup.
+ * Caller must ensure [start, start + dedup_bs) has valid data.
+ */
+int btrfs_dedup_calc_hash(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode, u64 start,
+ struct btrfs_dedup_hash *hash);
+
+/*
+ * Search for duplicated extents by calculated hash
+ * Caller must call btrfs_dedup_calc_hash() first to get the hash.
+ *
+ * @inode: the inode we are writing to
+ * @file_pos: offset inside the inode
+ * As we will increase extent ref immediately after a hash match,
+ * we need @file_pos and @inode in this case.
+ *
+ * Return > 0 for a hash match, and the extent ref will be
+ * *INCREASED*, and hash->bytenr/num_bytes will record the existing
+ * extent data.
+ * Return 0 for a hash miss. Nothing is done
+ */
+int btrfs_dedup_search(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode, u64 file_pos,
+ struct btrfs_dedup_hash *hash);
+
+/* Add a dedup hash into dedup info */
+int btrfs_dedup_add(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info,
+ struct btrfs_dedup_hash *hash);
+
+/* Remove a dedup hash from dedup info */
+int btrfs_dedup_del(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info, u64 bytenr);
+#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 50bed6c..84825e5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2575,12 +2575,14 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
+ spin_lock_init(&fs_info->dedup_ref_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->delete_unused_bgs_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
mutex_init(&fs_info->cleaner_delayed_iput_mutex);
+ mutex_init(&fs_info->dedup_ioctl_lock);
seqlock_init(&fs_info->profiles_lock);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 02/19] btrfs: dedup: Introduce function to initialize dedup info
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 01/19] btrfs: dedup: Introduce dedup framework and its header Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 03/19] btrfs: dedup: Introduce function to add hash into in-memory tree Qu Wenruo
` (16 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Add generic function to initialize dedup info.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/Makefile | 2 +-
fs/btrfs/dedup.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/dedup.h | 17 ++++++++--
3 files changed, 116 insertions(+), 3 deletions(-)
create mode 100644 fs/btrfs/dedup.c
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 128ce17..a6207ff 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
- uuid-tree.o props.o hash.o free-space-tree.o
+ uuid-tree.o props.o hash.o free-space-tree.o dedup.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
new file mode 100644
index 0000000..b37db50
--- /dev/null
+++ b/fs/btrfs/dedup.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2015 Fujitsu. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include "ctree.h"
+#include "dedup.h"
+#include "btrfs_inode.h"
+#include "transaction.h"
+#include "delayed-ref.h"
+
+int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
+ u64 blocksize, u64 limit_nr)
+{
+ struct btrfs_dedup_info *dedup_info;
+ u64 limit = limit_nr;
+ int ret = 0;
+
+ /* Sanity check */
+ if (blocksize > BTRFS_DEDUP_BLOCKSIZE_MAX ||
+ blocksize < BTRFS_DEDUP_BLOCKSIZE_MIN ||
+ blocksize < fs_info->tree_root->sectorsize ||
+ !is_power_of_2(blocksize))
+ return -EINVAL;
+ if (type >= ARRAY_SIZE(btrfs_dedup_sizes))
+ return -EINVAL;
+ if (backend >= BTRFS_DEDUP_BACKEND_LAST)
+ return -EINVAL;
+
+ if (backend == BTRFS_DEDUP_BACKEND_INMEMORY && limit_nr == 0)
+ limit = 4096; /* default value */
+ if (backend == BTRFS_DEDUP_BACKEND_ONDISK && limit_nr != 0)
+ limit = 0;
+
+ dedup_info = btrfs_dedup_get_info(fs_info);
+ if (dedup_info) {
+
+ /* Check if we are re-enable for different dedup config */
+ if (dedup_info->blocksize != blocksize ||
+ dedup_info->hash_type != type ||
+ dedup_info->backend != backend) {
+ btrfs_dedup_put_info(dedup_info);
+ btrfs_dedup_disable(fs_info);
+ goto enable;
+ }
+
+ /* On-fly limit change is OK */
+ mutex_lock(&dedup_info->lock);
+ dedup_info->limit_nr = limit;
+ mutex_unlock(&dedup_info->lock);
+ btrfs_dedup_put_info(dedup_info);
+ return 0;
+ }
+
+enable:
+ dedup_info = kzalloc(sizeof(*dedup_info), GFP_NOFS);
+ if (!dedup_info)
+ return -ENOMEM;
+
+ dedup_info->hash_type = type;
+ dedup_info->backend = backend;
+ dedup_info->blocksize = blocksize;
+ dedup_info->limit_nr = limit;
+
+ /* Only support SHA256 yet */
+ dedup_info->dedup_driver = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(dedup_info->dedup_driver)) {
+ btrfs_err(fs_info, "failed to init sha256 driver");
+ ret = PTR_ERR(dedup_info->dedup_driver);
+ goto out;
+ }
+
+ dedup_info->hash_root = RB_ROOT;
+ dedup_info->bytenr_root = RB_ROOT;
+ dedup_info->current_nr = 0;
+ INIT_LIST_HEAD(&dedup_info->lru_list);
+ mutex_init(&dedup_info->lock);
+ init_waitqueue_head(&dedup_info->refs_wq);
+ atomic_set(&dedup_info->refs, 0);
+
+ spin_lock(&fs_info->dedup_ref_lock);
+ fs_info->dedup_info = dedup_info;
+ spin_unlock(&fs_info->dedup_ref_lock);
+out:
+ if (ret < 0)
+ kfree(dedup_info);
+ return ret;
+}
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
index d4f072d..8fed1ce 100644
--- a/fs/btrfs/dedup.h
+++ b/fs/btrfs/dedup.h
@@ -37,6 +37,9 @@
#define BTRFS_DEDUP_BLOCKSIZE_MIN (16 * 1024)
#define BTRFS_DEDUP_BLOCKSIZE_DEFAULT (128 * 1024)
+/* Default dedup limit on number of hash */
+#define BTRFS_DEDUP_LIMIT_NR_DEFAULT (32 * 1024)
+
/* Hash algorithm, only support SHA256 yet */
#define BTRFS_DEDUP_HASH_SHA256 0
@@ -78,8 +81,18 @@ struct btrfs_dedup_info {
struct btrfs_trans_handle;
-int btrfs_dedup_hash_size(u16 type);
-struct btrfs_dedup_hash *btrfs_dedup_alloc_hash(u16 type);
+static inline int btrfs_dedup_hash_size(u16 type)
+{
+ if (WARN_ON(type >= ARRAY_SIZE(btrfs_dedup_sizes)))
+ return -EINVAL;
+ return sizeof(struct btrfs_dedup_hash) + btrfs_dedup_sizes[type];
+}
+
+static inline struct btrfs_dedup_hash *btrfs_dedup_alloc_hash(u16 type)
+{
+ return kzalloc(btrfs_dedup_hash_size(type), GFP_NOFS);
+}
+
/*
* Initial inband dedup info
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 03/19] btrfs: dedup: Introduce function to add hash into in-memory tree
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 01/19] btrfs: dedup: Introduce dedup framework and its header Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 02/19] btrfs: dedup: Introduce function to initialize dedup info Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 04/19] btrfs: dedup: Introduce function to remove hash from " Qu Wenruo
` (15 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Introduce static function inmem_add() to add hash into in-memory tree.
And now we can implement the btrfs_dedup_add() interface.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 157 insertions(+)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index b37db50..b6bff0a 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -21,6 +21,25 @@
#include "transaction.h"
#include "delayed-ref.h"
+struct inmem_hash {
+ struct rb_node hash_node;
+ struct rb_node bytenr_node;
+ struct list_head lru_list;
+
+ u64 bytenr;
+ u32 num_bytes;
+
+ u8 hash[];
+};
+
+static inline struct inmem_hash *inmem_alloc_hash(u16 type)
+{
+ if (WARN_ON(type >= ARRAY_SIZE(btrfs_dedup_sizes)))
+ return NULL;
+ return kzalloc(sizeof(struct inmem_hash) + btrfs_dedup_sizes[type],
+ GFP_NOFS);
+}
+
int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
u64 blocksize, u64 limit_nr)
{
@@ -98,3 +117,141 @@ out:
kfree(dedup_info);
return ret;
}
+
+static int inmem_insert_hash(struct rb_root *root,
+ struct inmem_hash *hash, int hash_len)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct inmem_hash *entry = NULL;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct inmem_hash, hash_node);
+ if (memcmp(hash->hash, entry->hash, hash_len) < 0)
+ p = &(*p)->rb_left;
+ else if (memcmp(hash->hash, entry->hash, hash_len) > 0)
+ p = &(*p)->rb_right;
+ else
+ return 1;
+ }
+ rb_link_node(&hash->hash_node, parent, p);
+ rb_insert_color(&hash->hash_node, root);
+ return 0;
+}
+
+static int inmem_insert_bytenr(struct rb_root *root,
+ struct inmem_hash *hash)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct inmem_hash *entry = NULL;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct inmem_hash, bytenr_node);
+ if (hash->bytenr < entry->bytenr)
+ p = &(*p)->rb_left;
+ else if (hash->bytenr > entry->bytenr)
+ p = &(*p)->rb_right;
+ else
+ return 1;
+ }
+ rb_link_node(&hash->bytenr_node, parent, p);
+ rb_insert_color(&hash->bytenr_node, root);
+ return 0;
+}
+
+static void __inmem_del(struct btrfs_dedup_info *dedup_info,
+ struct inmem_hash *hash)
+{
+ list_del(&hash->lru_list);
+ rb_erase(&hash->hash_node, &dedup_info->hash_root);
+ rb_erase(&hash->bytenr_node, &dedup_info->bytenr_root);
+
+ if (!WARN_ON(dedup_info->current_nr == 0))
+ dedup_info->current_nr--;
+
+ kfree(hash);
+}
+
+/*
+ * Insert a hash into in-memory dedup tree
+ * Will remove the least recently used hash if the limit is exceeded.
+ *
+ * If the hash matches an existing one, we won't insert it, to
+ * save memory
+ */
+static int inmem_add(struct btrfs_dedup_info *dedup_info,
+ struct btrfs_dedup_hash *hash)
+{
+ int ret = 0;
+ u16 type = dedup_info->hash_type;
+ struct inmem_hash *ihash;
+
+ ihash = inmem_alloc_hash(type);
+
+ if (!ihash)
+ return -ENOMEM;
+
+ /* Copy the data out */
+ ihash->bytenr = hash->bytenr;
+ ihash->num_bytes = hash->num_bytes;
+ memcpy(ihash->hash, hash->hash, btrfs_dedup_sizes[type]);
+
+ mutex_lock(&dedup_info->lock);
+
+ ret = inmem_insert_bytenr(&dedup_info->bytenr_root, ihash);
+ if (ret > 0) {
+ kfree(ihash);
+ ret = 0;
+ goto out;
+ }
+
+ ret = inmem_insert_hash(&dedup_info->hash_root, ihash,
+ btrfs_dedup_sizes[type]);
+ if (ret > 0) {
+ /*
+ * We only keep one hash in tree to save memory, so if
+ * hash conflicts, free the one to insert.
+ */
+ rb_erase(&ihash->bytenr_node, &dedup_info->bytenr_root);
+ kfree(ihash);
+ ret = 0;
+ goto out;
+ }
+
+ list_add(&ihash->lru_list, &dedup_info->lru_list);
+ dedup_info->current_nr++;
+
+ /* Remove the least recently used hash if we exceed the limit */
+ while (dedup_info->current_nr > dedup_info->limit_nr) {
+ struct inmem_hash *last;
+
+ last = list_entry(dedup_info->lru_list.prev,
+ struct inmem_hash, lru_list);
+ __inmem_del(dedup_info, last);
+ }
+out:
+ mutex_unlock(&dedup_info->lock);
+ return 0;
+}
+
+int btrfs_dedup_add(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info,
+ struct btrfs_dedup_hash *hash)
+{
+ if (!dedup_info || !hash)
+ return 0;
+
+ /* ignore old hash */
+ if (dedup_info->blocksize != hash->num_bytes)
+ return 0;
+
+ if (WARN_ON(hash->bytenr == 0))
+ return -EINVAL;
+
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
+ return inmem_add(dedup_info, hash);
+ return -EINVAL;
+}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 04/19] btrfs: dedup: Introduce function to remove hash from in-memory tree
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (2 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 03/19] btrfs: dedup: Introduce function to add hash into in-memory tree Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 05/19] btrfs: delayed-ref: Add support for increasing data ref under spinlock Qu Wenruo
` (14 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Introduce static function inmem_del() to remove hash from in-memory
dedup tree.
And implement btrfs_dedup_del() and btrfs_dedup_destroy() interfaces.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 88 insertions(+)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index b6bff0a..57c9dfe 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -255,3 +255,91 @@ int btrfs_dedup_add(struct btrfs_trans_handle *trans,
return inmem_add(dedup_info, hash);
return -EINVAL;
}
+
+static struct inmem_hash *
+inmem_search_bytenr(struct btrfs_dedup_info *dedup_info, u64 bytenr)
+{
+ struct rb_node **p = &dedup_info->bytenr_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct inmem_hash *entry = NULL;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct inmem_hash, bytenr_node);
+
+ if (bytenr < entry->bytenr)
+ p = &(*p)->rb_left;
+ else if (bytenr > entry->bytenr)
+ p = &(*p)->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+/* Delete a hash from in-memory dedup tree */
+static int inmem_del(struct btrfs_dedup_info *dedup_info, u64 bytenr)
+{
+ struct inmem_hash *hash;
+
+ mutex_lock(&dedup_info->lock);
+ hash = inmem_search_bytenr(dedup_info, bytenr);
+ if (!hash) {
+ mutex_unlock(&dedup_info->lock);
+ return 0;
+ }
+
+ __inmem_del(dedup_info, hash);
+ mutex_unlock(&dedup_info->lock);
+ return 0;
+}
+
+/* Remove a dedup hash from dedup tree */
+int btrfs_dedup_del(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info, u64 bytenr)
+{
+ if (!dedup_info)
+ return 0;
+
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
+ return inmem_del(dedup_info, bytenr);
+ return -EINVAL;
+}
+
+static void inmem_destroy(struct btrfs_dedup_info *dedup_info)
+{
+ struct inmem_hash *entry, *tmp;
+
+ mutex_lock(&dedup_info->lock);
+ list_for_each_entry_safe(entry, tmp, &dedup_info->lru_list, lru_list)
+ __inmem_del(dedup_info, entry);
+ mutex_unlock(&dedup_info->lock);
+}
+
+int btrfs_dedup_disable(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_dedup_info *dedup_info;
+
+ /* Here we don't want to increase refs of dedup_info */
+ spin_lock(&fs_info->dedup_ref_lock);
+ dedup_info = fs_info->dedup_info;
+
+ /* Block other caller from deduping */
+ fs_info->dedup_info = NULL;
+ spin_unlock(&fs_info->dedup_ref_lock);
+
+ if (!dedup_info)
+ return 0;
+
+ /* Wait all existing callers exit */
+ wait_event(dedup_info->refs_wq, atomic_read(&dedup_info->refs) == 0);
+
+ /* now we are OK to clean up everything */
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
+ inmem_destroy(dedup_info);
+
+ crypto_free_shash(dedup_info->dedup_driver);
+ kfree(dedup_info);
+ return 0;
+}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 05/19] btrfs: delayed-ref: Add support for increasing data ref under spinlock
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (3 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 04/19] btrfs: dedup: Introduce function to remove hash from " Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 06/19] btrfs: dedup: Introduce function to search for an existing hash Qu Wenruo
` (13 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs
For in-band dedup, btrfs needs to increase data ref with delayed_ref
locked, so add a new function btrfs_add_delayed_data_ref_lock() to
increase extent ref with delayed_refs already locked.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 1 +
fs/btrfs/delayed-ref.c | 30 +++++++++++++++++++++++-------
fs/btrfs/delayed-ref.h | 8 ++++++++
3 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index 57c9dfe..be93cf3 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -20,6 +20,7 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "delayed-ref.h"
+#include "qgroup.h"
struct inmem_hash {
struct rb_node hash_node;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 914ac13..1091810 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -805,6 +805,26 @@ free_ref:
}
/*
+ * Do real delayed data ref insert.
+ * Caller must hold delayed_refs->lock and allocation memory
+ * for dref,head_ref and record.
+ */
+void btrfs_add_delayed_data_ref_locked(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_data_ref *dref,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
+ u64 owner, u64 offset, u64 reserved, int action)
+{
+ head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+ qrecord, bytenr, num_bytes, ref_root, reserved,
+ action, 1);
+ add_delayed_data_ref(fs_info, trans, head_ref, &dref->node, bytenr,
+ num_bytes, parent, ref_root, owner, offset, action);
+}
+
+/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
@@ -849,13 +869,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
- bytenr, num_bytes, ref_root, reserved,
- action, 1);
-
- add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
- num_bytes, parent, ref_root, owner, offset,
- action);
+ btrfs_add_delayed_data_ref_locked(fs_info, trans, ref, head_ref, record,
+ bytenr, num_bytes, parent, ref_root, owner, offset,
+ reserved, action);
spin_unlock(&delayed_refs->lock);
return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c24b653..2765858 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -239,11 +239,19 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
}
}
+struct btrfs_qgroup_extent_record;
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op);
+void btrfs_add_delayed_data_ref_locked(struct btrfs_fs_info *fs_info,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_delayed_data_ref *dref,
+ struct btrfs_delayed_ref_head *head_ref,
+ struct btrfs_qgroup_extent_record *qrecord,
+ u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
+ u64 owner, u64 offset, u64 reserved, int action);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 06/19] btrfs: dedup: Introduce function to search for an existing hash
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (4 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 05/19] btrfs: delayed-ref: Add support for increasing data ref under spinlock Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 07/19] btrfs: dedup: Implement btrfs_dedup_calc_hash interface Qu Wenruo
` (12 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Introduce static function inmem_search() to handle the job for in-memory
hash tree.
The trick is, we must ensure the delayed ref head is not being run at
the time we search for the hash.
With inmem_search(), we can implement the btrfs_dedup_search()
interface.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 172 insertions(+)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index be93cf3..20749ce 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -344,3 +344,175 @@ int btrfs_dedup_disable(struct btrfs_fs_info *fs_info)
kfree(dedup_info);
return 0;
}
+
+/*
+ * Caller must ensure the corresponding ref head is not being run.
+ */
+static struct inmem_hash *
+inmem_search_hash(struct btrfs_dedup_info *dedup_info, u8 *hash)
+{
+ struct rb_node **p = &dedup_info->hash_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct inmem_hash *entry = NULL;
+ u16 hash_type = dedup_info->hash_type;
+ int hash_len = btrfs_dedup_sizes[hash_type];
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct inmem_hash, hash_node);
+
+ if (memcmp(hash, entry->hash, hash_len) < 0) {
+ p = &(*p)->rb_left;
+ } else if (memcmp(hash, entry->hash, hash_len) > 0) {
+ p = &(*p)->rb_right;
+ } else {
+ /* Found, need to re-add it to LRU list head */
+ list_del(&entry->lru_list);
+ list_add(&entry->lru_list, &dedup_info->lru_list);
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+static int inmem_search(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode, u64 file_pos,
+ struct btrfs_dedup_hash *hash)
+{
+ int ret;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_head *insert_head;
+ struct btrfs_delayed_data_ref *insert_dref;
+ struct btrfs_qgroup_extent_record *insert_qrecord = NULL;
+ struct inmem_hash *found_hash;
+ int free_insert = 1;
+ u64 bytenr;
+ u32 num_bytes;
+
+ insert_head = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
+ if (!insert_head)
+ return -ENOMEM;
+ insert_head->extent_op = NULL;
+ insert_dref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
+ if (!insert_dref) {
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, insert_head);
+ return -ENOMEM;
+ }
+ if (root->fs_info->quota_enabled &&
+ is_fstree(root->root_key.objectid)) {
+ insert_qrecord = kmalloc(sizeof(*insert_qrecord), GFP_NOFS);
+ if (!insert_qrecord) {
+ kmem_cache_free(btrfs_delayed_ref_head_cachep,
+ insert_head);
+ kmem_cache_free(btrfs_delayed_data_ref_cachep,
+ insert_dref);
+ return -ENOMEM;
+ }
+ }
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto free_mem;
+ }
+
+again:
+ mutex_lock(&dedup_info->lock);
+ found_hash = inmem_search_hash(dedup_info, hash->hash);
+ /* If we don't find a duplicated extent, just return. */
+ if (!found_hash) {
+ ret = 0;
+ goto out;
+ }
+ bytenr = found_hash->bytenr;
+ num_bytes = found_hash->num_bytes;
+
+ delayed_refs = &trans->transaction->delayed_refs;
+
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(trans, bytenr);
+ if (!head) {
+ /*
+ * We can safely insert a new delayed_ref as long as we
+ * hold delayed_refs->lock.
+ * Only need to use atomic inc_extent_ref()
+ */
+ btrfs_add_delayed_data_ref_locked(root->fs_info, trans,
+ insert_dref, insert_head, insert_qrecord,
+ bytenr, num_bytes, 0, root->root_key.objectid,
+ btrfs_ino(inode), file_pos, 0,
+ BTRFS_ADD_DELAYED_REF);
+ spin_unlock(&delayed_refs->lock);
+
+ /* add_delayed_data_ref_locked will free unused memory */
+ free_insert = 0;
+ hash->bytenr = bytenr;
+ hash->num_bytes = num_bytes;
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * We can't lock the ref head with dedup_info->lock held, or we will
+ * cause an ABBA deadlock.
+ */
+ mutex_unlock(&dedup_info->lock);
+ ret = btrfs_delayed_ref_lock(trans, head);
+ spin_unlock(&delayed_refs->lock);
+ if (ret == -EAGAIN)
+ goto again;
+
+ mutex_lock(&dedup_info->lock);
+ /* Search again to ensure the hash is still here */
+ found_hash = inmem_search_hash(dedup_info, hash->hash);
+ if (!found_hash) {
+ ret = 0;
+ mutex_unlock(&head->mutex);
+ goto out;
+ }
+ hash->bytenr = bytenr;
+ hash->num_bytes = num_bytes;
+
+ /*
+ * Increase the extent ref right now, to avoid the delayed ref run,
+ * or we may increase the ref on a non-existent extent.
+ */
+ btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
+ root->root_key.objectid,
+ btrfs_ino(inode), file_pos);
+ mutex_unlock(&head->mutex);
+out:
+ mutex_unlock(&dedup_info->lock);
+ btrfs_end_transaction(trans, root);
+
+free_mem:
+ if (free_insert) {
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, insert_head);
+ kmem_cache_free(btrfs_delayed_data_ref_cachep, insert_dref);
+ kfree(insert_qrecord);
+ }
+ return ret;
+}
+
+int btrfs_dedup_search(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode, u64 file_pos,
+ struct btrfs_dedup_hash *hash)
+{
+ int ret = -EINVAL;
+
+ if (WARN_ON(!dedup_info || !hash))
+ return 0;
+
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
+ ret = inmem_search(dedup_info, inode, file_pos, hash);
+
+ /* It's possible hash->bytenr/num_bytes already changed */
+ if (ret == 0) {
+ hash->num_bytes = 0;
+ hash->bytenr = 0;
+ }
+ return ret;
+}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 07/19] btrfs: dedup: Implement btrfs_dedup_calc_hash interface
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (5 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 06/19] btrfs: dedup: Introduce function to search for an existing hash Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 08/19] btrfs: ordered-extent: Add support for dedup Qu Wenruo
` (11 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Unlike the dedup backends (in-memory and on-disk), only one hash method,
SHA256, is supported so far, so implement the btrfs_dedup_calc_hash()
interface using SHA256.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index 20749ce..f6b518a 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -516,3 +516,46 @@ int btrfs_dedup_search(struct btrfs_dedup_info *dedup_info,
}
return ret;
}
+
+int btrfs_dedup_calc_hash(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode, u64 start,
+ struct btrfs_dedup_hash *hash)
+{
+ int i;
+ int ret;
+ struct page *p;
+ struct crypto_shash *tfm = dedup_info->dedup_driver;
+ struct {
+ struct shash_desc desc;
+ char ctx[crypto_shash_descsize(tfm)];
+ } sdesc;
+ u64 dedup_bs = dedup_info->blocksize;
+ u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+
+ if (!dedup_info || !hash)
+ return 0;
+
+ WARN_ON(!IS_ALIGNED(start, sectorsize));
+
+ sdesc.desc.tfm = tfm;
+ sdesc.desc.flags = 0;
+ ret = crypto_shash_init(&sdesc.desc);
+ if (ret)
+ return ret;
+ for (i = 0; sectorsize * i < dedup_bs; i++) {
+ char *d;
+
+ p = find_get_page(inode->i_mapping,
+ (start >> PAGE_CACHE_SHIFT) + i);
+ if (WARN_ON(!p))
+ return -ENOENT;
+ d = kmap(p);
+ ret = crypto_shash_update(&sdesc.desc, d, sectorsize);
+ kunmap(p);
+ page_cache_release(p);
+ if (ret)
+ return ret;
+ }
+ ret = crypto_shash_final(&sdesc.desc, hash->hash);
+ return ret;
+}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 08/19] btrfs: ordered-extent: Add support for dedup
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (6 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 07/19] btrfs: dedup: Implement btrfs_dedup_calc_hash interface Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 09/19] btrfs: dedup: Inband in-memory only de-duplication implement Qu Wenruo
` (10 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Add ordered-extent support for dedup.
Note: the current ordered-extent support only handles non-compressed
source extents.
Support for compressed source extents will be added later.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/ordered-data.c | 36 ++++++++++++++++++++++++++++++++----
fs/btrfs/ordered-data.h | 13 +++++++++++++
2 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 8c27292..830c0bd 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -25,6 +25,7 @@
#include "btrfs_inode.h"
#include "extent_io.h"
#include "disk-io.h"
+#include "dedup.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
@@ -183,12 +184,14 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
*/
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len,
- int type, int dio, int compress_type)
+ int type, int dio, int compress_type,
+ struct btrfs_dedup_hash *hash)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
+ struct btrfs_dedup_info *dedup_info;
tree = &BTRFS_I(inode)->ordered_tree;
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
@@ -203,6 +206,23 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->inode = igrab(inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
+ entry->hash = NULL;
+ dedup_info = btrfs_dedup_get_info(root->fs_info);
+ if (hash && dedup_info) {
+ entry->hash = btrfs_dedup_alloc_hash(dedup_info->hash_type);
+ if (!entry->hash) {
+ kmem_cache_free(btrfs_ordered_extent_cache, entry);
+ btrfs_dedup_put_info(dedup_info);
+ return -ENOMEM;
+ }
+ /* Hash contains locks, only copy what we need */
+ entry->hash->bytenr = hash->bytenr;
+ entry->hash->num_bytes = hash->num_bytes;
+ memcpy(entry->hash->hash, hash->hash,
+ btrfs_dedup_sizes[dedup_info->hash_type]);
+ }
+ btrfs_dedup_put_info(dedup_info);
+
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@@ -249,15 +269,23 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 0,
- BTRFS_COMPRESS_NONE);
+ BTRFS_COMPRESS_NONE, NULL);
}
+int btrfs_add_ordered_extent_dedup(struct inode *inode, u64 file_offset,
+ u64 start, u64 len, u64 disk_len, int type,
+ struct btrfs_dedup_hash *hash)
+{
+ return __btrfs_add_ordered_extent(inode, file_offset, start, len,
+ disk_len, type, 0,
+ BTRFS_COMPRESS_NONE, hash);
+}
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type)
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 1,
- BTRFS_COMPRESS_NONE);
+ BTRFS_COMPRESS_NONE, NULL);
}
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
@@ -266,7 +294,7 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
{
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
disk_len, type, 0,
- compress_type);
+ compress_type, NULL);
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 23c9605..58519ce 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -139,6 +139,16 @@ struct btrfs_ordered_extent {
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
+
+ /*
+ * For inband deduplication
+ * If hash is NULL, no deduplication.
+ * If hash->bytenr is zero, means this is a dedup miss, hash will
+ * be added into dedup tree.
+ * If hash->bytenr is non-zero, this is a dedup hit. Extent ref is
+ * *ALREADY* increased.
+ */
+ struct btrfs_dedup_hash *hash;
};
/*
@@ -172,6 +182,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
+int btrfs_add_ordered_extent_dedup(struct inode *inode, u64 file_offset,
+ u64 start, u64 len, u64 disk_len, int type,
+ struct btrfs_dedup_hash *hash);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 09/19] btrfs: dedup: Inband in-memory only de-duplication implement
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (7 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 08/19] btrfs: ordered-extent: Add support for dedup Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 10/19] btrfs: dedup: Add basic tree structure for on-disk dedup method Qu Wenruo
` (9 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
Core implementation of inband de-duplication.
It reuses the async_cow_start() facility to calculate the dedup hash,
and uses the dedup hash to do inband de-duplication at the extent level.
The work flow is as below:
1) Run delalloc range for an inode
2) Calculate hash for the delalloc range at the unit of dedup_bs
3) For hash match(duplicated) case, just increase source extent ref
and insert file extent.
For hash mismatch case, go through the normal cow_file_range()
fallback, and add hash into dedup_tree.
Compress for hash miss case is not supported yet.
The current implementation stores all dedup hashes in an in-memory
rb-tree, with LRU behavior to control the limit.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/extent-tree.c | 24 +++++++
fs/btrfs/inode.c | 174 ++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 174 insertions(+), 24 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e2287c7..f9fc25c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -37,6 +37,7 @@
#include "math.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "dedup.h"
#undef SCRAMBLE_DELAYED_REFS
@@ -2399,6 +2400,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
if (btrfs_delayed_ref_is_head(node)) {
struct btrfs_delayed_ref_head *head;
+ struct btrfs_dedup_info *dedup_info;
/*
* we've hit the end of the chain and we were supposed
* to insert this extent into the tree. But, it got
@@ -2409,15 +2411,27 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
head = btrfs_delayed_node_to_head(node);
trace_run_delayed_ref_head(node, head, node->action);
+ dedup_info = btrfs_dedup_get_info(root->fs_info);
if (insert_reserved) {
btrfs_pin_extent(root, node->bytenr,
node->num_bytes, 1);
if (head->is_data) {
+ /*
+ * If insert_reserved is given, it means
+ * a new extent is revered, then deleted
+ * in one tran, and inc/dec get merged to 0.
+ *
+ * In this case, we need to remove its dedup
+ * hash.
+ */
+ btrfs_dedup_del(trans, dedup_info,
+ node->bytenr);
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
}
}
+ btrfs_dedup_put_info(dedup_info);
/* Also free its reserved qgroup space */
btrfs_qgroup_free_delayed_ref(root->fs_info,
@@ -6707,6 +6721,16 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (is_data) {
+ struct btrfs_dedup_info *dedup_info;
+
+ dedup_info = btrfs_dedup_get_info(info);
+ ret = btrfs_dedup_del(trans, dedup_info, bytenr);
+ btrfs_dedup_put_info(dedup_info);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, extent_root,
+ ret);
+ goto out;
+ }
ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e456545..1e27a71 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -60,6 +60,7 @@
#include "hash.h"
#include "props.h"
#include "qgroup.h"
+#include "dedup.h"
struct btrfs_iget_args {
struct btrfs_key *location;
@@ -106,7 +107,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock);
+ unsigned long *nr_written, int unlock,
+ struct btrfs_dedup_hash *hash);
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
@@ -335,6 +337,7 @@ struct async_extent {
struct page **pages;
unsigned long nr_pages;
int compress_type;
+ struct btrfs_dedup_hash *hash;
struct list_head list;
};
@@ -353,7 +356,8 @@ static noinline int add_async_extent(struct async_cow *cow,
u64 compressed_size,
struct page **pages,
unsigned long nr_pages,
- int compress_type)
+ int compress_type,
+ struct btrfs_dedup_hash *hash)
{
struct async_extent *async_extent;
@@ -365,6 +369,7 @@ static noinline int add_async_extent(struct async_cow *cow,
async_extent->pages = pages;
async_extent->nr_pages = nr_pages;
async_extent->compress_type = compress_type;
+ async_extent->hash = hash;
list_add_tail(&async_extent->list, &cow->extents);
return 0;
}
@@ -616,7 +621,7 @@ cont:
*/
add_async_extent(async_cow, start, num_bytes,
total_compressed, pages, nr_pages_ret,
- compress_type);
+ compress_type, NULL);
if (start + num_bytes < end) {
start += num_bytes;
@@ -641,7 +646,7 @@ cleanup_and_bail_uncompressed:
if (redirty)
extent_range_redirty_for_io(inode, start, end);
add_async_extent(async_cow, start, end - start + 1,
- 0, NULL, 0, BTRFS_COMPRESS_NONE);
+ 0, NULL, 0, BTRFS_COMPRESS_NONE, NULL);
*num_added += 1;
}
@@ -712,7 +717,8 @@ retry:
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- &page_started, &nr_written, 0);
+ &page_started, &nr_written, 0,
+ async_extent->hash);
/* JDM XXX */
@@ -925,7 +931,7 @@ static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written,
- int unlock)
+ int unlock, struct btrfs_dedup_hash *hash)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 alloc_hint = 0;
@@ -984,11 +990,16 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long op;
cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(root, cur_alloc_size,
+ if (hash && hash->bytenr) {
+ ins.objectid = hash->bytenr;
+ ins.offset = hash->num_bytes;
+ } else {
+ ret = btrfs_reserve_extent(root, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
&ins, 1, 1);
- if (ret < 0)
- goto out_unlock;
+ if (ret < 0)
+ goto out_unlock;
+ }
em = alloc_extent_map();
if (!em) {
@@ -1025,8 +1036,9 @@ static noinline int cow_file_range(struct inode *inode,
goto out_reserve;
cur_alloc_size = ins.offset;
- ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
- ram_size, cur_alloc_size, 0);
+ ret = btrfs_add_ordered_extent_dedup(inode, start,
+ ins.objectid, cur_alloc_size, ins.offset,
+ 0, hash);
if (ret)
goto out_drop_extent_cache;
@@ -1076,6 +1088,67 @@ out_unlock:
goto out;
}
+static int hash_file_ranges(struct inode *inode, u64 start, u64 end,
+ struct async_cow *async_cow, int *num_added)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_dedup_info *dedup_info;
+ struct page *locked_page = async_cow->locked_page;
+ unsigned long nr_pages;
+ u64 actual_end;
+ u64 isize = i_size_read(inode);
+ u64 dedup_bs;
+ u64 cur_offset = start;
+ int ret = 0;
+
+ actual_end = min_t(u64, isize, end + 1);
+ dedup_info = btrfs_dedup_get_info(root->fs_info);
+ if (dedup_info)
+ dedup_bs = dedup_info->blocksize;
+ else
+ dedup_bs = SZ_128M;
+
+ nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
+ nr_pages = min_t(unsigned long, nr_pages, dedup_bs / PAGE_CACHE_SIZE);
+
+ while (cur_offset < end) {
+ struct btrfs_dedup_hash *hash = NULL;
+ u64 len;
+
+ len = min(end + 1 - cur_offset, dedup_bs);
+ if (len < dedup_bs)
+ goto next;
+
+ hash = btrfs_dedup_alloc_hash(dedup_info->hash_type);
+ if (!hash) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = btrfs_dedup_calc_hash(dedup_info, inode, cur_offset, hash);
+ if (ret < 0)
+ goto out;
+
+ ret = btrfs_dedup_search(dedup_info, inode, cur_offset, hash);
+ if (ret < 0)
+ goto out;
+ ret = 0;
+
+next:
+ /* Redirty the locked page if it corresponds to our extent */
+ if (page_offset(locked_page) >= start &&
+ page_offset(locked_page) <= end)
+ __set_page_dirty_nobuffers(locked_page);
+
+ add_async_extent(async_cow, cur_offset, len, 0, NULL, 0,
+ BTRFS_COMPRESS_NONE, hash);
+ cur_offset += len;
+ (*num_added)++;
+ }
+out:
+ btrfs_dedup_put_info(dedup_info);
+ return ret;
+}
+
/*
* work queue call back to started compression on a file and pages
*/
@@ -1083,11 +1156,18 @@ static noinline void async_cow_start(struct btrfs_work *work)
{
struct async_cow *async_cow;
int num_added = 0;
+ int ret = 0;
async_cow = container_of(work, struct async_cow, work);
- compress_file_range(async_cow->inode, async_cow->locked_page,
- async_cow->start, async_cow->end, async_cow,
- &num_added);
+ if (inode_need_compress(async_cow->inode))
+ compress_file_range(async_cow->inode, async_cow->locked_page,
+ async_cow->start, async_cow->end, async_cow,
+ &num_added);
+ else
+ ret = hash_file_ranges(async_cow->inode, async_cow->start,
+ async_cow->end, async_cow, &num_added);
+ WARN_ON(ret);
+
if (num_added == 0) {
btrfs_add_delayed_iput(async_cow->inode);
async_cow->inode = NULL;
@@ -1134,6 +1214,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
{
+ struct btrfs_dedup_info *dedup_info;
struct async_cow *async_cow;
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
@@ -1150,11 +1231,17 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow->locked_page = locked_page;
async_cow->start = start;
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+ dedup_info = btrfs_dedup_get_info(root->fs_info);
+ if (dedup_info) {
+ u64 len = max_t(u64, SZ_512K, dedup_info->blocksize);
+
+ cur_end = min(end, start + len - 1);
+ } else if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
!btrfs_test_opt(root, FORCE_COMPRESS))
cur_end = end;
else
cur_end = min(end, start + SZ_512K - 1);
+ btrfs_dedup_put_info(dedup_info);
async_cow->end = cur_end;
INIT_LIST_HEAD(&async_cow->extents);
@@ -1407,7 +1494,7 @@ out_check:
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
- page_started, nr_written, 1);
+ page_started, nr_written, 1, NULL);
if (ret) {
if (!nolock && nocow)
btrfs_end_write_no_snapshoting(root);
@@ -1486,7 +1573,7 @@ out_check:
if (cow_start != (u64)-1) {
ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ page_started, nr_written, 1, NULL);
if (ret)
goto error;
}
@@ -1537,22 +1624,26 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
{
int ret;
int force_cow = need_force_cow(inode, start, end);
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_dedup_info *dedup_info;
+ dedup_info = btrfs_dedup_get_info(root->fs_info);
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 1, nr_written);
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode)) {
+ } else if (!inode_need_compress(inode) && !dedup_info) {
ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1);
+ page_started, nr_written, 1, NULL);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
ret = cow_file_range_async(inode, locked_page, start, end,
page_started, nr_written);
}
+ btrfs_dedup_put_info(dedup_info);
return ret;
}
@@ -2075,9 +2166,11 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
u64 disk_bytenr, u64 disk_num_bytes,
u64 num_bytes, u64 ram_bytes,
u8 compression, u8 encryption,
- u16 other_encoding, int extent_type)
+ u16 other_encoding, int extent_type,
+ struct btrfs_dedup_hash *hash)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_dedup_info *dedup_info;
struct btrfs_file_extent_item *fi;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -2137,10 +2230,39 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_alloc_reserved_file_extent(trans, root,
+
+ /*
+ * Only for no-dedup or hash miss case, we need to increase
+ * extent reference
+ * For hash hit case, reference is already increased
+ */
+ if (!hash || hash->bytenr == 0)
+ ret = btrfs_alloc_reserved_file_extent(trans, root,
root->root_key.objectid,
btrfs_ino(inode), file_pos,
ram_bytes, &ins);
+ if (ret < 0)
+ goto out_qgroup;
+
+ dedup_info = btrfs_dedup_get_info(root->fs_info);
+ /*
+ * Hash hit won't create a new file extent, so its reserved quota
+ * space won't be freed by new delayed_ref_head.
+ * Need to free it here.
+ */
+ if (hash && hash->bytenr)
+ btrfs_qgroup_free_data(inode, file_pos, ram_bytes);
+
+ /* Add missed hash into dedup tree */
+ if (hash && hash->bytenr == 0) {
+ hash->bytenr = ins.objectid;
+ hash->num_bytes = ins.offset;
+ ret = btrfs_dedup_add(trans, dedup_info, hash);
+ }
+ btrfs_dedup_put_info(dedup_info);
+
+out_qgroup:
+
/*
* Release the reserved range from inode dirty range map, as it is
* already moved into delayed_ref_head
@@ -2924,7 +3046,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->disk_len,
logical_len, logical_len,
compress_type, 0, 0,
- BTRFS_FILE_EXTENT_REG);
+ BTRFS_FILE_EXTENT_REG,
+ ordered_extent->hash);
if (!ret)
btrfs_release_delalloc_bytes(root,
ordered_extent->start,
@@ -2953,6 +3076,9 @@ out_unlock:
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
+ /* free dedup hash */
+ kfree(ordered_extent->hash);
+
if (root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
if (trans)
@@ -2984,7 +3110,6 @@ out:
ordered_extent->disk_len, 1);
}
-
/*
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
@@ -9805,7 +9930,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
cur_offset, ins.objectid,
ins.offset, ins.offset,
ins.offset, 0, 0, 0,
- BTRFS_FILE_EXTENT_PREALLOC);
+ BTRFS_FILE_EXTENT_PREALLOC,
+ NULL);
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid,
ins.offset, 0);
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 10/19] btrfs: dedup: Add basic tree structure for on-disk dedup method
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (8 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 09/19] btrfs: dedup: Inband in-memory only de-duplication implement Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 11/19] btrfs: dedup: Introduce interfaces to resume and cleanup dedup info Qu Wenruo
` (8 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Liu Bo, Wang Xiaoguang
Introduce a new tree, the dedup tree, to record on-disk dedup hashes,
as persistent hash storage instead of the in-memory-only implementation.
Unlike Liu Bo's implementation, in this version we won't hack the
bytenr -> hash search, but add a new type, DEDUP_BYTENR_ITEM, for that
search case, just like the in-memory backend.
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/ctree.h | 67 +++++++++++++++++++++++++++++++++++++++++++-
fs/btrfs/dedup.h | 5 ++++
fs/btrfs/disk-io.c | 1 +
include/trace/events/btrfs.h | 3 +-
4 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 034216e..8c44093 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -100,6 +100,9 @@ struct btrfs_ordered_sum;
/* tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+/* on-disk dedup tree (EXPERIMENTAL) */
+#define BTRFS_DEDUP_TREE_OBJECTID 11ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -505,6 +508,7 @@ struct btrfs_super_block {
* ones specified below then we will fail to mount
*/
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
+#define BTRFS_FEATURE_COMPAT_RO_DEDUP (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
@@ -534,7 +538,8 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
- (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
+ BTRFS_FEATURE_COMPAT_RO_DEDUP)
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
@@ -964,6 +969,46 @@ struct btrfs_csum_item {
u8 csum;
} __attribute__ ((__packed__));
+/*
+ * Objectid: 0
+ * Type: BTRFS_DEDUP_STATUS_ITEM_KEY
+ * Offset: 0
+ */
+struct btrfs_dedup_status_item {
+ __le64 blocksize;
+ __le64 limit_nr;
+ __le16 hash_type;
+ __le16 backend;
+} __attribute__ ((__packed__));
+
+/*
+ * Objectid: Last 64 bit of the hash
+ * Type: BTRFS_DEDUP_HASH_ITEM_KEY
+ * Offset: Bytenr of the hash
+ *
+ * Used for hash <-> bytenr search
+ * XXX: On-disk format not stable yet, see the unsed one
+ */
+struct btrfs_dedup_hash_item {
+ /* on disk length of dedup range */
+ __le64 len;
+
+ /* Spare space */
+ u8 __unused[16];
+
+ /* Hash follows */
+} __attribute__ ((__packed__));
+
+/*
+ * Objectid: bytenr
+ * Type: BTRFS_DEDUP_BYTENR_ITEM_KEY
+ * offset: Last 64 bit of the hash
+ *
+ * Used for bytenr <-> hash search (for free_extent)
+ * all its content is hash.
+ * So no special item struct is needed.
+ */
+
struct btrfs_dev_stats_item {
/*
* grow this item struct at the end for future enhancements and keep
@@ -2167,6 +2212,13 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_CHUNK_ITEM_KEY 228
/*
+ * Dedup item and status
+ */
+#define BTRFS_DEDUP_STATUS_ITEM_KEY 230
+#define BTRFS_DEDUP_HASH_ITEM_KEY 231
+#define BTRFS_DEDUP_BYTENR_ITEM_KEY 232
+
+/*
* Records the overall state of the qgroups.
* There's only one instance of this key present,
* (0, BTRFS_QGROUP_STATUS_KEY, 0)
@@ -3229,6 +3281,19 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
return offsetof(struct btrfs_leaf, items);
}
+/* btrfs_dedup_status */
+BTRFS_SETGET_FUNCS(dedup_status_blocksize, struct btrfs_dedup_status_item,
+ blocksize, 64);
+BTRFS_SETGET_FUNCS(dedup_status_limit, struct btrfs_dedup_status_item,
+ limit_nr, 64);
+BTRFS_SETGET_FUNCS(dedup_status_hash_type, struct btrfs_dedup_status_item,
+ hash_type, 16);
+BTRFS_SETGET_FUNCS(dedup_status_backend, struct btrfs_dedup_status_item,
+ backend, 16);
+
+/* btrfs_dedup_hash_item */
+BTRFS_SETGET_FUNCS(dedup_hash_len, struct btrfs_dedup_hash_item, len, 64);
+
/* struct btrfs_file_extent_item */
BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
index 8fed1ce..834d66a 100644
--- a/fs/btrfs/dedup.h
+++ b/fs/btrfs/dedup.h
@@ -58,6 +58,8 @@ struct btrfs_dedup_hash {
u8 hash[];
};
+struct btrfs_root;
+
struct btrfs_dedup_info {
/* dedup blocksize */
u64 blocksize;
@@ -77,6 +79,9 @@ struct btrfs_dedup_info {
struct list_head lru_list;
u64 limit_nr;
u64 current_nr;
+
+ /* for persist data like dedup-hash and dedup status */
+ struct btrfs_root *dedup_root;
};
struct btrfs_trans_handle;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 84825e5..7893851 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -183,6 +183,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
+ { .id = BTRFS_DEDUP_TREE_OBJECTID, .name_stem = "dedup" },
{ .id = 0, .name_stem = "tree" },
};
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index d866f21..44d5e0f 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -47,12 +47,13 @@ struct btrfs_qgroup_operation;
{ BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \
{ BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \
{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \
+ { BTRFS_DEDUP_TREE_OBJECTID, "DEDUP_TREE" }, \
{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
#define show_root_type(obj) \
obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \
(obj >= BTRFS_ROOT_TREE_OBJECTID && \
- obj <= BTRFS_QUOTA_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
+ obj <= BTRFS_DEDUP_TREE_OBJECTID)) ? __show_root_type(obj) : "-"
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 11/19] btrfs: dedup: Introduce interfaces to resume and cleanup dedup info
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (9 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 10/19] btrfs: dedup: Add basic tree structure for on-disk dedup method Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 12/19] btrfs: dedup: Add support for on-disk hash search Qu Wenruo
` (7 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
Since we will introduce a new on-disk dedup backend, introduce new
interfaces to resume a previous dedup setup.
And since we introduce a new tree for the dedup status, also add a
disable handler for it.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++++-----
fs/btrfs/dedup.h | 13 +++
fs/btrfs/disk-io.c | 21 +++-
fs/btrfs/disk-io.h | 1 +
4 files changed, 287 insertions(+), 25 deletions(-)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index f6b518a..4dd07b7 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -21,6 +21,8 @@
#include "transaction.h"
#include "delayed-ref.h"
#include "qgroup.h"
+#include "disk-io.h"
+#include "locking.h"
struct inmem_hash {
struct rb_node hash_node;
@@ -41,10 +43,105 @@ static inline struct inmem_hash *inmem_alloc_hash(u16 type)
GFP_NOFS);
}
+static int init_dedup_info(struct btrfs_dedup_info **ret_info, u16 type,
+ u16 backend, u64 blocksize, u64 limit)
+{
+ struct btrfs_dedup_info *dedup_info;
+
+ dedup_info = kzalloc(sizeof(*dedup_info), GFP_NOFS);
+ if (!dedup_info)
+ return -ENOMEM;
+
+ dedup_info->hash_type = type;
+ dedup_info->backend = backend;
+ dedup_info->blocksize = blocksize;
+ dedup_info->limit_nr = limit;
+
+ /* only support SHA256 yet */
+ dedup_info->dedup_driver = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(dedup_info->dedup_driver)) {
+ int ret;
+
+ ret = PTR_ERR(dedup_info->dedup_driver);
+ kfree(dedup_info);
+ return ret;
+ }
+
+ dedup_info->hash_root = RB_ROOT;
+ dedup_info->bytenr_root = RB_ROOT;
+ dedup_info->current_nr = 0;
+ INIT_LIST_HEAD(&dedup_info->lru_list);
+ mutex_init(&dedup_info->lock);
+ init_waitqueue_head(&dedup_info->refs_wq);
+ atomic_set(&dedup_info->refs, 0);
+
+ *ret_info = dedup_info;
+ return 0;
+}
+
+static int init_dedup_tree(struct btrfs_fs_info *fs_info,
+ struct btrfs_dedup_info *dedup_info)
+{
+ struct btrfs_root *dedup_root;
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ struct btrfs_dedup_status_item *status;
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ trans = btrfs_start_transaction(fs_info->tree_root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ dedup_root = btrfs_create_tree(trans, fs_info,
+ BTRFS_DEDUP_TREE_OBJECTID);
+ if (IS_ERR(dedup_root)) {
+ ret = PTR_ERR(dedup_root);
+ btrfs_abort_transaction(trans, fs_info->tree_root, ret);
+ goto out;
+ }
+ dedup_info->dedup_root = dedup_root;
+
+ key.objectid = 0;
+ key.type = BTRFS_DEDUP_STATUS_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_insert_empty_item(trans, dedup_root, path, &key,
+ sizeof(*status));
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, fs_info->tree_root, ret);
+ goto out;
+ }
+
+ status = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_dedup_status_item);
+ btrfs_set_dedup_status_blocksize(path->nodes[0], status,
+ dedup_info->blocksize);
+ btrfs_set_dedup_status_limit(path->nodes[0], status,
+ dedup_info->limit_nr);
+ btrfs_set_dedup_status_hash_type(path->nodes[0], status,
+ dedup_info->hash_type);
+ btrfs_set_dedup_status_backend(path->nodes[0], status,
+ dedup_info->backend);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+out:
+ btrfs_free_path(path);
+ if (ret == 0)
+ btrfs_commit_transaction(trans, fs_info->tree_root);
+ return ret;
+}
+
int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
u64 blocksize, u64 limit_nr)
{
struct btrfs_dedup_info *dedup_info;
+ int create_tree;
+ u64 compat_ro_flag = btrfs_super_compat_ro_flags(fs_info->super_copy);
u64 limit = limit_nr;
int ret = 0;
@@ -63,10 +160,17 @@ int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
limit = 4096; /* default value */
if (backend == BTRFS_DEDUP_BACKEND_ONDISK && limit_nr != 0)
limit = 0;
+ /* Ondisk backend needs DEDUP RO compat feature */
+ if (!(compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUP) &&
+ backend == BTRFS_DEDUP_BACKEND_ONDISK)
+ return -EOPNOTSUPP;
+
+ /* Meaningless and unable to enable dedup for RO fs */
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
dedup_info = btrfs_dedup_get_info(fs_info);
if (dedup_info) {
-
/* Check if we are re-enable for different dedup config */
if (dedup_info->blocksize != blocksize ||
dedup_info->hash_type != type ||
@@ -83,42 +187,107 @@ int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
btrfs_dedup_put_info(dedup_info);
return 0;
}
-
+ dedup_info = NULL;
enable:
- dedup_info = kzalloc(sizeof(*dedup_info), GFP_NOFS);
- if (dedup_info)
+ create_tree = compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUP;
+
+ ret = init_dedup_info(&dedup_info, type, backend, blocksize, limit);
+ if (ret < 0)
+ return ret;
+ if (create_tree) {
+ ret = init_dedup_tree(fs_info, dedup_info);
+ if (ret < 0)
+ goto out;
+ }
+
+ spin_lock(&fs_info->dedup_ref_lock);
+ fs_info->dedup_info = dedup_info;
+ spin_unlock(&fs_info->dedup_ref_lock);
+out:
+ if (ret < 0) {
+ crypto_free_shash(dedup_info->dedup_driver);
+ kfree(dedup_info);
+ }
+ return ret;
+}
+
+int btrfs_dedup_resume(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *dedup_root)
+{
+ struct btrfs_dedup_info *dedup_info;
+ struct btrfs_dedup_status_item *status;
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ u64 blocksize;
+ u64 limit;
+ u16 type;
+ u16 backend;
+ int ret = 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
return -ENOMEM;
- dedup_info->hash_type = type;
- dedup_info->backend = backend;
- dedup_info->blocksize = blocksize;
- dedup_info->limit_nr = limit;
+ key.objectid = 0;
+ key.type = BTRFS_DEDUP_STATUS_ITEM_KEY;
+ key.offset = 0;
- /* Only support SHA256 yet */
- dedup_info->dedup_driver = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(dedup_info->dedup_driver)) {
- btrfs_err(fs_info, "failed to init sha256 driver");
- ret = PTR_ERR(dedup_info->dedup_driver);
+ ret = btrfs_search_slot(NULL, dedup_root, &key, path, 0, 0);
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ } else if (ret < 0) {
goto out;
}
- dedup_info->hash_root = RB_ROOT;
- dedup_info->bytenr_root = RB_ROOT;
- dedup_info->current_nr = 0;
- INIT_LIST_HEAD(&dedup_info->lru_list);
- mutex_init(&dedup_info->lock);
- init_waitqueue_head(&dedup_info->refs_wq);
- atomic_set(&dedup_info->refs, 0);
+ status = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_dedup_status_item);
+ blocksize = btrfs_dedup_status_blocksize(path->nodes[0], status);
+ limit = btrfs_dedup_status_limit(path->nodes[0], status);
+ type = btrfs_dedup_status_hash_type(path->nodes[0], status);
+ backend = btrfs_dedup_status_backend(path->nodes[0], status);
+
+ ret = init_dedup_info(&dedup_info, type, backend, blocksize, limit);
+ if (ret < 0)
+ goto out;
+ dedup_info->dedup_root = dedup_root;
spin_lock(&fs_info->dedup_ref_lock);
fs_info->dedup_info = dedup_info;
spin_unlock(&fs_info->dedup_ref_lock);
+
out:
- if (ret < 0)
- kfree(dedup_info);
+ btrfs_free_path(path);
return ret;
}
+static void inmem_destroy(struct btrfs_dedup_info *dedup_info);
+int btrfs_dedup_cleanup(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_dedup_info *dedup_info;
+
+ /* same as disable */
+ spin_lock(&fs_info->dedup_ref_lock);
+ dedup_info = fs_info->dedup_info;
+ fs_info->dedup_info = NULL;
+ spin_unlock(&fs_info->dedup_ref_lock);
+
+ if (!dedup_info)
+ return 0;
+
+ wait_event(dedup_info->refs_wq, atomic_read(&dedup_info->refs) == 0);
+
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
+ inmem_destroy(dedup_info);
+ if (dedup_info->dedup_root) {
+ free_root_extent_buffers(dedup_info->dedup_root);
+ kfree(dedup_info->dedup_root);
+ }
+ crypto_free_shash(dedup_info->dedup_driver);
+ kfree(dedup_info);
+ return 0;
+}
+
static int inmem_insert_hash(struct rb_root *root,
struct inmem_hash *hash, int hash_len)
{
@@ -318,9 +487,69 @@ static void inmem_destroy(struct btrfs_dedup_info *dedup_info)
mutex_unlock(&dedup_info->lock);
}
+static int remove_dedup_tree(struct btrfs_root *dedup_root)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info = dedup_root->fs_info;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct extent_buffer *node;
+ int ret;
+ int nr;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ trans = btrfs_start_transaction(fs_info->tree_root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ path->leave_spinning = 1;
+ key.objectid = 0;
+ key.offset = 0;
+ key.type = 0;
+
+ while (1) {
+ ret = btrfs_search_slot(trans, dedup_root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+ node = path->nodes[0];
+ nr = btrfs_header_nritems(node);
+ if (nr == 0) {
+ btrfs_release_path(path);
+ break;
+ }
+ path->slots[0] = 0;
+ ret = btrfs_del_items(trans, dedup_root, path, 0, nr);
+ if (ret)
+ goto out;
+ btrfs_release_path(path);
+ }
+
+ ret = btrfs_del_root(trans, fs_info->tree_root, &dedup_root->root_key);
+ if (ret)
+ goto out;
+
+ list_del(&dedup_root->dirty_list);
+ btrfs_tree_lock(dedup_root->node);
+ clean_tree_block(trans, fs_info, dedup_root->node);
+ btrfs_tree_unlock(dedup_root->node);
+ btrfs_free_tree_block(trans, dedup_root, dedup_root->node, 0 , 1);
+ free_extent_buffer(dedup_root->node);
+ free_extent_buffer(dedup_root->commit_root);
+ kfree(dedup_root);
+ ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
int btrfs_dedup_disable(struct btrfs_fs_info *fs_info)
{
struct btrfs_dedup_info *dedup_info;
+ int ret = 0;
/* Here we don't want to increase refs of dedup_info */
spin_lock(&fs_info->dedup_ref_lock);
@@ -339,10 +568,12 @@ int btrfs_dedup_disable(struct btrfs_fs_info *fs_info)
/* now we are OK to clean up everything */
if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
inmem_destroy(dedup_info);
+ if (dedup_info->dedup_root)
+ ret = remove_dedup_tree(dedup_info->dedup_root);
crypto_free_shash(dedup_info->dedup_driver);
kfree(dedup_info);
- return 0;
+ return ret;
}
/*
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
index 834d66a..cf8eae6 100644
--- a/fs/btrfs/dedup.h
+++ b/fs/btrfs/dedup.h
@@ -113,6 +113,19 @@ int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
int btrfs_dedup_disable(struct btrfs_fs_info *fs_info);
/*
+ * Restore previous dedup setup from disk
+ * Called at mount time
+ */
+int btrfs_dedup_resume(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *dedup_root);
+
+/*
+ * Cleanup current btrfs_dedup_info
+ * Called in umount time
+ */
+int btrfs_dedup_cleanup(struct btrfs_fs_info *fs_info);
+
+/*
* Caller need to grab a valid dedup_info by this function,
* not grab it from fs_info directly.
*/
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7893851..c775bec 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -50,6 +50,7 @@
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "dedup.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -2155,7 +2156,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->extent_workers);
}
-static void free_root_extent_buffers(struct btrfs_root *root)
+void free_root_extent_buffers(struct btrfs_root *root)
{
if (root) {
free_extent_buffer(root->node);
@@ -2487,7 +2488,21 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
fs_info->free_space_root = root;
}
- return 0;
+ location.objectid = BTRFS_DEDUP_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ if (ret != -ENOENT)
+ return ret;
+ return 0;
+ }
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ ret = btrfs_dedup_resume(fs_info, root);
+ if (ret < 0) {
+ free_root_extent_buffers(root);
+ kfree(root);
+ }
+ return ret;
}
int open_ctree(struct super_block *sb,
@@ -3875,6 +3890,8 @@ void close_ctree(struct btrfs_root *root)
btrfs_free_qgroup_config(fs_info);
+ btrfs_dedup_cleanup(fs_info);
+
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
btrfs_info(fs_info, "at unmount delalloc count %lld",
percpu_counter_sum(&fs_info->delalloc_bytes));
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8e79d00..42c4ff2 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -70,6 +70,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
int btrfs_init_fs_root(struct btrfs_root *root);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
+void free_root_extent_buffers(struct btrfs_root *root);
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 12/19] btrfs: dedup: Add support for on-disk hash search
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (10 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 11/19] btrfs: dedup: Introduce interfaces to resume and cleanup dedup info Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 13/19] btrfs: dedup: Add support to delete hash for on-disk backend Qu Wenruo
` (6 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
The on-disk backend is now able to search hashes.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++-------
fs/btrfs/dedup.h | 1 +
2 files changed, 118 insertions(+), 16 deletions(-)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index 4dd07b7..c2b6a03 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -576,6 +576,79 @@ int btrfs_dedup_disable(struct btrfs_fs_info *fs_info)
return ret;
}
+ /*
+ * Return 0 for not found
+ * Return >0 for found and set bytenr_ret
+ * Return <0 for error
+ */
+static int ondisk_search_hash(struct btrfs_dedup_info *dedup_info, u8 *hash,
+ u64 *bytenr_ret, u32 *num_bytes_ret)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_root *dedup_root = dedup_info->dedup_root;
+ u8 *buf = NULL;
+ u64 hash_key;
+ int hash_len = btrfs_dedup_sizes[dedup_info->hash_type];
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ buf = kmalloc(hash_len, GFP_NOFS);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ memcpy(&hash_key, hash + hash_len - 8, 8);
+ key.objectid = hash_key;
+ key.type = BTRFS_DEDUP_HASH_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, dedup_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ WARN_ON(ret == 0);
+ while (1) {
+ struct extent_buffer *node;
+ struct btrfs_dedup_hash_item *hash_item;
+ int slot;
+
+ ret = btrfs_previous_item(dedup_root, path, hash_key,
+ BTRFS_DEDUP_HASH_ITEM_KEY);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+
+ node = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(node, &key, slot);
+
+ if (key.type != BTRFS_DEDUP_HASH_ITEM_KEY ||
+ memcmp(&key.objectid, hash + hash_len - 8, 8))
+ break;
+ hash_item = btrfs_item_ptr(node, slot,
+ struct btrfs_dedup_hash_item);
+ read_extent_buffer(node, buf, (unsigned long)(hash_item + 1),
+ hash_len);
+ if (!memcmp(buf, hash, hash_len)) {
+ ret = 1;
+ *bytenr_ret = key.offset;
+ *num_bytes_ret = btrfs_dedup_hash_len(node, hash_item);
+ break;
+ }
+ }
+out:
+ kfree(buf);
+ btrfs_free_path(path);
+ return ret;
+}
+
/*
* Caller must ensure the corresponding ref head is not being run.
*/
@@ -606,9 +679,36 @@ inmem_search_hash(struct btrfs_dedup_info *dedup_info, u8 *hash)
return NULL;
}
-static int inmem_search(struct btrfs_dedup_info *dedup_info,
- struct inode *inode, u64 file_pos,
- struct btrfs_dedup_hash *hash)
+/* Wrapper for different backends, caller needs to hold dedup_info->lock */
+static inline int generic_search_hash(struct btrfs_dedup_info *dedup_info,
+ u8 *hash, u64 *bytenr_ret,
+ u32 *num_bytes_ret)
+{
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY) {
+ struct inmem_hash *found_hash;
+ int ret;
+
+ found_hash = inmem_search_hash(dedup_info, hash);
+ if (found_hash) {
+ ret = 1;
+ *bytenr_ret = found_hash->bytenr;
+ *num_bytes_ret = found_hash->num_bytes;
+ } else {
+ ret = 0;
+ *bytenr_ret = 0;
+ *num_bytes_ret = 0;
+ }
+ return ret;
+ } else if (dedup_info->backend == BTRFS_DEDUP_BACKEND_ONDISK) {
+ return ondisk_search_hash(dedup_info, hash, bytenr_ret,
+ num_bytes_ret);
+ }
+ return -EINVAL;
+}
+
+static int generic_search(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode, u64 file_pos,
+ struct btrfs_dedup_hash *hash)
{
int ret;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -618,9 +718,9 @@ static int inmem_search(struct btrfs_dedup_info *dedup_info,
struct btrfs_delayed_ref_head *insert_head;
struct btrfs_delayed_data_ref *insert_dref;
struct btrfs_qgroup_extent_record *insert_qrecord = NULL;
- struct inmem_hash *found_hash;
int free_insert = 1;
u64 bytenr;
+ u64 tmp_bytenr;
u32 num_bytes;
insert_head = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
@@ -652,14 +752,9 @@ static int inmem_search(struct btrfs_dedup_info *dedup_info,
again:
mutex_lock(&dedup_info->lock);
- found_hash = inmem_search_hash(dedup_info, hash->hash);
- /* If we don't find a duplicated extent, just return. */
- if (!found_hash) {
- ret = 0;
+ ret = generic_search_hash(dedup_info, hash->hash, &bytenr, &num_bytes);
+ if (ret <= 0)
goto out;
- }
- bytenr = found_hash->bytenr;
- num_bytes = found_hash->num_bytes;
delayed_refs = &trans->transaction->delayed_refs;
@@ -698,12 +793,17 @@ again:
mutex_lock(&dedup_info->lock);
/* Search again to ensure the hash is still here */
- found_hash = inmem_search_hash(dedup_info, hash->hash);
- if (!found_hash) {
- ret = 0;
+ ret = generic_search_hash(dedup_info, hash->hash, &tmp_bytenr,
+ &num_bytes);
+ if (ret <= 0) {
mutex_unlock(&head->mutex);
goto out;
}
+ if (tmp_bytenr != bytenr) {
+ mutex_unlock(&head->mutex);
+ mutex_unlock(&dedup_info->lock);
+ goto again;
+ }
hash->bytenr = bytenr;
hash->num_bytes = num_bytes;
@@ -737,8 +837,9 @@ int btrfs_dedup_search(struct btrfs_dedup_info *dedup_info,
if (WARN_ON(!dedup_info || !hash))
return 0;
- if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
- ret = inmem_search(dedup_info, inode, file_pos, hash);
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY ||
+ dedup_info->backend == BTRFS_DEDUP_BACKEND_ONDISK)
+ ret = generic_search(dedup_info, inode, file_pos, hash);
/* It's possible hash->bytenr/num_bytenr already changed */
if (ret == 0) {
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
index cf8eae6..1661fe6 100644
--- a/fs/btrfs/dedup.h
+++ b/fs/btrfs/dedup.h
@@ -172,6 +172,7 @@ int btrfs_dedup_calc_hash(struct btrfs_dedup_info *dedup_info,
* *INCREASED*, and hash->bytenr/num_bytes will record the existing
* extent data.
* Return 0 for a hash miss. Nothing is done
+ * Return < 0 for error
*/
int btrfs_dedup_search(struct btrfs_dedup_info *dedup_info,
struct inode *inode, u64 file_pos,
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 13/19] btrfs: dedup: Add support to delete hash for on-disk backend
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (11 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 12/19] btrfs: dedup: Add support for on-disk hash search Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 14/19] btrfs: dedup: Add support for adding " Qu Wenruo
` (5 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
The on-disk backend can now delete hashes.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 100 insertions(+)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index c2b6a03..309cbc3 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -465,6 +465,104 @@ static int inmem_del(struct btrfs_dedup_info *dedup_info, u64 bytenr)
return 0;
}
+/*
+ * If prepare_del is given, this will setup search_slot() for delete.
+ * Caller needs to do proper locking.
+ *
+ * Return > 0 for found.
+ * Return 0 for not found.
+ * Return < 0 for error.
+ */
+static int ondisk_search_bytenr(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info,
+ struct btrfs_path *path, u64 bytenr,
+ int prepare_del)
+{
+ struct btrfs_key key;
+ struct btrfs_root *dedup_root = dedup_info->dedup_root;
+ int ret;
+ int ins_len = 0;
+ int cow = 0;
+
+ if (prepare_del) {
+ if (WARN_ON(trans == NULL))
+ return -EINVAL;
+ cow = 1;
+ ins_len = -1;
+ }
+
+ key.objectid = bytenr;
+ key.type = BTRFS_DEDUP_BYTENR_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(trans, dedup_root, &key, path,
+ ins_len, cow);
+
+ if (ret < 0)
+ return ret;
+ /*
+ * Although it's almost impossible, it's still possible that
+ * the last 64bits are all 1.
+ */
+ if (ret == 0)
+ return 1;
+
+ ret = btrfs_previous_item(dedup_root, path, bytenr,
+ BTRFS_DEDUP_BYTENR_ITEM_KEY);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 0;
+ return 1;
+}
+
+static int ondisk_del(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info, u64 bytenr)
+{
+ struct btrfs_root *dedup_root = dedup_info->dedup_root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = bytenr;
+ key.type = BTRFS_DEDUP_BYTENR_ITEM_KEY;
+ key.offset = 0;
+
+ mutex_lock(&dedup_info->lock);
+
+ ret = ondisk_search_bytenr(trans, dedup_info, path, bytenr, 1);
+ if (ret <= 0)
+ goto out;
+
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ ret = btrfs_del_item(trans, dedup_root, path);
+ btrfs_release_path(path);
+ if (ret < 0)
+ goto out;
+ /* Search for hash item and delete it */
+ key.objectid = key.offset;
+ key.type = BTRFS_DEDUP_HASH_ITEM_KEY;
+ key.offset = bytenr;
+
+ ret = btrfs_search_slot(trans, dedup_root, &key, path, -1, 1);
+ if (WARN_ON(ret > 0)) {
+ ret = -ENOENT;
+ goto out;
+ }
+ if (ret < 0)
+ goto out;
+ ret = btrfs_del_item(trans, dedup_root, path);
+
+out:
+ btrfs_free_path(path);
+ mutex_unlock(&dedup_info->lock);
+ return ret;
+}
+
/* Remove a dedup hash from dedup tree */
int btrfs_dedup_del(struct btrfs_trans_handle *trans,
struct btrfs_dedup_info *dedup_info, u64 bytenr)
@@ -474,6 +572,8 @@ int btrfs_dedup_del(struct btrfs_trans_handle *trans,
if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
return inmem_del(dedup_info, bytenr);
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_ONDISK)
+ return ondisk_del(trans, dedup_info, bytenr);
return -EINVAL;
}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 14/19] btrfs: dedup: Add support for adding hash for on-disk backend
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (12 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 13/19] btrfs: dedup: Add support to delete hash for on-disk backend Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 15/19] btrfs: dedup: Add ioctl for inband deduplication Qu Wenruo
` (4 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
The on-disk backend can now add hashes.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 83 insertions(+)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index 309cbc3..073726d 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -407,6 +407,87 @@ out:
return 0;
}
+static int ondisk_search_bytenr(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info,
+ struct btrfs_path *path, u64 bytenr,
+ int prepare_del);
+static int ondisk_search_hash(struct btrfs_dedup_info *dedup_info, u8 *hash,
+ u64 *bytenr_ret, u32 *num_bytes_ret);
+static int ondisk_add(struct btrfs_trans_handle *trans,
+ struct btrfs_dedup_info *dedup_info,
+ struct btrfs_dedup_hash *hash)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *dedup_root = dedup_info->dedup_root;
+ struct btrfs_key key;
+ struct btrfs_dedup_hash_item *hash_item;
+ u64 bytenr;
+ u32 num_bytes;
+ int hash_len = btrfs_dedup_sizes[dedup_info->hash_type];
+ int ret;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ mutex_lock(&dedup_info->lock);
+
+ ret = ondisk_search_bytenr(NULL, dedup_info, path, hash->bytenr, 0);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ ret = ondisk_search_hash(dedup_info, hash->hash, &bytenr, &num_bytes);
+ if (ret < 0)
+ goto out;
+ /* Same hash found, don't re-add to save dedup tree space */
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Insert hash->bytenr item */
+ memcpy(&key.objectid, hash->hash + hash_len - 8, 8);
+ key.type = BTRFS_DEDUP_HASH_ITEM_KEY;
+ key.offset = hash->bytenr;
+
+ ret = btrfs_insert_empty_item(trans, dedup_root, path, &key,
+ sizeof(*hash_item) + hash_len);
+ WARN_ON(ret == -EEXIST);
+ if (ret < 0)
+ goto out;
+ hash_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_dedup_hash_item);
+ btrfs_set_dedup_hash_len(path->nodes[0], hash_item, hash->num_bytes);
+ write_extent_buffer(path->nodes[0], hash->hash,
+ (unsigned long)(hash_item + 1), hash_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+ btrfs_release_path(path);
+
+ /* Then bytenr->hash item */
+ key.objectid = hash->bytenr;
+ key.type = BTRFS_DEDUP_BYTENR_ITEM_KEY;
+ memcpy(&key.offset, hash->hash + hash_len - 8, 8);
+
+ ret = btrfs_insert_empty_item(trans, dedup_root, path, &key, hash_len);
+ WARN_ON(ret == -EEXIST);
+ if (ret < 0)
+ goto out;
+ write_extent_buffer(path->nodes[0], hash->hash,
+ btrfs_item_ptr_offset(path->nodes[0], path->slots[0]),
+ hash_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
+out:
+ mutex_unlock(&dedup_info->lock);
+ btrfs_free_path(path);
+ return ret;
+}
+
int btrfs_dedup_add(struct btrfs_trans_handle *trans,
struct btrfs_dedup_info *dedup_info,
struct btrfs_dedup_hash *hash)
@@ -423,6 +504,8 @@ int btrfs_dedup_add(struct btrfs_trans_handle *trans,
if (dedup_info->backend == BTRFS_DEDUP_BACKEND_INMEMORY)
return inmem_add(dedup_info, hash);
+ if (dedup_info->backend == BTRFS_DEDUP_BACKEND_ONDISK)
+ return ondisk_add(trans, dedup_info, hash);
return -EINVAL;
}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 15/19] btrfs: dedup: Add ioctl for inband deduplication
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (13 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 14/19] btrfs: dedup: Add support for adding " Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 16/19] btrfs: dedup: add an inode nodedup flag Qu Wenruo
` (3 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Add ioctl interface for inband deduplication, which includes:
1) enable
2) disable
3) status
We will later add ioctl to disable inband dedup for given file/dir.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/dedup.c | 52 +++++++++++++++++++++++++++++++++++++++++-----
fs/btrfs/dedup.h | 10 ++++++++-
fs/btrfs/ioctl.c | 51 +++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/sysfs.c | 2 ++
include/uapi/linux/btrfs.h | 24 +++++++++++++++++++++
5 files changed, 133 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/dedup.c b/fs/btrfs/dedup.c
index 073726d..54d3477 100644
--- a/fs/btrfs/dedup.c
+++ b/fs/btrfs/dedup.c
@@ -137,12 +137,12 @@ out:
}
int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
- u64 blocksize, u64 limit_nr)
+ u64 blocksize, u64 limit_nr, u64 limit_mem)
{
struct btrfs_dedup_info *dedup_info;
int create_tree;
u64 compat_ro_flag = btrfs_super_compat_ro_flags(fs_info->super_copy);
- u64 limit = limit_nr;
+ u64 limit;
int ret = 0;
/* Sanity check */
@@ -155,11 +155,22 @@ int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
return -EINVAL;
if (backend >= BTRFS_DEDUP_BACKEND_LAST)
return -EINVAL;
+ /* Only one limit is accepted */
+ if (limit_nr && limit_mem)
+ return -EINVAL;
- if (backend == BTRFS_DEDUP_BACKEND_INMEMORY && limit_nr == 0)
- limit = 4096; /* default value */
- if (backend == BTRFS_DEDUP_BACKEND_ONDISK && limit_nr != 0)
+ if (backend == BTRFS_DEDUP_BACKEND_INMEMORY) {
+ if (!limit_nr && !limit_mem)
+ limit = BTRFS_DEDUP_LIMIT_NR_DEFAULT;
+ else if (limit_nr)
+ limit = limit_nr;
+ else
+ limit = limit_mem / (sizeof(struct inmem_hash) +
+ btrfs_dedup_sizes[type]);
+ }
+ if (backend == BTRFS_DEDUP_BACKEND_ONDISK)
limit = 0;
+
/* Ondisk backend needs DEDUP RO compat feature */
if (!(compat_ro_flag & BTRFS_FEATURE_COMPAT_RO_DEDUP) &&
backend == BTRFS_DEDUP_BACKEND_ONDISK)
@@ -211,6 +222,37 @@ out:
return ret;
}
+void btrfs_dedup_status(struct btrfs_fs_info *fs_info,
+ struct btrfs_ioctl_dedup_args *dargs)
+{
+ struct btrfs_dedup_info *dedup_info;
+
+ dedup_info = btrfs_dedup_get_info(fs_info);
+ if (!dedup_info) {
+ dargs->status = 0;
+ dargs->blocksize = 0;
+ dargs->backend = 0;
+ dargs->hash_type = 0;
+ dargs->limit_nr = 0;
+ dargs->current_nr = 0;
+ btrfs_dedup_put_info(dedup_info);
+ return;
+ }
+ mutex_lock(&dedup_info->lock);
+ dargs->status = 1;
+ dargs->blocksize = dedup_info->blocksize;
+ dargs->backend = dedup_info->backend;
+ dargs->hash_type = dedup_info->hash_type;
+ dargs->limit_nr = dedup_info->limit_nr;
+ dargs->limit_mem = dedup_info->limit_nr *
+ (sizeof(struct inmem_hash) +
+ btrfs_dedup_sizes[dedup_info->hash_type]);
+ dargs->current_nr = dedup_info->current_nr;
+ mutex_unlock(&dedup_info->lock);
+ btrfs_dedup_put_info(dedup_info);
+ return;
+}
+
int btrfs_dedup_resume(struct btrfs_fs_info *fs_info,
struct btrfs_root *dedup_root)
{
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
index 1661fe6..9db4907 100644
--- a/fs/btrfs/dedup.h
+++ b/fs/btrfs/dedup.h
@@ -104,7 +104,15 @@ static inline struct btrfs_dedup_hash *btrfs_dedup_alloc_hash(u16 type)
* Called at dedup enable time.
*/
int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
- u64 blocksize, u64 limit_nr);
+ u64 blocksize, u64 limit_nr, u64 limit_mem);
+
+/*
+ * Get inband dedup info
+ * Since it needs to access different backends' hash size, which
+ * is not exported, we need such simple function.
+ */
+void btrfs_dedup_status(struct btrfs_fs_info *fs_info,
+ struct btrfs_ioctl_dedup_args *dargs);
/*
* Disable dedup and invalidate all its dedup data.
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1568f57..2bd1a97 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -59,6 +59,7 @@
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "dedup.h"
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -3220,6 +3221,54 @@ out:
return ret;
}
+static long btrfs_ioctl_dedup_ctl(struct btrfs_root *root, void __user *args)
+{
+ struct btrfs_ioctl_dedup_args *dargs;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ dargs = memdup_user(args, sizeof(*dargs));
+ if (IS_ERR(dargs)) {
+ ret = PTR_ERR(dargs);
+ return ret;
+ }
+
+ if (dargs->cmd >= BTRFS_DEDUP_CTL_LAST) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (dargs->cmd) {
+ case BTRFS_DEDUP_CTL_ENABLE:
+ mutex_lock(&fs_info->dedup_ioctl_lock);
+ ret = btrfs_dedup_enable(fs_info, dargs->hash_type,
+ dargs->backend, dargs->blocksize,
+ dargs->limit_nr, dargs->limit_mem);
+ mutex_unlock(&fs_info->dedup_ioctl_lock);
+ break;
+ case BTRFS_DEDUP_CTL_DISABLE:
+ mutex_lock(&fs_info->dedup_ioctl_lock);
+ ret = btrfs_dedup_disable(fs_info);
+ mutex_unlock(&fs_info->dedup_ioctl_lock);
+ break;
+ case BTRFS_DEDUP_CTL_STATUS:
+ btrfs_dedup_status(fs_info, dargs);
+ if (copy_to_user(args, dargs, sizeof(*dargs)))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+out:
+ kfree(dargs);
+ return ret;
+}
+
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
struct inode *inode,
u64 endoff,
@@ -5584,6 +5633,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_set_fslabel(file, argp);
case BTRFS_IOC_FILE_EXTENT_SAME:
return btrfs_ioctl_file_extent_same(file, argp);
+ case BTRFS_IOC_DEDUP_CTL:
+ return btrfs_ioctl_dedup_ctl(root, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
return btrfs_ioctl_get_supported_features(file, argp);
case BTRFS_IOC_GET_FEATURES:
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 539e7b5..75e5ca85 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -203,6 +203,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
+BTRFS_FEAT_ATTR_COMPAT_RO(dedup, DEDUP);
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -215,6 +216,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(skinny_metadata),
BTRFS_FEAT_ATTR_PTR(no_holes),
BTRFS_FEAT_ATTR_PTR(free_space_tree),
+ BTRFS_FEAT_ATTR_PTR(dedup),
NULL
};
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index dea8931..666f940 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -445,6 +445,28 @@ struct btrfs_ioctl_get_dev_stats {
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
+/*
+ * de-duplication control modes
+ * For re-config, re-enable will handle it
+ * TODO: Add support to disable per-file/dir dedup operation
+ */
+#define BTRFS_DEDUP_CTL_ENABLE 1
+#define BTRFS_DEDUP_CTL_DISABLE 2
+#define BTRFS_DEDUP_CTL_STATUS 3
+#define BTRFS_DEDUP_CTL_LAST 4
+struct btrfs_ioctl_dedup_args {
+ __u16 cmd; /* In: command(see above macro) */
+ __u64 blocksize; /* In/Out: For enable/status */
+ __u64 limit_nr; /* In/Out: For enable/status */
+ __u64 limit_mem; /* In/Out: For enable/status */
+ __u64 current_nr; /* Out: For status output */
+ __u16 backend; /* In/Out: For enable/status */
+ __u16 hash_type; /* In/Out: For enable/status */
+ u8 status; /* Out: For status output */
+ /* pad to 512 bytes */
+ u8 __unused[473];
+};
+
#define BTRFS_QUOTA_CTL_ENABLE 1
#define BTRFS_QUOTA_CTL_DISABLE 2
#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED 3
@@ -653,6 +675,8 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
struct btrfs_ioctl_dev_replace_args)
#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
struct btrfs_ioctl_same_args)
+#define BTRFS_IOC_DEDUP_CTL _IOWR(BTRFS_IOCTL_MAGIC, 55, \
+ struct btrfs_ioctl_dedup_args)
#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
struct btrfs_ioctl_feature_flags)
#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 16/19] btrfs: dedup: add an inode nodedup flag
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (14 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 15/19] btrfs: dedup: Add ioctl for inband deduplication Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 17/19] btrfs: dedup: add a property handler for online dedup Qu Wenruo
` (2 subsequent siblings)
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Introduce BTRFS_INODE_NODEDUP flag, then we can explicitly disable
online data deduplication for specified files.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/ctree.h | 1 +
fs/btrfs/ioctl.c | 6 +++++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8c44093..54e81f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2401,6 +2401,7 @@ do { \
#define BTRFS_INODE_NOATIME (1 << 9)
#define BTRFS_INODE_DIRSYNC (1 << 10)
#define BTRFS_INODE_COMPRESS (1 << 11)
+#define BTRFS_INODE_NODEDUP (1 << 12)
#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2bd1a97..0dab40c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -159,7 +159,8 @@ void btrfs_update_iflags(struct inode *inode)
/*
* Inherit flags from the parent inode.
*
- * Currently only the compression flags and the cow flags are inherited.
+ * Currently only the compression flags, the dedup flags and the cow
+ * flags are inherited.
*/
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
{
@@ -184,6 +185,9 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
}
+ if (flags & BTRFS_INODE_NODEDUP)
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODEDUP;
+
btrfs_update_iflags(inode);
}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 17/19] btrfs: dedup: add a property handler for online dedup
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (15 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 16/19] btrfs: dedup: add an inode nodedup flag Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 18/19] btrfs: dedup: add per-file online dedup control Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 19/19] btrfs: try more times to alloc metadata reserve space Qu Wenruo
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
We use btrfs extended attribute "btrfs.dedup" to record per-file online
dedup status, so add a dedup property handler.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/props.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f9e6023..fb82080 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -41,6 +41,10 @@ static int prop_compression_apply(struct inode *inode,
size_t len);
static const char *prop_compression_extract(struct inode *inode);
+static int prop_dedup_validate(const char *value, size_t len);
+static int prop_dedup_apply(struct inode *inode, const char *value, size_t len);
+static const char *prop_dedup_extract(struct inode *inode);
+
static struct prop_handler prop_handlers[] = {
{
.xattr_name = XATTR_BTRFS_PREFIX "compression",
@@ -49,6 +53,13 @@ static struct prop_handler prop_handlers[] = {
.extract = prop_compression_extract,
.inheritable = 1
},
+ {
+ .xattr_name = XATTR_BTRFS_PREFIX "dedup",
+ .validate = prop_dedup_validate,
+ .apply = prop_dedup_apply,
+ .extract = prop_dedup_extract,
+ .inheritable = 1
+ },
};
void __init btrfs_props_init(void)
@@ -425,4 +436,33 @@ static const char *prop_compression_extract(struct inode *inode)
return NULL;
}
+static int prop_dedup_validate(const char *value, size_t len)
+{
+ if (!strncmp("disable", value, len))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int prop_dedup_apply(struct inode *inode, const char *value, size_t len)
+{
+ if (len == 0) {
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODEDUP;
+ return 0;
+ }
+
+ if (!strncmp("disable", value, len)) {
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODEDUP;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static const char *prop_dedup_extract(struct inode *inode)
+{
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODEDUP)
+ return "disable";
+ return NULL;
+}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 18/19] btrfs: dedup: add per-file online dedup control
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (16 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 17/19] btrfs: dedup: add a property handler for online dedup Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
2016-02-02 3:05 ` [PATCH v5 19/19] btrfs: try more times to alloc metadata reserve space Qu Wenruo
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Introduce inode_need_dedup() to implement per-file online dedup control.
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/inode.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1e27a71..1973f86 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -676,6 +676,18 @@ static void free_async_extent_pages(struct async_extent *async_extent)
async_extent->pages = NULL;
}
+static inline int inode_need_dedup(struct btrfs_dedup_info *dedup_info,
+ struct inode *inode)
+{
+ if (!dedup_info)
+ return 0;
+
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODEDUP)
+ return 0;
+
+ return 1;
+}
+
/*
* phase two of compressed writeback. This is the ordered portion
* of the code, which only gets called in the order the work was
@@ -1634,7 +1646,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode) && !dedup_info) {
+ } else if (!inode_need_compress(inode) &&
+ !inode_need_dedup(dedup_info, inode)) {
ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1, NULL);
} else {
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 19/19] btrfs: try more times to alloc metadata reserve space
2016-02-02 3:05 [PATCH v5 00/19][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
` (17 preceding siblings ...)
2016-02-02 3:05 ` [PATCH v5 18/19] btrfs: dedup: add per-file online dedup control Qu Wenruo
@ 2016-02-02 3:05 ` Qu Wenruo
18 siblings, 0 replies; 20+ messages in thread
From: Qu Wenruo @ 2016-02-02 3:05 UTC (permalink / raw)
To: linux-btrfs; +Cc: Wang Xiaoguang
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
In btrfs_delalloc_reserve_metadata(), the number of metadata bytes we try
to reserve is calculated by the difference between outstanding_extents and
reserved_extents.
When reserve_metadata_bytes() fails to reserve the desired metadata space,
it has already done some reclaim work, such as write ordered extents.
In that case, outstanding_extents and reserved_extents may already
changed, and we may reserve enough metadata space then.
So this patch will try to call reserve_metadata_bytes() at most 3 times
to ensure we really run out of space.
Such false ENOSPC is mainly caused by small file extents and time
consuming delalloc functions, which mainly affects in-band
de-duplication. (Compression should also be affected, but LZO/zlib is
faster than SHA256, so it is still harder to trigger there than with dedup).
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
fs/btrfs/extent-tree.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f9fc25c..0a6d172 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5671,6 +5671,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
bool delalloc_lock = true;
u64 to_free = 0;
unsigned dropped;
+ int loops = 0;
/* If we are a free space inode we need to not flush since we will be in
* the middle of a transaction commit. We also don't need the delalloc
@@ -5686,11 +5687,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
+ num_bytes = ALIGN(num_bytes, root->sectorsize);
+
+again:
if (delalloc_lock)
mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
- num_bytes = ALIGN(num_bytes, root->sectorsize);
-
spin_lock(&BTRFS_I(inode)->lock);
nr_extents = (unsigned)div64_u64(num_bytes +
BTRFS_MAX_EXTENT_SIZE - 1,
@@ -5811,6 +5813,23 @@ out_fail:
}
if (delalloc_lock)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
+ /*
+ * The number of metadata bytes is calculated by the difference
+ * between outstanding_extents and reserved_extents. Sometimes though
+ * reserve_metadata_bytes() fails to reserve the wanted metadata bytes,
+ * indeed it has already done some work to reclaim metadata space, hence
+ * both outstanding_extents and reserved_extents would have changed and
+ * the bytes we try to reserve would also have changed (may be smaller).
+ * So here we try to reserve again. This is very useful for online
+ * dedup, which can easily consume almost all metadata space.
+ *
+ * XXX: The retry count of 3 is arbitrarily chosen; it is a good workaround
+ * for online dedup, but later we should find a better method to avoid the
+ * dedup ENOSPC issue.
+ */
+ if (unlikely(ret == -ENOSPC && loops++ < 3))
+ goto again;
+
return ret;
}
--
2.7.0
^ permalink raw reply related [flat|nested] 20+ messages in thread