All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
To: <linux-btrfs@vger.kernel.org>
Cc: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Subject: [PATCH v3 01/16] btrfs: dedup: Introduce dedup framework and its header
Date: Thu, 7 Jan 2016 09:08:02 +0800	[thread overview]
Message-ID: <1452128897-5433-2-git-send-email-quwenruo@cn.fujitsu.com> (raw)
In-Reply-To: <1452128897-5433-1-git-send-email-quwenruo@cn.fujitsu.com>

From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>

Introduce the header for the btrfs online (write-time) de-duplication
framework, and add the matching dedup_info pointer to btrfs_fs_info.

The new de-duplication framework is going to support 2 different dedup
methods (storage backends) and 1 dedup hash algorithm.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
v3:
  Reduce the size of struct btrfs_dedup_hash.
  Increase max dedup size to 8M for better performance.
---
 fs/btrfs/ctree.h |   3 ++
 fs/btrfs/dedup.h | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 fs/btrfs/dedup.h

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c23f34..62fed1d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1816,6 +1816,9 @@ struct btrfs_fs_info {
 	 * and will be latter freed. Protected by fs_info->chunk_mutex.
 	 */
 	struct list_head pinned_chunks;
+
+	/* reference to inband de-duplication info */
+	struct btrfs_dedup_info *dedup_info;
 };
 
 struct btrfs_subvolume_writers {
diff --git a/fs/btrfs/dedup.h b/fs/btrfs/dedup.h
new file mode 100644
index 0000000..1e04d89
--- /dev/null
+++ b/fs/btrfs/dedup.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2015 Fujitsu.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_DEDUP__
+#define __BTRFS_DEDUP__
+
+#include <linux/btrfs.h>
+#include <crypto/hash.h>
+
+/*
+ * Dedup storage backend
+ * On-disk is persistent storage, but its overhead is large
+ * In-memory is fast, but loses all its hashes on umount
+ */
+#define BTRFS_DEDUP_BACKEND_INMEMORY		0
+#define BTRFS_DEDUP_BACKEND_ONDISK		1
+#define BTRFS_DEDUP_BACKEND_LAST		2
+
+/* Dedup block size limit and default value */
+#define BTRFS_DEDUP_BLOCKSIZE_MAX	(8 * 1024 * 1024)
+#define BTRFS_DEDUP_BLOCKSIZE_MIN	(16 * 1024)
+#define BTRFS_DEDUP_BLOCKSIZE_DEFAULT	(32 * 1024)
+
+/* Hash algorithm, only SHA256 is supported so far */
+#define BTRFS_DEDUP_HASH_SHA256		0
+
+static int btrfs_dedup_sizes[] = { 32 };
+
+/*
+ * For callers outside of dedup.c
+ *
+ * Different dedup backends should have their own hash structure
+ */
+struct btrfs_dedup_hash {
+	u64 bytenr;
+	u32 num_bytes;
+
+	/* last field is a variable length array of dedup hash */
+	u8 hash[];
+};
+
+struct btrfs_dedup_info {
+	/* dedup blocksize */
+	u64 blocksize;
+	u16 backend;
+	u16 hash_type;
+
+	/* Hash driver */
+	struct crypto_shash *dedup_driver;
+
+	/* following members are only used in in-memory dedup mode */
+	struct rb_root hash_root;
+	struct rb_root bytenr_root;
+	struct list_head lru_list;
+	spinlock_t lock;
+	u64 limit_nr;
+	u64 current_nr;
+};
+
+struct btrfs_trans_handle;
+
+int btrfs_dedup_hash_size(u16 type);
+struct btrfs_dedup_hash *btrfs_dedup_alloc_hash(u16 type);
+
+/*
+ * Initialize inband dedup info
+ * Called at either dedup enable or mount time.
+ */
+int btrfs_dedup_enable(struct btrfs_fs_info *fs_info, u16 type, u16 backend,
+		       u64 blocksize, u64 limit);
+
+/*
+ * Disable dedup and invalidate all its dedup data.
+ * Called at dedup disable time.
+ */
+int btrfs_dedup_disable(struct btrfs_fs_info *fs_info);
+
+/*
+ * Calculate hash for dedup.
+ * Caller must ensure [start, start + dedup_bs) has valid data.
+ */
+int btrfs_dedup_calc_hash(struct btrfs_root *root, struct inode *inode,
+			  u64 start, struct btrfs_dedup_hash *hash);
+
+/*
+ * Search for duplicated extents by calculated hash
+ * Caller must call btrfs_dedup_calc_hash() first to get the hash.
+ *
+ * @inode: the inode we are writing to
+ * @file_pos: offset inside the inode
+ * As we will increase extent ref immediately after a hash match,
+ * we need @file_pos and @inode in this case.
+ *
+ * Return > 0 for a hash match, and the extent ref will be
+ * INCREASED.
+ * Return 0 for a hash miss. Nothing is done
+ */
+int btrfs_dedup_search(struct inode *inode, u64 file_pos,
+		       struct btrfs_dedup_hash *hash);
+
+/* Add a dedup hash into dedup tree */
+int btrfs_dedup_add(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		    struct btrfs_dedup_hash *hash);
+
+/* Remove a dedup hash from dedup tree */
+int btrfs_dedup_del(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+		    u64 bytenr);
+#endif
-- 
2.6.4




  reply	other threads:[~2016-01-07  1:08 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-07  1:08 [PATCH v3 00/14][For 4.6] Btrfs: Add inband (write time) de-duplication framework Qu Wenruo
2016-01-07  1:08 ` Qu Wenruo [this message]
2016-01-07  1:08 ` [PATCH v3 02/16] btrfs: dedup: Introduce function to initialize dedup info Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 03/16] btrfs: dedup: Introduce function to add hash into in-memory tree Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 04/16] btrfs: dedup: Introduce function to remove hash from " Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 05/16] btrfs: delayed-ref: Add support for atomic increasing extent ref Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 06/16] btrfs: delayed_ref: Add support for handle dedup hash Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 07/16] btrfs: dedup: Introduce function to search for an existing hash Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 08/16] btrfs: dedup: Implement btrfs_dedup_calc_hash interface Qu Wenruo
2016-01-07 13:21   ` kbuild test robot
2016-01-07  1:08 ` [PATCH v3 09/16] btrfs: ordered-extent: Add support for dedup Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 10/16] btrfs: dedup: Inband in-memory only de-duplication implement Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 11/16] btrfs: dedup: Add basic tree structure for on-disk dedup method Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 12/16] btrfs: dedup: Introduce interfaces to resume and cleanup dedup info Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 13/16] btrfs: dedup: Add support for on-disk hash search Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 14/16] btrfs: dedup: Add support to delete hash for on-disk backend Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 15/16] btrfs: dedup: Add support for adding " Qu Wenruo
2016-01-07  1:08 ` [PATCH v3 16/16] btrfs: dedup: Add ioctl for inband deduplication Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1452128897-5433-2-git-send-email-quwenruo@cn.fujitsu.com \
    --to=quwenruo@cn.fujitsu.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=wangxg.fnst@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.