From: Andiry Xu <jix024@eng.ucsd.edu>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-nvdimm@lists.01.org
Cc: coughlan@redhat.com, miklos@szeredi.hu,
Andiry Xu <jix024@cs.ucsd.edu>,
david@fromorbit.com, jack@suse.com, swanson@cs.ucsd.edu,
swhiteho@redhat.com, andiry.xu@gmail.com
Subject: [RFC v2 80/83] Failure recovery: bitmap operations.
Date: Sat, 10 Mar 2018 10:19:01 -0800 [thread overview]
Message-ID: <1520705944-6723-81-git-send-email-jix024@eng.ucsd.edu> (raw)
In-Reply-To: <1520705944-6723-1-git-send-email-jix024@eng.ucsd.edu>
From: Andiry Xu <jix024@cs.ucsd.edu>
Upon system failure, NOVA needs to scan all the inode logs
to rebuild the allocator. During the scan, NOVA records the allocated
log/data pages in a bitmap, and uses that bitmap to rebuild the allocator
once the scan finishes.
Signed-off-by: Andiry Xu <jix024@cs.ucsd.edu>
---
fs/nova/bbuild.c | 252 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nova/bbuild.h | 18 ++++
2 files changed, 270 insertions(+)
diff --git a/fs/nova/bbuild.c b/fs/nova/bbuild.c
index ca51dca..35c661a 100644
--- a/fs/nova/bbuild.c
+++ b/fs/nova/bbuild.c
@@ -414,6 +414,258 @@ void nova_save_blocknode_mappings_to_log(struct super_block *sb)
pi->log_head, pi->log_tail);
}
+/************************** Bitmap operations ****************************/
+
+/* Set @bit in the bit array owned by @scan_bm, using set_bit(). */
+static inline void set_scan_bm(unsigned long bit,
+	struct single_scan_bm *scan_bm)
+{
+	unsigned long *map = scan_bm->bitmap;
+
+	set_bit(bit, map);
+}
+
+/*
+ * Set @bit in the scan bitmap of @bm selected by @type.
+ *
+ * BM_4K, BM_2M and BM_1G pick scan_bm_4K, scan_bm_2M and scan_bm_1G
+ * respectively; any other @type is silently ignored.
+ */
+inline void set_bm(unsigned long bit, struct scan_bitmap *bm,
+	enum bm_type type)
+{
+	struct single_scan_bm *target;
+
+	if (type == BM_4K)
+		target = &bm->scan_bm_4K;
+	else if (type == BM_2M)
+		target = &bm->scan_bm_2M;
+	else if (type == BM_1G)
+		target = &bm->scan_bm_1G;
+	else
+		return;
+
+	set_scan_bm(bit, target);
+}
+
+/*
+ * Record the block range [@low, @high] (both inclusive) as a new range
+ * node in the block tree of the free list belonging to @cpuid, and update
+ * that free list's bookkeeping (first/last node, node and block counts).
+ *
+ * Returns 0 on success, -ENOMEM if no range node could be allocated, or
+ * the error reported by nova_insert_blocktree(); in the latter case the
+ * freshly allocated node is released again.
+ */
+static int nova_insert_blocknode_map(struct super_block *sb,
+	int cpuid, unsigned long low, unsigned long high)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	unsigned long count = high - low + 1;
+	struct nova_range_node *node;
+	struct free_list *list;
+	int err;
+
+	nova_dbgv("%s: cpu %d, low %lu, high %lu, num %lu\n",
+		  __func__, cpuid, low, high, count);
+
+	list = nova_get_free_list(sb, cpuid);
+
+	node = nova_alloc_blocknode(sb);
+	if (!node)
+		return -ENOMEM;
+
+	node->range_low = low;
+	node->range_high = high;
+
+	err = nova_insert_blocktree(sbi, &list->block_free_tree, node);
+	if (err) {
+		nova_err(sb, "%s failed\n", __func__);
+		nova_free_blocknode(sb, node);
+		return err;
+	}
+
+	/* The first node ever inserted also becomes first_node. */
+	if (!list->first_node)
+		list->first_node = node;
+	list->last_node = node;
+	list->num_blocknode++;
+	list->num_free_blocks += count;
+
+	return 0;
+}
+
+/*
+ * Walk @bitmap and turn every run of clear bits into a free-range node.
+ *
+ * The walk proceeds CPU by CPU: for each free list it searches the window
+ * [block_start, block_end + 1) for runs of zero bits and hands each run,
+ * scaled by @scale, to nova_insert_blocknode_map() for that CPU. The walk
+ * stops when a search result equals @bsize or when the window of the last
+ * CPU is exhausted.
+ *
+ * A failed insertion is only reported through nova_dbg(); the function
+ * always returns 0.
+ */
+static int __nova_build_blocknode_map(struct super_block *sb,
+	unsigned long *bitmap, unsigned long bsize, unsigned long scale)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	int cpuid = 0;
+	struct free_list *free_list = nova_get_free_list(sb, cpuid);
+	unsigned long start = free_list->block_start;
+	unsigned long end = free_list->block_end + 1;
+	unsigned long free_lo, free_hi;
+
+	for (;;) {
+		/* Start of the next free (clear) run in this CPU's window. */
+		free_lo = find_next_zero_bit(bitmap, end, start);
+		if (free_lo == bsize)
+			break;
+
+		if (free_lo != end) {
+			/* The run ends at the next set bit (or at @end). */
+			free_hi = find_next_bit(bitmap, end, free_lo);
+			if (nova_insert_blocknode_map(sb, cpuid,
+					free_lo << scale,
+					(free_hi << scale) - 1))
+				nova_dbg("Error: could not insert %lu - %lu\n",
+					 free_lo << scale,
+					 ((free_hi << scale) - 1));
+
+			start = free_hi;
+			if (free_hi == bsize)
+				break;
+			if (free_hi != end)
+				continue;
+		}
+
+		/* This CPU's window is exhausted: move on to the next one. */
+		if (cpuid == sbi->cpus - 1)
+			break;
+
+		cpuid++;
+		free_list = nova_get_free_list(sb, cpuid);
+		start = free_list->block_start;
+		end = free_list->block_end + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Expand a large-page bitmap into the 4K bitmap of @bm.
+ *
+ * For every run of set bits [low, next) found in @bitmap (searched over
+ * @bsize bits), the bits [low << @scale, next << @scale) are set in the
+ * BM_4K bitmap of @bm through set_bm(). @sb is not used here.
+ */
+static void nova_update_4K_map(struct super_block *sb,
+	struct scan_bitmap *bm, unsigned long *bitmap,
+	unsigned long bsize, unsigned long scale)
+{
+	unsigned long next = 0;
+	unsigned long low;
+	unsigned long i;
+
+	while (1) {
+		next = find_next_bit(bitmap, bsize, next);
+		if (next == bsize)
+			break;
+		low = next;
+		next = find_next_zero_bit(bitmap, bsize, next);
+		/*
+		 * The scaled indices are unsigned long; the counter must be
+		 * as wide, otherwise it overflows once the 4K index no
+		 * longer fits in an int.
+		 */
+		for (i = low << scale; i < (next << scale); i++)
+			set_bm(i, bm, BM_4K);
+		if (next == bsize)
+			break;
+	}
+}
+
+struct scan_bitmap *global_bm[MAX_CPUS]; /* one scan bitmap set per CPU; slots filled by alloc_bm(), released by free_bm() */
+
+/*
+ * Rebuild the free lists from the per-CPU scan bitmaps in global_bm[].
+ *
+ * Steps:
+ *  1. fold each CPU's 2M and 1G bitmaps into that CPU's 4K bitmap;
+ *  2. OR all per-CPU 4K bitmaps into one temporary bitmap;
+ *  3. let __nova_build_blocknode_map() build the free-range nodes from it.
+ *
+ * @initsize is the size used to dimension the 4K bitmap (one bit per
+ * 1 << PAGE_SHIFT bytes).
+ *
+ * Returns -ENOMEM if the temporary bitmap cannot be allocated, otherwise
+ * the result of __nova_build_blocknode_map(). The temporary bitmap is
+ * always freed before returning; global_bm[] is left untouched.
+ */
+static int nova_build_blocknode_map(struct super_block *sb,
+	unsigned long initsize)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	struct scan_bitmap *bm;
+	struct scan_bitmap *final_bm;
+	unsigned long *src, *dst;
+	unsigned long num, j;
+	int i;
+	int ret;
+
+	final_bm = kzalloc(sizeof(*final_bm), GFP_KERNEL);
+	if (!final_bm)
+		return -ENOMEM;
+
+	final_bm->scan_bm_4K.bitmap_size =
+				(initsize >> (PAGE_SHIFT + 0x3));
+
+	/* Number of unsigned longs covering bitmap_size bytes, rounded up */
+	num = final_bm->scan_bm_4K.bitmap_size / sizeof(unsigned long);
+	if (final_bm->scan_bm_4K.bitmap_size % sizeof(unsigned long))
+		num++;
+
+	/*
+	 * Alloc memory to hold the block alloc bitmap. Allocate whole
+	 * words rather than bitmap_size bytes: the merge loop below writes
+	 * @num unsigned longs and must not run past the allocation when
+	 * bitmap_size is not a multiple of sizeof(unsigned long).
+	 */
+	final_bm->scan_bm_4K.bitmap = kcalloc(num, sizeof(unsigned long),
+						GFP_KERNEL);
+
+	if (!final_bm->scan_bm_4K.bitmap) {
+		kfree(final_bm);
+		return -ENOMEM;
+	}
+
+	/*
+	 * We are using free lists. Set 2M and 1G blocks in 4K map,
+	 * and use 4K map to rebuild block map.
+	 */
+	for (i = 0; i < sbi->cpus; i++) {
+		bm = global_bm[i];
+		nova_update_4K_map(sb, bm, bm->scan_bm_2M.bitmap,
+			bm->scan_bm_2M.bitmap_size * 8, PAGE_SHIFT_2M - 12);
+		nova_update_4K_map(sb, bm, bm->scan_bm_1G.bitmap,
+			bm->scan_bm_1G.bitmap_size * 8, PAGE_SHIFT_1G - 12);
+	}
+
+	/* Merge per-CPU bms to the final single bm */
+	dst = final_bm->scan_bm_4K.bitmap;
+	for (i = 0; i < sbi->cpus; i++) {
+		src = global_bm[i]->scan_bm_4K.bitmap;
+
+		for (j = 0; j < num; j++)
+			dst[j] |= src[j];
+	}
+
+	ret = __nova_build_blocknode_map(sb, final_bm->scan_bm_4K.bitmap,
+			final_bm->scan_bm_4K.bitmap_size * 8, PAGE_SHIFT - 12);
+
+	kfree(final_bm->scan_bm_4K.bitmap);
+	kfree(final_bm);
+
+	return ret;
+}
+
+/*
+ * Release the scan bitmaps of the first sbi->cpus slots of global_bm[].
+ *
+ * Empty (NULL) slots are skipped. Every released slot is reset to NULL:
+ * global_bm[] has file lifetime, so a pointer left behind would be freed
+ * a second time by the next free_bm() call that finds it still there.
+ */
+static void free_bm(struct super_block *sb)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	struct scan_bitmap *bm;
+	int i;
+
+	for (i = 0; i < sbi->cpus; i++) {
+		bm = global_bm[i];
+		if (!bm)
+			continue;
+
+		/* kfree() accepts NULL, so partially set up entries are fine */
+		kfree(bm->scan_bm_4K.bitmap);
+		kfree(bm->scan_bm_2M.bitmap);
+		kfree(bm->scan_bm_1G.bitmap);
+		kfree(bm);
+		global_bm[i] = NULL;
+	}
+}
+
+/*
+ * Size and allocate one zeroed scan bitmap holding one bit per
+ * (1 << @shift) bytes of @initsize.
+ *
+ * bitmap_size keeps its established meaning and value: the bit count
+ * divided by 8, rounded down. The backing memory, however, is allocated
+ * in whole unsigned longs (at least one), because set_bit() and the
+ * find_next_*bit() helpers access the bitmap a word at a time, and a
+ * zero-byte allocation would yield a non-NULL pointer that must not be
+ * written to.
+ *
+ * NOTE(review): bits beyond bitmap_size * 8 are not visited by the scans
+ * that use bitmap_size as their bound - confirm whether bitmap_size
+ * should be rounded up as well.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int alloc_single_scan_bm(struct single_scan_bm *scan_bm,
+	unsigned long initsize, unsigned int shift)
+{
+	unsigned long nbits = initsize >> shift;
+	unsigned long nlongs = BITS_TO_LONGS(nbits);
+
+	if (!nlongs)
+		nlongs = 1;
+
+	scan_bm->bitmap_size = nbits >> 0x3;
+	scan_bm->bitmap = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
+
+	return scan_bm->bitmap ? 0 : -ENOMEM;
+}
+
+/*
+ * Allocate the 4K, 2M and 1G scan bitmaps for each of the sbi->cpus CPUs
+ * and publish them in global_bm[].
+ *
+ * @initsize is the size used to dimension the bitmaps.
+ *
+ * Returns 0 on success. On -ENOMEM nothing is freed here: whatever was
+ * allocated so far stays reachable through global_bm[] and every slot
+ * that was not reached is NULL, so free_bm() can release the partial
+ * state safely (presumably the caller does so - confirm).
+ */
+static int alloc_bm(struct super_block *sb, unsigned long initsize)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	struct scan_bitmap *bm;
+	int i;
+
+	/*
+	 * Drop stale pointers from an earlier run so that a failure below
+	 * never leaves an already freed bitmap visible in global_bm[].
+	 */
+	for (i = 0; i < sbi->cpus; i++)
+		global_bm[i] = NULL;
+
+	for (i = 0; i < sbi->cpus; i++) {
+		bm = kzalloc(sizeof(*bm), GFP_KERNEL);
+		if (!bm)
+			return -ENOMEM;
+
+		global_bm[i] = bm;
+
+		/* Alloc memory to hold the block alloc bitmaps */
+		if (alloc_single_scan_bm(&bm->scan_bm_4K, initsize,
+					 PAGE_SHIFT) ||
+		    alloc_single_scan_bm(&bm->scan_bm_2M, initsize,
+					 PAGE_SHIFT_2M) ||
+		    alloc_single_scan_bm(&bm->scan_bm_1G, initsize,
+					 PAGE_SHIFT_1G))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+
/*********************** Recovery entrance *************************/
/* Return TRUE if we can do a normal unmount recovery */
diff --git a/fs/nova/bbuild.h b/fs/nova/bbuild.h
index 2c3deb0..b093e05 100644
--- a/fs/nova/bbuild.h
+++ b/fs/nova/bbuild.h
@@ -1,6 +1,24 @@
#ifndef __BBUILD_H
#define __BBUILD_H
+enum bm_type { /* selects which bitmap of a struct scan_bitmap set_bm() updates */
+ BM_4K = 0, /* scan_bm_4K */
+ BM_2M, /* scan_bm_2M */
+ BM_1G, /* scan_bm_1G */
+};
+
+struct single_scan_bm { /* one scan bitmap together with its size */
+ unsigned long bitmap_size; /* size in bytes; users multiply by 8 to get the bit count */
+ unsigned long *bitmap; /* bit array, updated with set_bit() and searched with find_next_bit() */
+};
+
+struct scan_bitmap { /* set of scan bitmaps, one per block granularity; one set exists per CPU in global_bm[] */
+ struct single_scan_bm scan_bm_4K; /* selected by BM_4K; sized with PAGE_SHIFT in alloc_bm() */
+ struct single_scan_bm scan_bm_2M; /* selected by BM_2M; sized with PAGE_SHIFT_2M in alloc_bm() */
+ struct single_scan_bm scan_bm_1G; /* selected by BM_1G; sized with PAGE_SHIFT_1G in alloc_bm() */
+};
+
+
void nova_init_header(struct super_block *sb,
struct nova_inode_info_header *sih, u16 i_mode);
void nova_save_inode_list_to_log(struct super_block *sb);
--
2.7.4
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm
next prev parent reply other threads:[~2018-03-10 18:15 UTC|newest]
Thread overview: 119+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-10 18:17 [RFC v2 00/83] NOVA: a new file system for persistent memory Andiry Xu
2018-03-10 18:17 ` [RFC v2 01/83] Introduction and documentation of NOVA filesystem Andiry Xu
2018-03-19 20:43 ` Randy Dunlap
2018-03-19 23:00 ` Andiry Xu
2018-04-22 8:05 ` Pavel Machek
2018-03-10 18:17 ` [RFC v2 02/83] Add nova_def.h Andiry Xu
2018-03-10 18:17 ` [RFC v2 03/83] Add super.h Andiry Xu
2018-03-15 4:54 ` Darrick J. Wong
2018-03-15 6:11 ` Andiry Xu
2018-03-15 9:05 ` Arnd Bergmann
2018-03-15 17:51 ` Andiry Xu
2018-03-15 20:04 ` Andreas Dilger
2018-03-15 20:38 ` Arnd Bergmann
2018-03-16 2:59 ` Theodore Y. Ts'o
2018-03-16 6:17 ` Andiry Xu
2018-03-16 6:30 ` Darrick J. Wong
2018-03-16 9:19 ` Arnd Bergmann
2018-03-10 18:17 ` [RFC v2 04/83] NOVA inode definition Andiry Xu
2018-03-15 5:06 ` Darrick J. Wong
2018-03-15 6:16 ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 05/83] Add NOVA filesystem definitions and useful helper routines Andiry Xu
2018-03-11 12:00 ` Nikolay Borisov
2018-03-11 19:22 ` Eric Biggers
2018-03-11 21:45 ` Andiry Xu
2018-03-19 19:39 ` Andiry Xu
2018-03-19 20:30 ` Eric Biggers
2018-03-19 21:59 ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 06/83] Add inode get/read methods Andiry Xu
2018-04-23 6:12 ` Darrick J. Wong
2018-04-23 15:55 ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 07/83] Initialize inode_info and rebuild inode information in nova_iget() Andiry Xu
2018-03-10 18:17 ` [RFC v2 08/83] NOVA superblock operations Andiry Xu
2018-03-10 18:17 ` [RFC v2 09/83] Add Kconfig and Makefile Andiry Xu
2018-03-11 12:15 ` Nikolay Borisov
2018-03-11 21:32 ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 10/83] Add superblock integrity check Andiry Xu
2018-03-10 18:17 ` [RFC v2 11/83] Add timing and I/O statistics for performance analysis and profiling Andiry Xu
2018-03-10 18:17 ` [RFC v2 12/83] Add timing for mount and init Andiry Xu
2018-03-10 18:17 ` [RFC v2 13/83] Add remount_fs and show_options methods Andiry Xu
2018-03-10 18:17 ` [RFC v2 14/83] Add range node kmem cache Andiry Xu
2018-03-11 11:55 ` Nikolay Borisov
2018-03-11 21:31 ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 15/83] Add free list data structure Andiry Xu
2018-03-10 18:17 ` [RFC v2 16/83] Initialize block map and free lists in nova_init() Andiry Xu
2018-03-11 12:12 ` Nikolay Borisov
2018-03-11 21:30 ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 17/83] Add statfs support Andiry Xu
2018-03-10 18:17 ` [RFC v2 18/83] Add freelist statistics printing Andiry Xu
2018-03-10 18:18 ` [RFC v2 19/83] Add pmem block free routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 20/83] Pmem block allocation routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 21/83] Add log structure Andiry Xu
2018-03-10 18:18 ` [RFC v2 22/83] Inode log pages allocation and reclaimation Andiry Xu
2018-03-10 18:18 ` [RFC v2 23/83] Save allocator to pmem in put_super Andiry Xu
2018-03-10 18:18 ` [RFC v2 24/83] Initialize and allocate inode table Andiry Xu
2018-03-10 18:18 ` [RFC v2 25/83] Support get normal inode address and inode table extentsion Andiry Xu
2018-03-10 18:18 ` [RFC v2 26/83] Add inode_map to track inuse inodes Andiry Xu
2018-03-10 18:18 ` [RFC v2 27/83] Save the inode inuse list to pmem upon umount Andiry Xu
2018-03-10 18:18 ` [RFC v2 28/83] Add NOVA address space operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 29/83] Add write_inode and dirty_inode routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 30/83] New NOVA inode allocation Andiry Xu
2018-03-10 18:18 ` [RFC v2 31/83] Add new vfs " Andiry Xu
2018-03-10 18:18 ` [RFC v2 32/83] Add log entry definitions Andiry Xu
2018-03-10 18:18 ` [RFC v2 33/83] Inode log and entry printing for debug purpose Andiry Xu
2018-03-10 18:18 ` [RFC v2 34/83] Journal: NOVA light weight journal definitions Andiry Xu
2018-03-10 18:18 ` [RFC v2 35/83] Journal: Lite journal helper routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 36/83] Journal: Lite journal recovery Andiry Xu
2018-03-10 18:18 ` [RFC v2 37/83] Journal: Lite journal create and commit Andiry Xu
2018-03-10 18:18 ` [RFC v2 38/83] Journal: NOVA lite journal initialization Andiry Xu
2018-03-10 18:18 ` [RFC v2 39/83] Log operation: dentry append Andiry Xu
2018-03-10 18:18 ` [RFC v2 40/83] Log operation: file write entry append Andiry Xu
2018-03-10 18:18 ` [RFC v2 41/83] Log operation: setattr " Andiry Xu
2018-03-10 18:18 ` [RFC v2 42/83] Log operation: link change append Andiry Xu
2018-03-10 18:18 ` [RFC v2 43/83] Log operation: in-place update log entry Andiry Xu
2018-03-10 18:18 ` [RFC v2 44/83] Log operation: invalidate log entries Andiry Xu
2018-03-10 18:18 ` [RFC v2 45/83] Log operation: file inode log lookup and assign Andiry Xu
2018-03-10 18:18 ` [RFC v2 46/83] Dir: Add Directory radix tree insert/remove methods Andiry Xu
2018-03-10 18:18 ` [RFC v2 47/83] Dir: Add initial dentries when initializing a directory inode log Andiry Xu
2018-03-10 18:18 ` [RFC v2 48/83] Dir: Readdir operation Andiry Xu
2018-03-10 18:18 ` [RFC v2 49/83] Dir: Append create/remove dentry Andiry Xu
2018-03-10 18:18 ` [RFC v2 50/83] Inode: Add nova_evict_inode Andiry Xu
2018-03-10 18:18 ` [RFC v2 51/83] Rebuild: directory inode Andiry Xu
2018-03-10 18:18 ` [RFC v2 52/83] Rebuild: file inode Andiry Xu
2018-03-10 18:18 ` [RFC v2 53/83] Namei: lookup Andiry Xu
2018-03-10 18:18 ` [RFC v2 54/83] Namei: create and mknod Andiry Xu
2018-03-10 18:18 ` [RFC v2 55/83] Namei: mkdir Andiry Xu
2018-03-10 18:18 ` [RFC v2 56/83] Namei: link and unlink Andiry Xu
2018-03-10 18:18 ` [RFC v2 57/83] Namei: rmdir Andiry Xu
2018-03-10 18:18 ` [RFC v2 58/83] Namei: rename Andiry Xu
2018-03-10 18:18 ` [RFC v2 59/83] Namei: setattr Andiry Xu
2018-03-10 18:18 ` [RFC v2 60/83] Add special inode operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 61/83] Super: Add nova_export_ops Andiry Xu
2018-03-10 18:18 ` [RFC v2 62/83] File: getattr and file inode operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 63/83] File operation: llseek Andiry Xu
2018-03-10 18:18 ` [RFC v2 64/83] File operation: open, fsync, flush Andiry Xu
2018-03-10 18:18 ` [RFC v2 65/83] File operation: read Andiry Xu
2018-03-10 18:18 ` [RFC v2 66/83] Super: Add file write item cache Andiry Xu
2018-03-10 18:18 ` [RFC v2 67/83] Dax: commit list of file write items to log Andiry Xu
2018-03-10 18:18 ` [RFC v2 68/83] File operation: copy-on-write write Andiry Xu
2018-03-10 18:18 ` [RFC v2 69/83] Super: Add module param inplace_data_updates Andiry Xu
2018-03-10 18:18 ` [RFC v2 70/83] File operation: Inplace write Andiry Xu
2018-03-10 18:18 ` [RFC v2 71/83] Symlink support Andiry Xu
2018-03-10 18:18 ` [RFC v2 72/83] File operation: fallocate Andiry Xu
2018-03-10 18:18 ` [RFC v2 73/83] Dax: Add iomap operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 74/83] File operation: Mmap Andiry Xu
2018-03-10 18:18 ` [RFC v2 75/83] File operation: read/write iter Andiry Xu
2018-03-10 18:18 ` [RFC v2 76/83] Ioctl support Andiry Xu
2018-03-10 18:18 ` [RFC v2 77/83] GC: Fast garbage collection Andiry Xu
2018-03-10 18:18 ` [RFC v2 78/83] GC: Thorough " Andiry Xu
2018-03-10 18:19 ` [RFC v2 79/83] Normal recovery Andiry Xu
2018-03-10 18:19 ` Andiry Xu [this message]
2018-03-10 18:19 ` [RFC v2 81/83] Failure recovery: Inode pages recovery routines Andiry Xu
2018-03-10 18:19 ` [RFC v2 82/83] Failure recovery: Per-CPU recovery Andiry Xu
2018-03-10 18:19 ` [RFC v2 83/83] Sysfs support Andiry Xu
2018-03-15 0:33 ` Randy Dunlap
2018-03-15 6:07 ` Andiry Xu
2018-03-22 15:00 ` David Sterba
2018-03-23 0:31 ` Andiry Xu
2018-03-11 2:14 ` [RFC v2 00/83] NOVA: a new file system for persistent memory Theodore Y. Ts'o
2018-03-11 4:58 ` Andiry Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1520705944-6723-81-git-send-email-jix024@eng.ucsd.edu \
--to=jix024@eng.ucsd.edu \
--cc=andiry.xu@gmail.com \
--cc=coughlan@redhat.com \
--cc=david@fromorbit.com \
--cc=jack@suse.com \
--cc=jix024@cs.ucsd.edu \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvdimm@lists.01.org \
--cc=miklos@szeredi.hu \
--cc=swanson@cs.ucsd.edu \
--cc=swhiteho@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).