From: Andiry Xu <jix024@eng.ucsd.edu> To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org Cc: coughlan@redhat.com, miklos@szeredi.hu, Andiry Xu <jix024@cs.ucsd.edu>, david@fromorbit.com, jack@suse.com, swanson@cs.ucsd.edu, swhiteho@redhat.com, andiry.xu@gmail.com Subject: [RFC v2 82/83] Failure recovery: Per-CPU recovery. Date: Sat, 10 Mar 2018 10:19:03 -0800 [thread overview] Message-ID: <1520705944-6723-83-git-send-email-jix024@eng.ucsd.edu> (raw) In-Reply-To: <1520705944-6723-1-git-send-email-jix024@eng.ucsd.edu> From: Andiry Xu <jix024@cs.ucsd.edu> NOVA starts a recovery thread on each CPU, and scans all the inodes in a parallel way. It recovers the inode inuse list during the scan as well. Signed-off-by: Andiry Xu <jix024@cs.ucsd.edu> --- fs/nova/bbuild.c | 396 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) diff --git a/fs/nova/bbuild.c b/fs/nova/bbuild.c index 75dfcba..3271166 100644 --- a/fs/nova/bbuild.c +++ b/fs/nova/bbuild.c @@ -677,6 +677,11 @@ struct task_ring { u64 *nvmm_array; }; +static struct task_ring *task_rings; +static struct task_struct **threads; +wait_queue_head_t finish_wq; +int *finished; + static int nova_traverse_inode_log(struct super_block *sb, struct nova_inode *pi, struct scan_bitmap *bm, u64 head) { @@ -973,6 +978,378 @@ static int nova_recover_inode_pages(struct super_block *sb, } +static void free_resources(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct task_ring *ring; + int i; + + if (task_rings) { + for (i = 0; i < sbi->cpus; i++) { + ring = &task_rings[i]; + vfree(ring->entry_array); + vfree(ring->nvmm_array); + ring->entry_array = NULL; + ring->nvmm_array = NULL; + } + } + + kfree(task_rings); + kfree(threads); + kfree(finished); +} + +static int failure_thread_func(void *data); + +static int allocate_resources(struct super_block *sb, int cpus) +{ + struct task_ring *ring; + int i; + + task_rings = kcalloc(cpus, 
sizeof(struct task_ring), GFP_KERNEL); + if (!task_rings) + goto fail; + + for (i = 0; i < cpus; i++) { + ring = &task_rings[i]; + + ring->nvmm_array = vzalloc(sizeof(u64) * MAX_PGOFF); + if (!ring->nvmm_array) + goto fail; + + ring->entry_array = vmalloc(sizeof(u64) * MAX_PGOFF); + if (!ring->entry_array) + goto fail; + } + + threads = kcalloc(cpus, sizeof(struct task_struct *), GFP_KERNEL); + if (!threads) + goto fail; + + finished = kcalloc(cpus, sizeof(int), GFP_KERNEL); + if (!finished) + goto fail; + + init_waitqueue_head(&finish_wq); + + for (i = 0; i < cpus; i++) { + threads[i] = kthread_create(failure_thread_func, + sb, "recovery thread"); + kthread_bind(threads[i], i); + } + + return 0; + +fail: + free_resources(sb); + return -ENOMEM; +} + +static void wait_to_finish(int cpus) +{ + int i; + + for (i = 0; i < cpus; i++) { + while (finished[i] == 0) { + wait_event_interruptible_timeout(finish_wq, false, + msecs_to_jiffies(1)); + } + } +} + +/*********************** Failure recovery *************************/ + +static int nova_failure_insert_inodetree(struct super_block *sb, + unsigned long ino_low, unsigned long ino_high) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct inode_map *inode_map; + struct nova_range_node *prev = NULL, *next = NULL; + struct nova_range_node *new_node; + unsigned long internal_low, internal_high; + int cpu; + struct rb_root *tree; + int ret; + + if (ino_low > ino_high) { + nova_err(sb, "%s: ino low %lu, ino high %lu\n", + __func__, ino_low, ino_high); + return -EINVAL; + } + + cpu = ino_low % sbi->cpus; + if (ino_high % sbi->cpus != cpu) { + nova_err(sb, "%s: ino low %lu, ino high %lu\n", + __func__, ino_low, ino_high); + return -EINVAL; + } + + internal_low = ino_low / sbi->cpus; + internal_high = ino_high / sbi->cpus; + inode_map = &sbi->inode_maps[cpu]; + tree = &inode_map->inode_inuse_tree; + mutex_lock(&inode_map->inode_table_mutex); + + ret = nova_find_free_slot(sbi, tree, internal_low, internal_high, + &prev, &next); 
+ if (ret) { + nova_dbg("%s: ino %lu - %lu already exists!: %d\n", + __func__, ino_low, ino_high, ret); + mutex_unlock(&inode_map->inode_table_mutex); + return ret; + } + + if (prev && next && (internal_low == prev->range_high + 1) && + (internal_high + 1 == next->range_low)) { + /* fits the hole */ + rb_erase(&next->node, tree); + inode_map->num_range_node_inode--; + prev->range_high = next->range_high; + nova_free_inode_node(sb, next); + goto finish; + } + if (prev && (internal_low == prev->range_high + 1)) { + /* Aligns left */ + prev->range_high += internal_high - internal_low + 1; + goto finish; + } + if (next && (internal_high + 1 == next->range_low)) { + /* Aligns right */ + next->range_low -= internal_high - internal_low + 1; + goto finish; + } + + /* Aligns somewhere in the middle */ + new_node = nova_alloc_inode_node(sb); + NOVA_ASSERT(new_node); + new_node->range_low = internal_low; + new_node->range_high = internal_high; + ret = nova_insert_inodetree(sbi, new_node, cpu); + if (ret) { + nova_err(sb, "%s failed\n", __func__); + nova_free_inode_node(sb, new_node); + goto finish; + } + inode_map->num_range_node_inode++; + +finish: + mutex_unlock(&inode_map->inode_table_mutex); + return ret; +} + +static inline int nova_failure_update_inodetree(struct super_block *sb, + struct nova_inode *pi, unsigned long *ino_low, unsigned long *ino_high) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + + if (*ino_low == 0) { + *ino_low = *ino_high = pi->nova_ino; + } else { + if (pi->nova_ino == *ino_high + sbi->cpus) { + *ino_high = pi->nova_ino; + } else { + /* A new start */ + nova_failure_insert_inodetree(sb, *ino_low, *ino_high); + *ino_low = *ino_high = pi->nova_ino; + } + } + + return 0; +} + +static int failure_thread_func(void *data) +{ + struct super_block *sb = data; + struct nova_inode_info_header sih; + struct task_ring *ring; + struct nova_inode *pi, fake_pi; + unsigned long num_inodes_per_page; + unsigned long ino_low, ino_high; + unsigned long last_blocknr; 
+ unsigned int data_bits; + u64 curr; + int cpuid = smp_processor_id(); + unsigned long i; + unsigned long max_size = 0; + u64 pi_addr = 0; + int ret = 0; + int count; + + pi = nova_get_inode_by_ino(sb, NOVA_INODETABLE_INO); + data_bits = blk_type_to_shift[pi->i_blk_type]; + num_inodes_per_page = 1 << (data_bits - NOVA_INODE_BITS); + + ring = &task_rings[cpuid]; + nova_init_header(sb, &sih, 0); + + for (count = 0; count < ring->num; count++) { + curr = ring->addr0[count]; + ino_low = ino_high = 0; + + /* + * Note: The inode log page is allocated in 2MB + * granularity, but not aligned on 2MB boundary. + */ + for (i = 0; i < 512; i++) + set_bm((curr >> PAGE_SHIFT) + i, + global_bm[cpuid], BM_4K); + + for (i = 0; i < num_inodes_per_page; i++) { + pi_addr = curr + i * NOVA_INODE_SIZE; + ret = nova_get_reference(sb, pi_addr, &fake_pi, + (void **)&pi, sizeof(struct nova_inode)); + if (ret) { + nova_dbg("Recover pi @ 0x%llx failed\n", + pi_addr); + continue; + } + /* FIXME: Check inode checksum */ + if (fake_pi.i_mode && fake_pi.deleted == 0) { + if (fake_pi.valid == 0) { + /* Deleteable */ + pi->deleted = 1; + fake_pi.deleted = 1; + continue; + } + + nova_recover_inode_pages(sb, &sih, ring, + &fake_pi, global_bm[cpuid]); + nova_failure_update_inodetree(sb, pi, + &ino_low, &ino_high); + if (sih.i_size > max_size) + max_size = sih.i_size; + } + } + + if (ino_low && ino_high) + nova_failure_insert_inodetree(sb, ino_low, ino_high); + } + + /* Free radix tree */ + if (max_size) { + last_blocknr = (max_size - 1) >> PAGE_SHIFT; + nova_delete_file_tree(sb, &sih, 0, last_blocknr, + false, false, 0); + } + + finished[cpuid] = 1; + wake_up_interruptible(&finish_wq); + do_exit(ret); + return ret; +} + +static int nova_failure_recovery_crawl(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct nova_inode_info_header sih; + struct inode_table *inode_table; + struct task_ring *ring; + struct nova_inode *pi, fake_pi; + unsigned long curr_addr; + u64 root_addr; 
+ u64 curr; + int ret = 0; + int count; + int cpuid; + + root_addr = nova_get_reserved_inode_addr(sb, NOVA_ROOT_INO); + + for (cpuid = 0; cpuid < sbi->cpus; cpuid++) { + ring = &task_rings[cpuid]; + inode_table = nova_get_inode_table(sb, cpuid); + if (!inode_table) + return -EINVAL; + + count = 0; + curr = inode_table->log_head; + while (curr) { + if (ring->num >= 512) { + nova_err(sb, "%s: ring size too small\n", + __func__); + return -EINVAL; + } + + ring->addr0[count] = curr; + + count++; + + curr_addr = (unsigned long)nova_get_block(sb, + curr); + /* Next page resides at the last 8 bytes */ + curr_addr += 2097152 - 8; + curr = *(u64 *)(curr_addr); + } + + if (count > ring->num) + ring->num = count; + } + + for (cpuid = 0; cpuid < sbi->cpus; cpuid++) + wake_up_process(threads[cpuid]); + + nova_init_header(sb, &sih, 0); + /* Recover the root iode */ + ret = nova_get_reference(sb, root_addr, &fake_pi, + (void **)&pi, sizeof(struct nova_inode)); + if (ret) { + nova_dbg("Recover root pi failed\n"); + return ret; + } + + nova_recover_inode_pages(sb, &sih, &task_rings[0], + &fake_pi, global_bm[1]); + + return ret; +} + +int nova_failure_recovery(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct task_ring *ring; + struct nova_inode *pi; + struct journal_ptr_pair *pair; + int ret; + int i; + + sbi->s_inodes_used_count = 0; + + /* Initialize inuse inode list */ + if (nova_init_inode_inuse_list(sb) < 0) + return -EINVAL; + + /* Handle special inodes */ + pi = nova_get_inode_by_ino(sb, NOVA_BLOCKNODE_INO); + pi->log_head = pi->log_tail = 0; + nova_flush_buffer(&pi->log_head, CACHELINE_SIZE, 0); + + for (i = 0; i < sbi->cpus; i++) { + pair = nova_get_journal_pointers(sb, i); + + set_bm(pair->journal_head >> PAGE_SHIFT, global_bm[i], BM_4K); + } + + PERSISTENT_BARRIER(); + + ret = allocate_resources(sb, sbi->cpus); + if (ret) + return ret; + + ret = nova_failure_recovery_crawl(sb); + + wait_to_finish(sbi->cpus); + + for (i = 0; i < sbi->cpus; i++) 
{ + ring = &task_rings[i]; + sbi->s_inodes_used_count += ring->inodes_used_count; + } + + free_resources(sb); + + nova_dbg("Failure recovery total recovered %lu\n", + sbi->s_inodes_used_count - NOVA_NORMAL_INODE_START); + return ret; +} + /*********************** Recovery entrance *************************/ /* Return TRUE if we can do a normal unmount recovery */ @@ -1027,7 +1404,23 @@ int nova_recovery(struct super_block *sb) nova_init_blockmap(sb, 1); value = nova_try_normal_recovery(sb); + if (value) { + nova_dbg("NOVA: Normal shutdown\n"); + } else { + nova_dbg("NOVA: Failure recovery\n"); + ret = alloc_bm(sb, initsize); + if (ret) + goto out; + + sbi->s_inodes_used_count = 0; + ret = nova_failure_recovery(sb); + if (ret) + goto out; + ret = nova_build_blocknode_map(sb, initsize); + } + +out: NOVA_END_TIMING(recovery_t, start); if (measure_timing == 0) { getrawmonotonic(&end); @@ -1036,6 +1429,9 @@ int nova_recovery(struct super_block *sb) (end.tv_nsec - start.tv_nsec); } + if (!value) + free_bm(sb); + sbi->s_epoch_id = le64_to_cpu(super->s_epoch_id); return ret; } -- 2.7.4 _______________________________________________ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
WARNING: multiple messages have this Message-ID (diff)
From: Andiry Xu <jix024@eng.ucsd.edu> To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org Cc: dan.j.williams@intel.com, andy.rudoff@intel.com, coughlan@redhat.com, swanson@cs.ucsd.edu, david@fromorbit.com, jack@suse.com, swhiteho@redhat.com, miklos@szeredi.hu, andiry.xu@gmail.com, Andiry Xu <jix024@cs.ucsd.edu> Subject: [RFC v2 82/83] Failure recovery: Per-CPU recovery. Date: Sat, 10 Mar 2018 10:19:03 -0800 [thread overview] Message-ID: <1520705944-6723-83-git-send-email-jix024@eng.ucsd.edu> (raw) In-Reply-To: <1520705944-6723-1-git-send-email-jix024@eng.ucsd.edu> From: Andiry Xu <jix024@cs.ucsd.edu> NOVA starts a recovery thread on each CPU, and scans all the inodes in a parallel way. It recovers the inode inuse list during the scan as well. Signed-off-by: Andiry Xu <jix024@cs.ucsd.edu> --- fs/nova/bbuild.c | 396 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) diff --git a/fs/nova/bbuild.c b/fs/nova/bbuild.c index 75dfcba..3271166 100644 --- a/fs/nova/bbuild.c +++ b/fs/nova/bbuild.c @@ -677,6 +677,11 @@ struct task_ring { u64 *nvmm_array; }; +static struct task_ring *task_rings; +static struct task_struct **threads; +wait_queue_head_t finish_wq; +int *finished; + static int nova_traverse_inode_log(struct super_block *sb, struct nova_inode *pi, struct scan_bitmap *bm, u64 head) { @@ -973,6 +978,378 @@ static int nova_recover_inode_pages(struct super_block *sb, } +static void free_resources(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct task_ring *ring; + int i; + + if (task_rings) { + for (i = 0; i < sbi->cpus; i++) { + ring = &task_rings[i]; + vfree(ring->entry_array); + vfree(ring->nvmm_array); + ring->entry_array = NULL; + ring->nvmm_array = NULL; + } + } + + kfree(task_rings); + kfree(threads); + kfree(finished); +} + +static int failure_thread_func(void *data); + +static int allocate_resources(struct super_block *sb, int cpus) +{ + struct task_ring 
*ring; + int i; + + task_rings = kcalloc(cpus, sizeof(struct task_ring), GFP_KERNEL); + if (!task_rings) + goto fail; + + for (i = 0; i < cpus; i++) { + ring = &task_rings[i]; + + ring->nvmm_array = vzalloc(sizeof(u64) * MAX_PGOFF); + if (!ring->nvmm_array) + goto fail; + + ring->entry_array = vmalloc(sizeof(u64) * MAX_PGOFF); + if (!ring->entry_array) + goto fail; + } + + threads = kcalloc(cpus, sizeof(struct task_struct *), GFP_KERNEL); + if (!threads) + goto fail; + + finished = kcalloc(cpus, sizeof(int), GFP_KERNEL); + if (!finished) + goto fail; + + init_waitqueue_head(&finish_wq); + + for (i = 0; i < cpus; i++) { + threads[i] = kthread_create(failure_thread_func, + sb, "recovery thread"); + kthread_bind(threads[i], i); + } + + return 0; + +fail: + free_resources(sb); + return -ENOMEM; +} + +static void wait_to_finish(int cpus) +{ + int i; + + for (i = 0; i < cpus; i++) { + while (finished[i] == 0) { + wait_event_interruptible_timeout(finish_wq, false, + msecs_to_jiffies(1)); + } + } +} + +/*********************** Failure recovery *************************/ + +static int nova_failure_insert_inodetree(struct super_block *sb, + unsigned long ino_low, unsigned long ino_high) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct inode_map *inode_map; + struct nova_range_node *prev = NULL, *next = NULL; + struct nova_range_node *new_node; + unsigned long internal_low, internal_high; + int cpu; + struct rb_root *tree; + int ret; + + if (ino_low > ino_high) { + nova_err(sb, "%s: ino low %lu, ino high %lu\n", + __func__, ino_low, ino_high); + return -EINVAL; + } + + cpu = ino_low % sbi->cpus; + if (ino_high % sbi->cpus != cpu) { + nova_err(sb, "%s: ino low %lu, ino high %lu\n", + __func__, ino_low, ino_high); + return -EINVAL; + } + + internal_low = ino_low / sbi->cpus; + internal_high = ino_high / sbi->cpus; + inode_map = &sbi->inode_maps[cpu]; + tree = &inode_map->inode_inuse_tree; + mutex_lock(&inode_map->inode_table_mutex); + + ret = nova_find_free_slot(sbi, tree, 
internal_low, internal_high, + &prev, &next); + if (ret) { + nova_dbg("%s: ino %lu - %lu already exists!: %d\n", + __func__, ino_low, ino_high, ret); + mutex_unlock(&inode_map->inode_table_mutex); + return ret; + } + + if (prev && next && (internal_low == prev->range_high + 1) && + (internal_high + 1 == next->range_low)) { + /* fits the hole */ + rb_erase(&next->node, tree); + inode_map->num_range_node_inode--; + prev->range_high = next->range_high; + nova_free_inode_node(sb, next); + goto finish; + } + if (prev && (internal_low == prev->range_high + 1)) { + /* Aligns left */ + prev->range_high += internal_high - internal_low + 1; + goto finish; + } + if (next && (internal_high + 1 == next->range_low)) { + /* Aligns right */ + next->range_low -= internal_high - internal_low + 1; + goto finish; + } + + /* Aligns somewhere in the middle */ + new_node = nova_alloc_inode_node(sb); + NOVA_ASSERT(new_node); + new_node->range_low = internal_low; + new_node->range_high = internal_high; + ret = nova_insert_inodetree(sbi, new_node, cpu); + if (ret) { + nova_err(sb, "%s failed\n", __func__); + nova_free_inode_node(sb, new_node); + goto finish; + } + inode_map->num_range_node_inode++; + +finish: + mutex_unlock(&inode_map->inode_table_mutex); + return ret; +} + +static inline int nova_failure_update_inodetree(struct super_block *sb, + struct nova_inode *pi, unsigned long *ino_low, unsigned long *ino_high) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + + if (*ino_low == 0) { + *ino_low = *ino_high = pi->nova_ino; + } else { + if (pi->nova_ino == *ino_high + sbi->cpus) { + *ino_high = pi->nova_ino; + } else { + /* A new start */ + nova_failure_insert_inodetree(sb, *ino_low, *ino_high); + *ino_low = *ino_high = pi->nova_ino; + } + } + + return 0; +} + +static int failure_thread_func(void *data) +{ + struct super_block *sb = data; + struct nova_inode_info_header sih; + struct task_ring *ring; + struct nova_inode *pi, fake_pi; + unsigned long num_inodes_per_page; + unsigned long 
ino_low, ino_high; + unsigned long last_blocknr; + unsigned int data_bits; + u64 curr; + int cpuid = smp_processor_id(); + unsigned long i; + unsigned long max_size = 0; + u64 pi_addr = 0; + int ret = 0; + int count; + + pi = nova_get_inode_by_ino(sb, NOVA_INODETABLE_INO); + data_bits = blk_type_to_shift[pi->i_blk_type]; + num_inodes_per_page = 1 << (data_bits - NOVA_INODE_BITS); + + ring = &task_rings[cpuid]; + nova_init_header(sb, &sih, 0); + + for (count = 0; count < ring->num; count++) { + curr = ring->addr0[count]; + ino_low = ino_high = 0; + + /* + * Note: The inode log page is allocated in 2MB + * granularity, but not aligned on 2MB boundary. + */ + for (i = 0; i < 512; i++) + set_bm((curr >> PAGE_SHIFT) + i, + global_bm[cpuid], BM_4K); + + for (i = 0; i < num_inodes_per_page; i++) { + pi_addr = curr + i * NOVA_INODE_SIZE; + ret = nova_get_reference(sb, pi_addr, &fake_pi, + (void **)&pi, sizeof(struct nova_inode)); + if (ret) { + nova_dbg("Recover pi @ 0x%llx failed\n", + pi_addr); + continue; + } + /* FIXME: Check inode checksum */ + if (fake_pi.i_mode && fake_pi.deleted == 0) { + if (fake_pi.valid == 0) { + /* Deleteable */ + pi->deleted = 1; + fake_pi.deleted = 1; + continue; + } + + nova_recover_inode_pages(sb, &sih, ring, + &fake_pi, global_bm[cpuid]); + nova_failure_update_inodetree(sb, pi, + &ino_low, &ino_high); + if (sih.i_size > max_size) + max_size = sih.i_size; + } + } + + if (ino_low && ino_high) + nova_failure_insert_inodetree(sb, ino_low, ino_high); + } + + /* Free radix tree */ + if (max_size) { + last_blocknr = (max_size - 1) >> PAGE_SHIFT; + nova_delete_file_tree(sb, &sih, 0, last_blocknr, + false, false, 0); + } + + finished[cpuid] = 1; + wake_up_interruptible(&finish_wq); + do_exit(ret); + return ret; +} + +static int nova_failure_recovery_crawl(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct nova_inode_info_header sih; + struct inode_table *inode_table; + struct task_ring *ring; + struct nova_inode *pi, 
fake_pi; + unsigned long curr_addr; + u64 root_addr; + u64 curr; + int ret = 0; + int count; + int cpuid; + + root_addr = nova_get_reserved_inode_addr(sb, NOVA_ROOT_INO); + + for (cpuid = 0; cpuid < sbi->cpus; cpuid++) { + ring = &task_rings[cpuid]; + inode_table = nova_get_inode_table(sb, cpuid); + if (!inode_table) + return -EINVAL; + + count = 0; + curr = inode_table->log_head; + while (curr) { + if (ring->num >= 512) { + nova_err(sb, "%s: ring size too small\n", + __func__); + return -EINVAL; + } + + ring->addr0[count] = curr; + + count++; + + curr_addr = (unsigned long)nova_get_block(sb, + curr); + /* Next page resides at the last 8 bytes */ + curr_addr += 2097152 - 8; + curr = *(u64 *)(curr_addr); + } + + if (count > ring->num) + ring->num = count; + } + + for (cpuid = 0; cpuid < sbi->cpus; cpuid++) + wake_up_process(threads[cpuid]); + + nova_init_header(sb, &sih, 0); + /* Recover the root iode */ + ret = nova_get_reference(sb, root_addr, &fake_pi, + (void **)&pi, sizeof(struct nova_inode)); + if (ret) { + nova_dbg("Recover root pi failed\n"); + return ret; + } + + nova_recover_inode_pages(sb, &sih, &task_rings[0], + &fake_pi, global_bm[1]); + + return ret; +} + +int nova_failure_recovery(struct super_block *sb) +{ + struct nova_sb_info *sbi = NOVA_SB(sb); + struct task_ring *ring; + struct nova_inode *pi; + struct journal_ptr_pair *pair; + int ret; + int i; + + sbi->s_inodes_used_count = 0; + + /* Initialize inuse inode list */ + if (nova_init_inode_inuse_list(sb) < 0) + return -EINVAL; + + /* Handle special inodes */ + pi = nova_get_inode_by_ino(sb, NOVA_BLOCKNODE_INO); + pi->log_head = pi->log_tail = 0; + nova_flush_buffer(&pi->log_head, CACHELINE_SIZE, 0); + + for (i = 0; i < sbi->cpus; i++) { + pair = nova_get_journal_pointers(sb, i); + + set_bm(pair->journal_head >> PAGE_SHIFT, global_bm[i], BM_4K); + } + + PERSISTENT_BARRIER(); + + ret = allocate_resources(sb, sbi->cpus); + if (ret) + return ret; + + ret = nova_failure_recovery_crawl(sb); + + 
wait_to_finish(sbi->cpus); + + for (i = 0; i < sbi->cpus; i++) { + ring = &task_rings[i]; + sbi->s_inodes_used_count += ring->inodes_used_count; + } + + free_resources(sb); + + nova_dbg("Failure recovery total recovered %lu\n", + sbi->s_inodes_used_count - NOVA_NORMAL_INODE_START); + return ret; +} + /*********************** Recovery entrance *************************/ /* Return TRUE if we can do a normal unmount recovery */ @@ -1027,7 +1404,23 @@ int nova_recovery(struct super_block *sb) nova_init_blockmap(sb, 1); value = nova_try_normal_recovery(sb); + if (value) { + nova_dbg("NOVA: Normal shutdown\n"); + } else { + nova_dbg("NOVA: Failure recovery\n"); + ret = alloc_bm(sb, initsize); + if (ret) + goto out; + + sbi->s_inodes_used_count = 0; + ret = nova_failure_recovery(sb); + if (ret) + goto out; + ret = nova_build_blocknode_map(sb, initsize); + } + +out: NOVA_END_TIMING(recovery_t, start); if (measure_timing == 0) { getrawmonotonic(&end); @@ -1036,6 +1429,9 @@ int nova_recovery(struct super_block *sb) (end.tv_nsec - start.tv_nsec); } + if (!value) + free_bm(sb); + sbi->s_epoch_id = le64_to_cpu(super->s_epoch_id); return ret; } -- 2.7.4
next prev parent reply other threads:[~2018-03-10 18:15 UTC|newest] Thread overview: 236+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-03-10 18:17 [RFC v2 00/83] NOVA: a new file system for persistent memory Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 01/83] Introduction and documentation of NOVA filesystem Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-19 20:43 ` Randy Dunlap 2018-03-19 20:43 ` Randy Dunlap 2018-03-19 23:00 ` Andiry Xu 2018-03-19 23:00 ` Andiry Xu 2018-04-22 8:05 ` Pavel Machek 2018-03-10 18:17 ` [RFC v2 02/83] Add nova_def.h Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 03/83] Add super.h Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-15 4:54 ` Darrick J. Wong 2018-03-15 4:54 ` Darrick J. Wong 2018-03-15 6:11 ` Andiry Xu 2018-03-15 6:11 ` Andiry Xu 2018-03-15 9:05 ` Arnd Bergmann 2018-03-15 9:05 ` Arnd Bergmann 2018-03-15 17:51 ` Andiry Xu 2018-03-15 17:51 ` Andiry Xu 2018-03-15 20:04 ` Andreas Dilger 2018-03-15 20:38 ` Arnd Bergmann 2018-03-15 20:38 ` Arnd Bergmann 2018-03-16 2:59 ` Theodore Y. Ts'o 2018-03-16 2:59 ` Theodore Y. Ts'o 2018-03-16 6:17 ` Andiry Xu 2018-03-16 6:17 ` Andiry Xu 2018-03-16 6:30 ` Darrick J. Wong 2018-03-16 6:30 ` Darrick J. Wong 2018-03-16 9:19 ` Arnd Bergmann 2018-03-16 9:19 ` Arnd Bergmann 2018-03-10 18:17 ` [RFC v2 04/83] NOVA inode definition Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-15 5:06 ` Darrick J. Wong 2018-03-15 5:06 ` Darrick J. 
Wong 2018-03-15 6:16 ` Andiry Xu 2018-03-15 6:16 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 05/83] Add NOVA filesystem definitions and useful helper routines Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-11 12:00 ` Nikolay Borisov 2018-03-11 12:00 ` Nikolay Borisov 2018-03-11 19:22 ` Eric Biggers 2018-03-11 19:22 ` Eric Biggers 2018-03-11 21:45 ` Andiry Xu 2018-03-11 21:45 ` Andiry Xu 2018-03-19 19:39 ` Andiry Xu 2018-03-19 19:39 ` Andiry Xu 2018-03-19 20:30 ` Eric Biggers 2018-03-19 20:30 ` Eric Biggers 2018-03-19 21:59 ` Andiry Xu 2018-03-19 21:59 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 06/83] Add inode get/read methods Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-04-23 6:12 ` Darrick J. Wong 2018-04-23 6:12 ` Darrick J. Wong 2018-04-23 15:55 ` Andiry Xu 2018-04-23 15:55 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 07/83] Initialize inode_info and rebuild inode information in nova_iget() Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 08/83] NOVA superblock operations Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 09/83] Add Kconfig and Makefile Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-11 12:15 ` Nikolay Borisov 2018-03-11 12:15 ` Nikolay Borisov 2018-03-11 21:32 ` Andiry Xu 2018-03-11 21:32 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 10/83] Add superblock integrity check Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 11/83] Add timing and I/O statistics for performance analysis and profiling Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 12/83] Add timing for mount and init Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 13/83] Add remount_fs and show_options methods Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 14/83] Add range node kmem cache Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-11 11:55 ` Nikolay Borisov 2018-03-11 11:55 ` Nikolay Borisov 2018-03-11 21:31 ` Andiry Xu 2018-03-11 21:31 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 15/83] Add free list data 
structure Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 16/83] Initialize block map and free lists in nova_init() Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-11 12:12 ` Nikolay Borisov 2018-03-11 12:12 ` Nikolay Borisov 2018-03-11 21:30 ` Andiry Xu 2018-03-11 21:30 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 17/83] Add statfs support Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:17 ` [RFC v2 18/83] Add freelist statistics printing Andiry Xu 2018-03-10 18:17 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 19/83] Add pmem block free routines Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 20/83] Pmem block allocation routines Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 21/83] Add log structure Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 22/83] Inode log pages allocation and reclaimation Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 23/83] Save allocator to pmem in put_super Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 24/83] Initialize and allocate inode table Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 25/83] Support get normal inode address and inode table extentsion Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 26/83] Add inode_map to track inuse inodes Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 27/83] Save the inode inuse list to pmem upon umount Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 28/83] Add NOVA address space operations Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 29/83] Add write_inode and dirty_inode routines Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 30/83] New NOVA inode allocation Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 31/83] Add new vfs " Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 32/83] Add log entry definitions Andiry Xu 2018-03-10 18:18 ` Andiry Xu 
2018-03-10 18:18 ` [RFC v2 33/83] Inode log and entry printing for debug purpose Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 34/83] Journal: NOVA light weight journal definitions Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 35/83] Journal: Lite journal helper routines Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 36/83] Journal: Lite journal recovery Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 37/83] Journal: Lite journal create and commit Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 38/83] Journal: NOVA lite journal initialization Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 39/83] Log operation: dentry append Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 40/83] Log operation: file write entry append Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 41/83] Log operation: setattr " Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 42/83] Log operation: link change append Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 43/83] Log operation: in-place update log entry Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 44/83] Log operation: invalidate log entries Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 45/83] Log operation: file inode log lookup and assign Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 46/83] Dir: Add Directory radix tree insert/remove methods Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 47/83] Dir: Add initial dentries when initializing a directory inode log Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 48/83] Dir: Readdir operation Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 49/83] Dir: Append create/remove dentry Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 50/83] Inode: Add nova_evict_inode Andiry Xu 2018-03-10 
18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 51/83] Rebuild: directory inode Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 52/83] Rebuild: file inode Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 53/83] Namei: lookup Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 54/83] Namei: create and mknod Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 55/83] Namei: mkdir Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 56/83] Namei: link and unlink Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 57/83] Namei: rmdir Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 58/83] Namei: rename Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 59/83] Namei: setattr Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 60/83] Add special inode operations Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 61/83] Super: Add nova_export_ops Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 62/83] File: getattr and file inode operations Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 63/83] File operation: llseek Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 64/83] File operation: open, fsync, flush Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 65/83] File operation: read Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 66/83] Super: Add file write item cache Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 67/83] Dax: commit list of file write items to log Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 68/83] File operation: copy-on-write write Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 69/83] Super: Add module param inplace_data_updates Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 70/83] File operation: Inplace write Andiry Xu 2018-03-10 18:18 ` Andiry 
Xu 2018-03-10 18:18 ` [RFC v2 71/83] Symlink support Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 72/83] File operation: fallocate Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 73/83] Dax: Add iomap operations Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 74/83] File operation: Mmap Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 75/83] File operation: read/write iter Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 76/83] Ioctl support Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 77/83] GC: Fast garbage collection Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:18 ` [RFC v2 78/83] GC: Thorough " Andiry Xu 2018-03-10 18:18 ` Andiry Xu 2018-03-10 18:19 ` [RFC v2 79/83] Normal recovery Andiry Xu 2018-03-10 18:19 ` Andiry Xu 2018-03-10 18:19 ` [RFC v2 80/83] Failure recovery: bitmap operations Andiry Xu 2018-03-10 18:19 ` Andiry Xu 2018-03-10 18:19 ` [RFC v2 81/83] Failure recovery: Inode pages recovery routines Andiry Xu 2018-03-10 18:19 ` Andiry Xu 2018-03-10 18:19 ` Andiry Xu [this message] 2018-03-10 18:19 ` [RFC v2 82/83] Failure recovery: Per-CPU recovery Andiry Xu 2018-03-10 18:19 ` [RFC v2 83/83] Sysfs support Andiry Xu 2018-03-10 18:19 ` Andiry Xu 2018-03-15 0:33 ` Randy Dunlap 2018-03-15 0:33 ` Randy Dunlap 2018-03-15 6:07 ` Andiry Xu 2018-03-15 6:07 ` Andiry Xu 2018-03-22 15:00 ` David Sterba 2018-03-22 15:00 ` David Sterba 2018-03-23 0:31 ` Andiry Xu 2018-03-23 0:31 ` Andiry Xu 2018-03-11 2:14 ` [RFC v2 00/83] NOVA: a new file system for persistent memory Theodore Y. Ts'o 2018-03-11 2:14 ` Theodore Y. Ts'o 2018-03-11 4:58 ` Andiry Xu 2018-03-11 4:58 ` Andiry Xu
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1520705944-6723-83-git-send-email-jix024@eng.ucsd.edu \ --to=jix024@eng.ucsd.edu \ --cc=andiry.xu@gmail.com \ --cc=coughlan@redhat.com \ --cc=david@fromorbit.com \ --cc=jack@suse.com \ --cc=jix024@cs.ucsd.edu \ --cc=linux-fsdevel@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-nvdimm@lists.01.org \ --cc=miklos@szeredi.hu \ --cc=swanson@cs.ucsd.edu \ --cc=swhiteho@redhat.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes; see mirroring instructions on how to clone and mirror all data and code used by this external index.