Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Andiry Xu <jix024@eng.ucsd.edu>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-nvdimm@lists.01.org
Cc: dan.j.williams@intel.com, andy.rudoff@intel.com,
	coughlan@redhat.com, swanson@cs.ucsd.edu, david@fromorbit.com,
	jack@suse.com, swhiteho@redhat.com, miklos@szeredi.hu,
	andiry.xu@gmail.com, Andiry Xu <jix024@cs.ucsd.edu>
Subject: [RFC v2 77/83] GC: Fast garbage collection.
Date: Sat, 10 Mar 2018 10:18:58 -0800
Message-ID: <1520705944-6723-78-git-send-email-jix024@eng.ucsd.edu> (raw)
In-Reply-To: <1520705944-6723-1-git-send-email-jix024@eng.ucsd.edu>

From: Andiry Xu <jix024@cs.ucsd.edu>

NOVA cleans and compacts the log when the log is full.
The log is a linked list of 4KB pmem pages, and NOVA performs
fast garbage collection by deleting dead log pages (all the entries are invalid)
from the linked list.

Example:
I = Invalid, V = Valid

VIIV -> IIII -> VVII

	||
	||  fast gc
	\/

VIIV -> VVII

Signed-off-by: Andiry Xu <jix024@cs.ucsd.edu>
---
 fs/nova/Makefile |   2 +-
 fs/nova/gc.c     | 186 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nova/log.c    |   3 +
 fs/nova/nova.h   |   7 +++
 4 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 fs/nova/gc.c

diff --git a/fs/nova/Makefile b/fs/nova/Makefile
index 87e56c6..7a5fb6d 100644
--- a/fs/nova/Makefile
+++ b/fs/nova/Makefile
@@ -4,5 +4,5 @@
 
 obj-$(CONFIG_NOVA_FS) += nova.o
 
-nova-y := balloc.o bbuild.o dax.o dir.o file.o inode.o ioctl.o journal.o\
+nova-y := balloc.o bbuild.o dax.o dir.o file.o gc.o inode.o ioctl.o journal.o\
 	  log.o namei.o rebuild.o stats.o super.o symlink.o
diff --git a/fs/nova/gc.c b/fs/nova/gc.c
new file mode 100644
index 0000000..1634c04
--- /dev/null
+++ b/fs/nova/gc.c
@@ -0,0 +1,186 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Garbage collection methods
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@cs.ucsd.edu>
+ * Copyright 2012-2013 Intel Corporation
+ * Copyright 2009-2011 Marco Stornelli <marco.stornelli@gmail.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include "nova.h"
+#include "inode.h"
+
+
+static bool curr_page_invalid(struct super_block *sb,
+	struct nova_inode *pi, struct nova_inode_info_header *sih,
+	u64 page_head)
+{
+	struct nova_inode_log_page *curr_page;
+	struct nova_inode_page_tail page_tail;
+	unsigned int num_entries;
+	unsigned int invalid_entries;
+	bool ret;
+	timing_t check_time;
+	int rc;
+
+	NOVA_START_TIMING(check_invalid_t, check_time);
+
+	curr_page = (struct nova_inode_log_page *)
+					nova_get_block(sb, page_head);
+	rc = memcpy_mcsafe(&page_tail, &curr_page->page_tail,
+					sizeof(struct nova_inode_page_tail));
+	if (rc) {
+		nova_err(sb, "check page failed\n");
+		return false;
+	}
+
+	num_entries = le32_to_cpu(page_tail.num_entries);
+	invalid_entries = le32_to_cpu(page_tail.invalid_entries);
+
+	ret = (invalid_entries == num_entries);
+	if (!ret) {
+		sih->num_entries += num_entries;
+		sih->valid_entries += num_entries - invalid_entries;
+	}
+
+	NOVA_END_TIMING(check_invalid_t, check_time);
+	return ret;
+}
+
+static void free_curr_page(struct super_block *sb,
+	struct nova_inode_info_header *sih,
+	struct nova_inode_log_page *curr_page,
+	struct nova_inode_log_page *last_page, u64 curr_head)
+{
+	u8 btype = sih->i_blk_type;
+
+	nova_set_next_page_address(sb, last_page,
+			curr_page->page_tail.next_page, 1);
+	nova_free_log_blocks(sb, sih,
+			nova_get_blocknr(sb, curr_head, btype), 1);
+}
+
+
+/*
+ * Scan pages in the log and remove those with no valid log entries.
+ */
+int nova_inode_log_fast_gc(struct super_block *sb,
+	struct nova_inode *pi, struct nova_inode_info_header *sih,
+	u64 curr_tail, u64 new_block,
+	int num_pages, int force_thorough)
+{
+	u64 curr, next, possible_head = 0;
+	int found_head = 0;
+	struct nova_inode_log_page *last_page = NULL;
+	struct nova_inode_log_page *curr_page = NULL;
+	int first_need_free = 0;
+	int num_logs;
+	u8 btype = sih->i_blk_type;
+	unsigned long blocks;
+	unsigned long checked_pages = 0;
+	int freed_pages = 0;
+	timing_t gc_time;
+
+	NOVA_START_TIMING(fast_gc_t, gc_time);
+	curr = sih->log_head;
+	sih->valid_entries = 0;
+	sih->num_entries = 0;
+
+	num_logs = 1;
+
+	nova_dbgv("%s: log head 0x%llx, tail 0x%llx\n",
+				__func__, curr, curr_tail);
+	while (1) {
+		if (curr >> PAGE_SHIFT == sih->log_tail >> PAGE_SHIFT) {
+			/* Don't recycle tail page */
+			if (found_head == 0) {
+				possible_head = cpu_to_le64(curr);
+			}
+			break;
+		}
+
+		curr_page = (struct nova_inode_log_page *)
+					nova_get_block(sb, curr);
+		next = next_log_page(sb, curr);
+		if (next < 0)
+			break;
+
+		nova_dbg_verbose("curr 0x%llx, next 0x%llx\n", curr, next);
+		if (curr_page_invalid(sb, pi, sih, curr)) {
+			nova_dbg_verbose("curr page %p invalid\n", curr_page);
+			if (curr == sih->log_head) {
+				/* Free first page later */
+				first_need_free = 1;
+				last_page = curr_page;
+			} else {
+				nova_dbg_verbose("Free log block 0x%llx\n",
+						curr >> PAGE_SHIFT);
+				free_curr_page(sb, sih, curr_page, last_page,
+						curr);
+			}
+			NOVA_STATS_ADD(fast_gc_pages, 1);
+			freed_pages++;
+		} else {
+			if (found_head == 0) {
+				possible_head = cpu_to_le64(curr);
+				found_head = 1;
+			}
+			last_page = curr_page;
+		}
+
+		curr = next;
+		checked_pages++;
+		if (curr == 0)
+			break;
+	}
+
+	NOVA_STATS_ADD(fast_checked_pages, checked_pages);
+	nova_dbgv("checked pages %lu, freed %d\n", checked_pages, freed_pages);
+	checked_pages -= freed_pages;
+
+	// TODO:  I think this belongs in nova_extend_inode_log.
+	if (num_pages > 0) {
+		curr = BLOCK_OFF(curr_tail);
+		curr_page = (struct nova_inode_log_page *)
+						  nova_get_block(sb, curr);
+
+		nova_set_next_page_address(sb, curr_page, new_block, 1);
+	}
+
+	curr = sih->log_head;
+
+	pi->log_head = possible_head;
+	nova_persist_inode(pi);
+	sih->log_head = possible_head;
+	nova_dbgv("%s: %d new head 0x%llx\n", __func__,
+					found_head, possible_head);
+	sih->log_pages += (num_pages - freed_pages) * num_logs;
+	/* Don't update log tail pointer here */
+	nova_flush_buffer(&pi->log_head, CACHELINE_SIZE, 1);
+
+	if (first_need_free) {
+		nova_dbg_verbose("Free log head block 0x%llx\n",
+					curr >> PAGE_SHIFT);
+		nova_free_log_blocks(sb, sih,
+				nova_get_blocknr(sb, curr, btype), 1);
+	}
+
+	NOVA_END_TIMING(fast_gc_t, gc_time);
+
+	if (sih->num_entries == 0)
+		return 0;
+
+	blocks = (sih->valid_entries * checked_pages) / sih->num_entries;
+	if ((sih->valid_entries * checked_pages) % sih->num_entries)
+		blocks++;
+
+	return 0;
+}
diff --git a/fs/nova/log.c b/fs/nova/log.c
index 451be27..66bf98e 100644
--- a/fs/nova/log.c
+++ b/fs/nova/log.c
@@ -964,6 +964,9 @@ static u64 nova_extend_inode_log(struct super_block *sb, struct nova_inode *pi,
 	}
 
 	/* Perform GC */
+	nova_inode_log_fast_gc(sb, pi, sih, curr_p,
+			       new_block, allocated, 0);
+
 	return new_block;
 }
 
diff --git a/fs/nova/nova.h b/fs/nova/nova.h
index ab9153e..32b7b2f 100644
--- a/fs/nova/nova.h
+++ b/fs/nova/nova.h
@@ -515,6 +515,13 @@ int nova_remove_dentry(struct dentry *dentry, int dec_link,
 extern const struct file_operations nova_dax_file_operations;
 extern const struct inode_operations nova_file_inode_operations;
 
+
+/* gc.c */
+int nova_inode_log_fast_gc(struct super_block *sb,
+	struct nova_inode *pi, struct nova_inode_info_header *sih,
+	u64 curr_tail, u64 new_block, int num_pages,
+	int force_thorough);
+
 /* ioctl.c */
 extern long nova_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
-- 
2.7.4

  parent reply index

Thread overview: 119+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-10 18:17 [RFC v2 00/83] NOVA: a new file system for persistent memory Andiry Xu
2018-03-10 18:17 ` [RFC v2 01/83] Introduction and documentation of NOVA filesystem Andiry Xu
2018-03-19 20:43   ` Randy Dunlap
2018-03-19 23:00     ` Andiry Xu
2018-04-22  8:05   ` Pavel Machek
2018-03-10 18:17 ` [RFC v2 02/83] Add nova_def.h Andiry Xu
2018-03-10 18:17 ` [RFC v2 03/83] Add super.h Andiry Xu
2018-03-15  4:54   ` Darrick J. Wong
2018-03-15  6:11     ` Andiry Xu
2018-03-15  9:05       ` Arnd Bergmann
2018-03-15 17:51         ` Andiry Xu
2018-03-15 20:04           ` Andreas Dilger
2018-03-15 20:38           ` Arnd Bergmann
2018-03-16  2:59             ` Theodore Y. Ts'o
2018-03-16  6:17               ` Andiry Xu
2018-03-16  6:30                 ` Darrick J. Wong
2018-03-16  9:19               ` Arnd Bergmann
2018-03-10 18:17 ` [RFC v2 04/83] NOVA inode definition Andiry Xu
2018-03-15  5:06   ` Darrick J. Wong
2018-03-15  6:16     ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 05/83] Add NOVA filesystem definitions and useful helper routines Andiry Xu
2018-03-11 12:00   ` Nikolay Borisov
2018-03-11 19:22     ` Eric Biggers
2018-03-11 21:45       ` Andiry Xu
2018-03-19 19:39       ` Andiry Xu
2018-03-19 20:30         ` Eric Biggers
2018-03-19 21:59           ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 06/83] Add inode get/read methods Andiry Xu
2018-04-23  6:12   ` Darrick J. Wong
2018-04-23 15:55     ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 07/83] Initialize inode_info and rebuild inode information in nova_iget() Andiry Xu
2018-03-10 18:17 ` [RFC v2 08/83] NOVA superblock operations Andiry Xu
2018-03-10 18:17 ` [RFC v2 09/83] Add Kconfig and Makefile Andiry Xu
2018-03-11 12:15   ` Nikolay Borisov
2018-03-11 21:32     ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 10/83] Add superblock integrity check Andiry Xu
2018-03-10 18:17 ` [RFC v2 11/83] Add timing and I/O statistics for performance analysis and profiling Andiry Xu
2018-03-10 18:17 ` [RFC v2 12/83] Add timing for mount and init Andiry Xu
2018-03-10 18:17 ` [RFC v2 13/83] Add remount_fs and show_options methods Andiry Xu
2018-03-10 18:17 ` [RFC v2 14/83] Add range node kmem cache Andiry Xu
2018-03-11 11:55   ` Nikolay Borisov
2018-03-11 21:31     ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 15/83] Add free list data structure Andiry Xu
2018-03-10 18:17 ` [RFC v2 16/83] Initialize block map and free lists in nova_init() Andiry Xu
2018-03-11 12:12   ` Nikolay Borisov
2018-03-11 21:30     ` Andiry Xu
2018-03-10 18:17 ` [RFC v2 17/83] Add statfs support Andiry Xu
2018-03-10 18:17 ` [RFC v2 18/83] Add freelist statistics printing Andiry Xu
2018-03-10 18:18 ` [RFC v2 19/83] Add pmem block free routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 20/83] Pmem block allocation routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 21/83] Add log structure Andiry Xu
2018-03-10 18:18 ` [RFC v2 22/83] Inode log pages allocation and reclaimation Andiry Xu
2018-03-10 18:18 ` [RFC v2 23/83] Save allocator to pmem in put_super Andiry Xu
2018-03-10 18:18 ` [RFC v2 24/83] Initialize and allocate inode table Andiry Xu
2018-03-10 18:18 ` [RFC v2 25/83] Support get normal inode address and inode table extentsion Andiry Xu
2018-03-10 18:18 ` [RFC v2 26/83] Add inode_map to track inuse inodes Andiry Xu
2018-03-10 18:18 ` [RFC v2 27/83] Save the inode inuse list to pmem upon umount Andiry Xu
2018-03-10 18:18 ` [RFC v2 28/83] Add NOVA address space operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 29/83] Add write_inode and dirty_inode routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 30/83] New NOVA inode allocation Andiry Xu
2018-03-10 18:18 ` [RFC v2 31/83] Add new vfs " Andiry Xu
2018-03-10 18:18 ` [RFC v2 32/83] Add log entry definitions Andiry Xu
2018-03-10 18:18 ` [RFC v2 33/83] Inode log and entry printing for debug purpose Andiry Xu
2018-03-10 18:18 ` [RFC v2 34/83] Journal: NOVA light weight journal definitions Andiry Xu
2018-03-10 18:18 ` [RFC v2 35/83] Journal: Lite journal helper routines Andiry Xu
2018-03-10 18:18 ` [RFC v2 36/83] Journal: Lite journal recovery Andiry Xu
2018-03-10 18:18 ` [RFC v2 37/83] Journal: Lite journal create and commit Andiry Xu
2018-03-10 18:18 ` [RFC v2 38/83] Journal: NOVA lite journal initialization Andiry Xu
2018-03-10 18:18 ` [RFC v2 39/83] Log operation: dentry append Andiry Xu
2018-03-10 18:18 ` [RFC v2 40/83] Log operation: file write entry append Andiry Xu
2018-03-10 18:18 ` [RFC v2 41/83] Log operation: setattr " Andiry Xu
2018-03-10 18:18 ` [RFC v2 42/83] Log operation: link change append Andiry Xu
2018-03-10 18:18 ` [RFC v2 43/83] Log operation: in-place update log entry Andiry Xu
2018-03-10 18:18 ` [RFC v2 44/83] Log operation: invalidate log entries Andiry Xu
2018-03-10 18:18 ` [RFC v2 45/83] Log operation: file inode log lookup and assign Andiry Xu
2018-03-10 18:18 ` [RFC v2 46/83] Dir: Add Directory radix tree insert/remove methods Andiry Xu
2018-03-10 18:18 ` [RFC v2 47/83] Dir: Add initial dentries when initializing a directory inode log Andiry Xu
2018-03-10 18:18 ` [RFC v2 48/83] Dir: Readdir operation Andiry Xu
2018-03-10 18:18 ` [RFC v2 49/83] Dir: Append create/remove dentry Andiry Xu
2018-03-10 18:18 ` [RFC v2 50/83] Inode: Add nova_evict_inode Andiry Xu
2018-03-10 18:18 ` [RFC v2 51/83] Rebuild: directory inode Andiry Xu
2018-03-10 18:18 ` [RFC v2 52/83] Rebuild: file inode Andiry Xu
2018-03-10 18:18 ` [RFC v2 53/83] Namei: lookup Andiry Xu
2018-03-10 18:18 ` [RFC v2 54/83] Namei: create and mknod Andiry Xu
2018-03-10 18:18 ` [RFC v2 55/83] Namei: mkdir Andiry Xu
2018-03-10 18:18 ` [RFC v2 56/83] Namei: link and unlink Andiry Xu
2018-03-10 18:18 ` [RFC v2 57/83] Namei: rmdir Andiry Xu
2018-03-10 18:18 ` [RFC v2 58/83] Namei: rename Andiry Xu
2018-03-10 18:18 ` [RFC v2 59/83] Namei: setattr Andiry Xu
2018-03-10 18:18 ` [RFC v2 60/83] Add special inode operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 61/83] Super: Add nova_export_ops Andiry Xu
2018-03-10 18:18 ` [RFC v2 62/83] File: getattr and file inode operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 63/83] File operation: llseek Andiry Xu
2018-03-10 18:18 ` [RFC v2 64/83] File operation: open, fsync, flush Andiry Xu
2018-03-10 18:18 ` [RFC v2 65/83] File operation: read Andiry Xu
2018-03-10 18:18 ` [RFC v2 66/83] Super: Add file write item cache Andiry Xu
2018-03-10 18:18 ` [RFC v2 67/83] Dax: commit list of file write items to log Andiry Xu
2018-03-10 18:18 ` [RFC v2 68/83] File operation: copy-on-write write Andiry Xu
2018-03-10 18:18 ` [RFC v2 69/83] Super: Add module param inplace_data_updates Andiry Xu
2018-03-10 18:18 ` [RFC v2 70/83] File operation: Inplace write Andiry Xu
2018-03-10 18:18 ` [RFC v2 71/83] Symlink support Andiry Xu
2018-03-10 18:18 ` [RFC v2 72/83] File operation: fallocate Andiry Xu
2018-03-10 18:18 ` [RFC v2 73/83] Dax: Add iomap operations Andiry Xu
2018-03-10 18:18 ` [RFC v2 74/83] File operation: Mmap Andiry Xu
2018-03-10 18:18 ` [RFC v2 75/83] File operation: read/write iter Andiry Xu
2018-03-10 18:18 ` [RFC v2 76/83] Ioctl support Andiry Xu
2018-03-10 18:18 ` Andiry Xu [this message]
2018-03-10 18:18 ` [RFC v2 78/83] GC: Thorough garbage collection Andiry Xu
2018-03-10 18:19 ` [RFC v2 79/83] Normal recovery Andiry Xu
2018-03-10 18:19 ` [RFC v2 80/83] Failure recovery: bitmap operations Andiry Xu
2018-03-10 18:19 ` [RFC v2 81/83] Failure recovery: Inode pages recovery routines Andiry Xu
2018-03-10 18:19 ` [RFC v2 82/83] Failure recovery: Per-CPU recovery Andiry Xu
2018-03-10 18:19 ` [RFC v2 83/83] Sysfs support Andiry Xu
2018-03-15  0:33   ` Randy Dunlap
2018-03-15  6:07     ` Andiry Xu
2018-03-22 15:00   ` David Sterba
2018-03-23  0:31     ` Andiry Xu
2018-03-11  2:14 ` [RFC v2 00/83] NOVA: a new file system for persistent memory Theodore Y. Ts'o
2018-03-11  4:58   ` Andiry Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1520705944-6723-78-git-send-email-jix024@eng.ucsd.edu \
    --to=jix024@eng.ucsd.edu \
    --cc=andiry.xu@gmail.com \
    --cc=andy.rudoff@intel.com \
    --cc=coughlan@redhat.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.com \
    --cc=jix024@cs.ucsd.edu \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=miklos@szeredi.hu \
    --cc=swanson@cs.ucsd.edu \
    --cc=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git