All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hyeongseok Kim <hyeongseok@gmail.com>
To: namjae.jeon@samsung.com, sj1557.seo@samsung.com
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Hyeongseok Kim <hyeongseok@gmail.com>
Subject: [PATCH] exfat: change i_pos based exfat hash to rb tree
Date: Mon, 29 Mar 2021 09:28:18 +0900	[thread overview]
Message-ID: <20210329002818.73178-1-hyeongseok@gmail.com> (raw)

Hashtable for identifying inode by i_pos (cluster+dentry) have slow
search performance when there are lots of cached inode. The slowness
is from the limited count of slots showing that, in average,
(inode count / slots) times hlist traverse, and it becomes worse when
cached inode count increases by such as directory iteration.
To improve this, change from hash table to rb tree to minimize
searching/iterate time which enables O(logN) time complexity.

Test : "time ls -l"
 +--------+------------+------------+-----------+
 |        | file count | fresh read |   cached  |
 +--------+------------+------------+-----------+
 | Before |     50,000 |   0m06.59s |  0m01.58s |
 +--------+------------+------------+-----------+
 | After  |     50,000 |   0m05.20s |  0m00.98s |
 +--------+------------+------------+-----------+
 | Before |    300,000 |   1m28.97s |  0m31.69s |
 +--------+------------+------------+-----------+
 | After  |    300,000 |   0m33.11s |  0m06.21s |
 +--------+------------+------------+-----------+

Signed-off-by: Hyeongseok Kim <hyeongseok@gmail.com>
---
 fs/exfat/exfat_fs.h | 12 +++---
 fs/exfat/inode.c    | 91 +++++++++++++++++++++++++++++++--------------
 fs/exfat/namei.c    | 10 ++---
 fs/exfat/super.c    | 16 ++++----
 4 files changed, 82 insertions(+), 47 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 1d6da61157c9..f8ad8cbf8499 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -243,8 +243,8 @@ struct exfat_sb_info {
 	struct nls_table *nls_io; /* Charset used for input and display */
 	struct ratelimit_state ratelimit;
 
-	spinlock_t inode_hash_lock;
-	struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+	spinlock_t inode_tree_lock; /* lock for inode_tree structure */
+	struct rb_root inode_tree;
 
 	struct rcu_head rcu;
 };
@@ -289,8 +289,8 @@ struct exfat_inode_info {
 	loff_t i_size_aligned;
 	/* on-disk position of directory entry or 0 */
 	loff_t i_pos;
-	/* hash by i_location */
-	struct hlist_node i_hash_fat;
+	/* tree by i_location */
+	struct rb_node rbnode;
 	/* protect bmap against truncate */
 	struct rw_semaphore truncate_lock;
 	struct inode vfs_inode;
@@ -476,8 +476,8 @@ extern const struct inode_operations exfat_file_inode_operations;
 void exfat_sync_inode(struct inode *inode);
 struct inode *exfat_build_inode(struct super_block *sb,
 		struct exfat_dir_entry *info, loff_t i_pos);
-void exfat_hash_inode(struct inode *inode, loff_t i_pos);
-void exfat_unhash_inode(struct inode *inode);
+void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos);
+void exfat_inode_tree_erase(struct inode *inode);
 struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1803ef3220fd..740a34f528ae 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -501,50 +501,87 @@ static const struct address_space_operations exfat_aops = {
 	.bmap		= exfat_aop_bmap
 };
 
-static inline unsigned long exfat_hash(loff_t i_pos)
+static struct exfat_inode_info *exfat_inode_tree_find(struct super_block *sb,
+						      loff_t i_pos)
 {
-	return hash_32(i_pos, EXFAT_HASH_BITS);
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct rb_node *node = sbi->inode_tree.rb_node;
+	struct exfat_inode_info *info;
+
+	spin_lock(&sbi->inode_tree_lock);
+	while (node) {
+		info = rb_entry(node, struct exfat_inode_info, rbnode);
+		WARN_ON(info->vfs_inode.i_sb != sb);
+
+		if (i_pos == info->i_pos) {
+			spin_unlock(&sbi->inode_tree_lock);
+			return info;
+		}
+
+		if (i_pos < info->i_pos)
+			node = node->rb_left;
+		else /* i_pos > info->i_pos */
+			node = node->rb_right;
+	}
+	spin_unlock(&sbi->inode_tree_lock);
+	return NULL;
 }
 
-void exfat_hash_inode(struct inode *inode, loff_t i_pos)
+void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
-	struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
+	struct exfat_inode_info *info, *ei = EXFAT_I(inode);
+	struct rb_root *root = &sbi->inode_tree;
+	struct rb_node **rb_ptr = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	spin_lock(&sbi->inode_tree_lock);
+	ei->i_pos = i_pos;
+	while (*rb_ptr) {
+		parent = *rb_ptr;
+		info = rb_entry(*rb_ptr, struct exfat_inode_info, rbnode);
+		if (i_pos == info->i_pos) {
+			/* already exists */
+			rb_replace_node(*rb_ptr, &ei->rbnode, &sbi->inode_tree);
+			RB_CLEAR_NODE(*rb_ptr);
+			spin_unlock(&sbi->inode_tree_lock);
+			return;
+		}
 
-	spin_lock(&sbi->inode_hash_lock);
-	EXFAT_I(inode)->i_pos = i_pos;
-	hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head);
-	spin_unlock(&sbi->inode_hash_lock);
+		if (i_pos < info->i_pos)
+			rb_ptr = &(*rb_ptr)->rb_left;
+		else /* (i_pos > info->i_pos) */
+			rb_ptr = &(*rb_ptr)->rb_right;
+	}
+
+	rb_link_node(&ei->rbnode, parent, rb_ptr);
+	rb_insert_color(&ei->rbnode, root);
+	spin_unlock(&sbi->inode_tree_lock);
 }
 
-void exfat_unhash_inode(struct inode *inode)
+void exfat_inode_tree_erase(struct inode *inode)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	struct rb_root *root = &sbi->inode_tree;
 
-	spin_lock(&sbi->inode_hash_lock);
-	hlist_del_init(&EXFAT_I(inode)->i_hash_fat);
-	EXFAT_I(inode)->i_pos = 0;
-	spin_unlock(&sbi->inode_hash_lock);
+	spin_lock(&sbi->inode_tree_lock);
+	if (!RB_EMPTY_NODE(&ei->rbnode)) {
+		rb_erase(&ei->rbnode, root);
+		RB_CLEAR_NODE(&ei->rbnode);
+	}
+	spin_unlock(&sbi->inode_tree_lock);
 }
 
 struct inode *exfat_iget(struct super_block *sb, loff_t i_pos)
 {
-	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_inode_info *info;
-	struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
 	struct inode *inode = NULL;
 
-	spin_lock(&sbi->inode_hash_lock);
-	hlist_for_each_entry(info, head, i_hash_fat) {
-		WARN_ON(info->vfs_inode.i_sb != sb);
-
-		if (i_pos != info->i_pos)
-			continue;
+	info = exfat_inode_tree_find(sb, i_pos);
+	if (info)
 		inode = igrab(&info->vfs_inode);
-		if (inode)
-			break;
-	}
-	spin_unlock(&sbi->inode_hash_lock);
+
 	return inode;
 }
 
@@ -634,7 +671,7 @@ struct inode *exfat_build_inode(struct super_block *sb,
 		inode = ERR_PTR(err);
 		goto out;
 	}
-	exfat_hash_inode(inode, i_pos);
+	exfat_inode_tree_insert(inode, i_pos);
 	insert_inode_hash(inode);
 out:
 	return inode;
@@ -654,5 +691,5 @@ void exfat_evict_inode(struct inode *inode)
 	invalidate_inode_buffers(inode);
 	clear_inode(inode);
 	exfat_cache_inval_inode(inode);
-	exfat_unhash_inode(inode);
+	exfat_inode_tree_erase(inode);
 }
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 24b41103d1cc..10ed9b35fd86 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -827,7 +827,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
 	clear_nlink(inode);
 	inode->i_mtime = inode->i_atime = current_time(inode);
 	exfat_truncate_atime(&inode->i_atime);
-	exfat_unhash_inode(inode);
+	exfat_inode_tree_erase(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
 unlock:
 	mutex_unlock(&EXFAT_SB(sb)->s_lock);
@@ -993,7 +993,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	clear_nlink(inode);
 	inode->i_mtime = inode->i_atime = current_time(inode);
 	exfat_truncate_atime(&inode->i_atime);
-	exfat_unhash_inode(inode);
+	exfat_inode_tree_erase(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
 unlock:
 	mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock);
@@ -1363,8 +1363,8 @@ static int exfat_rename(struct user_namespace *mnt_userns,
 
 	i_pos = ((loff_t)EXFAT_I(old_inode)->dir.dir << 32) |
 		(EXFAT_I(old_inode)->entry & 0xffffffff);
-	exfat_unhash_inode(old_inode);
-	exfat_hash_inode(old_inode, i_pos);
+	exfat_inode_tree_erase(old_inode);
+	exfat_inode_tree_insert(old_inode, i_pos);
 	if (IS_DIRSYNC(new_dir))
 		exfat_sync_inode(old_inode);
 	else
@@ -1384,7 +1384,7 @@ static int exfat_rename(struct user_namespace *mnt_userns,
 		mark_inode_dirty(old_dir);
 
 	if (new_inode) {
-		exfat_unhash_inode(new_inode);
+		exfat_inode_tree_erase(new_inode);
 
 		/* skip drop_nlink if new_inode already has been dropped */
 		if (new_inode->i_nlink) {
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index d38d17a77e76..8f197432c57b 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -317,14 +317,12 @@ static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	return 0;
 }
 
-static void exfat_hash_init(struct super_block *sb)
+static void exfat_inode_tree_init(struct super_block *sb)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
-	int i;
 
-	spin_lock_init(&sbi->inode_hash_lock);
-	for (i = 0; i < EXFAT_HASH_SIZE; i++)
-		INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
+	spin_lock_init(&sbi->inode_tree_lock);
+	sbi->inode_tree = RB_ROOT;
 }
 
 static int exfat_read_root(struct inode *inode)
@@ -648,8 +646,8 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto check_nls_io;
 	}
 
-	/* set up enough so that it can read an inode */
-	exfat_hash_init(sb);
+	/* set up exfat inode tree */
+	exfat_inode_tree_init(sb);
 
 	if (!strcmp(sbi->options.iocharset, "utf8"))
 		opts->utf8 = 1;
@@ -683,7 +681,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto put_inode;
 	}
 
-	exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos);
+	exfat_inode_tree_insert(root_inode, EXFAT_I(root_inode)->i_pos);
 	insert_inode_hash(root_inode);
 
 	sb->s_root = d_make_root(root_inode);
@@ -786,7 +784,7 @@ static void exfat_inode_init_once(void *foo)
 	ei->nr_caches = 0;
 	ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
 	INIT_LIST_HEAD(&ei->cache_lru);
-	INIT_HLIST_NODE(&ei->i_hash_fat);
+	RB_CLEAR_NODE(&ei->rbnode);
 	inode_init_once(&ei->vfs_inode);
 }
 
-- 
2.27.0.83.g0313f36


                 reply	other threads:[~2021-03-29  0:32 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210329002818.73178-1-hyeongseok@gmail.com \
    --to=hyeongseok@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=namjae.jeon@samsung.com \
    --cc=sj1557.seo@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.