LKML Archive on lore.kernel.org
 help / Atom feed
From: Davidlohr Bueso <dave@stgolabs.net>
To: akpm@linux-foundation.org
Cc: mingo@kernel.org, peterz@infradead.org, jack@suse.cz,
	torvalds@linux-foundation.org, kirill.shutemov@linux.intel.com,
	hch@infradead.org, ldufour@linux.vnet.ibm.com, mhocko@suse.com,
	mgorman@techsingularity.net, dave@stgolabs.net,
	linux-kernel@vger.kernel.org, tytso@mit.edu,
	Davidlohr Bueso <dbueso@suse.de>
Subject: [PATCH 15/17] fs/ext4: use cached rbtrees
Date: Tue, 18 Jul 2017 18:46:01 -0700
Message-ID: <20170719014603.19029-16-dave@stgolabs.net> (raw)
In-Reply-To: <20170719014603.19029-1-dave@stgolabs.net>

This patch replaces two uses of rbtrees to the cached version for
better rb_first() performance during:

* Tree traversals to mark blocks as used during ext4_mb_init_cache()
looping. (struct ext4_group_info)

* Storing ->curr_node for dx_readdir(). (struct dir_private_info)

Caching the leftmost node comes at negligible runtime cost, thus the
only downside is that the aforementioned structures are enlarged for
the extra pointer. No changes in semantics whatsoever.

Cc: tytso@mit.edu
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
This is part of the rbtree internal caching series:
https://lwn.net/Articles/726809/

 fs/ext4/dir.c     | 24 ++++++++++++++----------
 fs/ext4/ext4.h    |  4 ++--
 fs/ext4/mballoc.c | 22 ++++++++++++----------
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index e8b365000d73..011df6bba13c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -391,18 +391,18 @@ struct fname {
  * This functoin implements a non-recursive way of freeing all of the
  * nodes in the red-black tree.
  */
-static void free_rb_tree_fname(struct rb_root *root)
+static void free_rb_tree_fname(struct rb_root_cached *root)
 {
 	struct fname *fname, *next;
 
-	rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
+	rbtree_postorder_for_each_entry_safe(fname, next, &root->rb_root, rb_hash)
 		while (fname) {
 			struct fname *old = fname;
 			fname = fname->next;
 			kfree(old);
 		}
 
-	*root = RB_ROOT;
+	*root = RB_ROOT_CACHED;
 }
 
 
@@ -441,9 +441,10 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 	struct fname *fname, *new_fn;
 	struct dir_private_info *info;
 	int len;
+	bool leftmost = true;
 
 	info = dir_file->private_data;
-	p = &info->root.rb_node;
+	p = &info->root.rb_root.rb_node;
 
 	/* Create and allocate the fname structure */
 	len = sizeof(struct fname) + ent_name->len + 1;
@@ -475,16 +476,19 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 
 		if (new_fn->hash < fname->hash)
 			p = &(*p)->rb_left;
-		else if (new_fn->hash > fname->hash)
+		else if (new_fn->hash > fname->hash) {
 			p = &(*p)->rb_right;
-		else if (new_fn->minor_hash < fname->minor_hash)
+			leftmost = false;
+		} else if (new_fn->minor_hash < fname->minor_hash)
 			p = &(*p)->rb_left;
-		else /* if (new_fn->minor_hash > fname->minor_hash) */
+		else { /* if (new_fn->minor_hash > fname->minor_hash) */
 			p = &(*p)->rb_right;
+			leftmost = false;
+		}
 	}
 
 	rb_link_node(&new_fn->rb_hash, parent, p);
-	rb_insert_color(&new_fn->rb_hash, &info->root);
+	rb_insert_color_cached(&new_fn->rb_hash, &info->root, leftmost);
 	return 0;
 }
 
@@ -558,7 +562,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
 		info->extra_fname = NULL;
 		goto next_node;
 	} else if (!info->curr_node)
-		info->curr_node = rb_first(&info->root);
+		info->curr_node = rb_first_cached(&info->root);
 
 	while (1) {
 		/*
@@ -580,7 +584,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
 				ctx->pos = ext4_get_htree_eof(file);
 				break;
 			}
-			info->curr_node = rb_first(&info->root);
+			info->curr_node = rb_first_cached(&info->root);
 		}
 
 		fname = rb_entry(info->curr_node, struct fname, rb_hash);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9ebde0cd632e..0242c6801600 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2116,7 +2116,7 @@ static inline bool ext4_is_quota_file(struct inode *inode)
  * readdir operations in hash tree order.
  */
 struct dir_private_info {
-	struct rb_root	root;
+	struct          rb_root_cached	root;
 	struct rb_node	*curr_node;
 	struct fname	*extra_fname;
 	loff_t		last_pos;
@@ -2883,7 +2883,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
 
 struct ext4_group_info {
 	unsigned long   bb_state;
-	struct rb_root  bb_free_root;
+	struct          rb_root_cached  bb_free_root;
 	ext4_grpblk_t	bb_first_free;	/* first free block */
 	ext4_grpblk_t	bb_free;	/* total free blocks */
 	ext4_grpblk_t	bb_fragments;	/* nr of freespace fragments */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 581e357e8406..df36f9123a99 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2462,7 +2462,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
 	init_rwsem(&meta_group_info[i]->alloc_sem);
-	meta_group_info[i]->bb_free_root = RB_ROOT;
+	meta_group_info[i]->bb_free_root = RB_ROOT_CACHED;
 	meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
 
 #ifdef DOUBLE_CHECK
@@ -2824,7 +2824,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
 	count2++;
 	ext4_lock_group(sb, entry->efd_group);
 	/* Take it out of per group rb tree */
-	rb_erase(&entry->efd_node, &(db->bb_free_root));
+	rb_erase_cached(&entry->efd_node, &(db->bb_free_root));
 	mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
 
 	/*
@@ -2836,7 +2836,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
 	if (!test_opt(sb, DISCARD))
 		EXT4_MB_GRP_CLEAR_TRIMMED(db);
 
-	if (!db->bb_free_root.rb_node) {
+	if (!db->bb_free_root.rb_root.rb_node) {
 		/* No more items in the per group rb tree
 		 * balance refcounts from ext4_mb_free_metadata()
 		 */
@@ -3519,7 +3519,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 	struct ext4_free_data *entry;
 
 	grp = ext4_get_group_info(sb, group);
-	n = rb_first(&(grp->bb_free_root));
+	n = rb_first_cached(&(grp->bb_free_root));
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, efd_node);
@@ -4624,7 +4624,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
 					struct ext4_free_data *entry,
 					struct ext4_free_data *new_entry,
-					struct rb_root *entry_rb_root)
+					struct rb_root_cached *entry_rb_root)
 {
 	if ((entry->efd_tid != new_entry->efd_tid) ||
 	    (entry->efd_group != new_entry->efd_group))
@@ -4641,7 +4641,7 @@ static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
 	spin_lock(&sbi->s_md_lock);
 	list_del(&entry->efd_list);
 	spin_unlock(&sbi->s_md_lock);
-	rb_erase(&entry->efd_node, entry_rb_root);
+	rb_erase_cached(&entry->efd_node, entry_rb_root);
 	kmem_cache_free(ext4_free_data_cachep, entry);
 }
 
@@ -4656,8 +4656,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	struct ext4_group_info *db = e4b->bd_info;
 	struct super_block *sb = e4b->bd_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct rb_node **n = &db->bb_free_root.rb_node, *node;
+	struct rb_node **n = &db->bb_free_root.rb_root.rb_node, *node;
 	struct rb_node *parent = NULL, *new_node;
+	bool leftmost = true;
 
 	BUG_ON(!ext4_handle_valid(handle));
 	BUG_ON(e4b->bd_bitmap_page == NULL);
@@ -4680,9 +4681,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 		entry = rb_entry(parent, struct ext4_free_data, efd_node);
 		if (cluster < entry->efd_start_cluster)
 			n = &(*n)->rb_left;
-		else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
+		else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) {
 			n = &(*n)->rb_right;
-		else {
+			leftmost = false;
+		} else {
 			ext4_grp_locked_error(sb, group, 0,
 				ext4_group_first_block_no(sb, group) +
 				EXT4_C2B(sbi, cluster),
@@ -4692,7 +4694,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	}
 
 	rb_link_node(new_node, parent, n);
-	rb_insert_color(new_node, &db->bb_free_root);
+	rb_insert_color_cached(new_node, &db->bb_free_root, leftmost);
 
 	/* Now try to see the extent can be merged to left and right */
 	node = rb_prev(new_node);
-- 
2.12.0

  parent reply index

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-19  1:45 [PATCH -next v4 00/17] rbtree: cache leftmost node internally Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 01/17] " Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 02/17] rbtree: optimize root-check during rebalancing loop Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 03/17] rbtree: add some additional comments for rebalancing cases Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 04/17] lib/rbtree_test.c: make input module parameters Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 05/17] lib/rbtree_test.c: add (inorder) traversal test Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 06/17] lib/rbtree_test.c: support rb_root_cached Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 07/17] sched/fair: replace cfs_rq->rb_leftmost Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 08/17] sched/deadline: replace earliest dl and rq leftmost caching Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 09/17] locking/rtmutex: replace top-waiter and pi_waiters " Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 10/17] block/cfq: replace cfq_rb_root " Davidlohr Bueso
2017-07-19  7:46   ` Jan Kara
2017-07-19  1:45 ` [PATCH 11/17] lib/interval_tree: fast overlap detection Davidlohr Bueso
2017-07-22 17:52   ` Doug Ledford
2017-08-01 17:16   ` Michael S. Tsirkin
2017-07-19  1:45 ` [PATCH 12/17] lib/interval-tree: correct comment wrt generic flavor Davidlohr Bueso
2017-07-19  1:45 ` [PATCH 13/17] procfs: use faster rb_first_cached() Davidlohr Bueso
2017-07-19  1:46 ` [PATCH 14/17] fs/epoll: " Davidlohr Bueso
2017-07-19  1:46 ` Davidlohr Bueso [this message]
2017-07-19  7:40   ` [PATCH 15/17] fs/ext4: use cached rbtrees Jan Kara
2017-07-19 22:50     ` Davidlohr Bueso
2017-07-19  1:46 ` [PATCH 16/17] mem/memcg: cache rightmost node Davidlohr Bueso
2017-07-19  7:50   ` Michal Hocko
2017-07-26 21:09     ` Andrew Morton
2017-07-27  7:06       ` Michal Hocko
2017-07-19  1:46 ` [PATCH 17/17] block/cfq: cache rightmost rb_node Davidlohr Bueso
2017-07-19  7:59   ` Jan Kara

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170719014603.19029-16-dave@stgolabs.net \
    --to=dave@stgolabs.net \
    --cc=akpm@linux-foundation.org \
    --cc=dbueso@suse.de \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=ldufour@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@suse.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox