linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vasily Averin <vvs@virtuozzo.com>
To: Miklos Szeredi <miklos@szeredi.hu>
Cc: linux-fsdevel@vger.kernel.org
Subject: [PATCH RFC] fuse: optimize writepages search
Date: Thu, 19 Sep 2019 17:11:20 +0300	[thread overview]
Message-ID: <b762fcc4-1ddf-70c2-3189-544779186d3d@virtuozzo.com> (raw)

Dear Miklos,

This patch was originally developed for RHEL7-based 
Virtuozzo kernels and widely used few years
but seems it was not submitted upstream.
I've rebased it to v5.3, compiled but was not tested,
just to get feedback and then update it.

Author: Maxim Patlasov <mpatlasov@virtuozzo.com>

The patch re-works fi->writepages replacing list with rb-tree.
This improves performance because kernel fuse iterates through
fi->writepages for each writeback page and typical number of
entries is about 800 (for 100MB of fuse writeback).

Before patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
---
 fs/fuse/file.c   | 60 ++++++++++++++++++++++++++++++++++++++----------
 fs/fuse/fuse_i.h |  4 ++--
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 91c99724dee0..ef7feb897ada 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -338,17 +338,23 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
 static struct fuse_req *fuse_find_writeback(struct fuse_inode *fi,
 					    pgoff_t idx_from, pgoff_t idx_to)
 {
-	struct fuse_req *req;
+	struct rb_node *n;
+
+	n = fi->writepages.rb_node;
 
-	list_for_each_entry(req, &fi->writepages, writepages_entry) {
+	while (n) {
+		struct fuse_req *req;
 		pgoff_t curr_index;
 
+		req = rb_entry(n, struct fuse_req, writepages_entry);
 		WARN_ON(get_fuse_inode(req->inode) != fi);
 		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
-		if (idx_from < curr_index + req->num_pages &&
-		    curr_index <= idx_to) {
+		if (idx_from >= curr_index + req->num_pages)
+			n = n->rb_right;
+		else if (idx_to < curr_index)
+			n = n->rb_left;
+		else
 			return req;
-		}
 	}
 	return NULL;
 }
@@ -1527,7 +1533,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	int i;
 
-	list_del(&req->writepages_entry);
+	rb_erase(&req->writepages_entry, &fi->writepages);
 	for (i = 0; i < req->num_pages; i++) {
 		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
 		dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
@@ -1605,6 +1611,36 @@ __acquires(fi->lock)
 	}
 }
 
+static int tree_insert(struct rb_root *root, struct fuse_req *ins_req)
+{
+	pgoff_t idx_from = ins_req->misc.write.in.offset >> PAGE_SHIFT;
+	pgoff_t idx_to   = idx_from + (ins_req->num_pages ?
+				ins_req->num_pages - 1 : 0);
+	struct rb_node **p = &root->rb_node;
+	struct rb_node  *parent = NULL;
+
+	while (*p) {
+		struct fuse_req *req;
+		pgoff_t curr_index;
+
+		parent = *p;
+		req = rb_entry(parent, struct fuse_req, writepages_entry);
+		BUG_ON(req->inode != ins_req->inode);
+		curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
+
+		if (idx_from >= curr_index + req->num_pages)
+			p = &(*p)->rb_right;
+		else if (idx_to < curr_index)
+			p = &(*p)->rb_left;
+		else
+			BUG();
+	}
+
+	rb_link_node(&ins_req->writepages_entry, parent, p);
+	rb_insert_color(&ins_req->writepages_entry, root);
+	return 0;
+}
+
 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 {
 	struct inode *inode = req->inode;
@@ -1619,7 +1655,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
 		req->misc.write.next = next->misc.write.next;
 		next->misc.write.next = NULL;
 		next->ff = fuse_file_get(req->ff);
-		list_add(&next->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, next);
 
 		/*
 		 * Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1735,7 +1771,7 @@ static int fuse_writepage_locked(struct page *page)
 	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
 
 	spin_lock(&fi->lock);
-	list_add(&req->writepages_entry, &fi->writepages);
+	tree_insert(&fi->writepages, req);
 	list_add_tail(&req->list, &fi->queued_writes);
 	fuse_flush_writepages(inode);
 	spin_unlock(&fi->lock);
@@ -1820,10 +1856,10 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
 	WARN_ON(new_req->num_pages != 0);
 
 	spin_lock(&fi->lock);
-	list_del(&new_req->writepages_entry);
+	rb_erase(&new_req->writepages_entry, &fi->writepages);
 	old_req = fuse_find_writeback(fi, page->index, page->index);
 	if (!old_req) {
-		list_add(&new_req->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, new_req);
 		spin_unlock(&fi->lock);
 		return false;
 	}
@@ -1940,7 +1976,7 @@ static int fuse_writepages_fill(struct page *page,
 		req->inode = inode;
 
 		spin_lock(&fi->lock);
-		list_add(&req->writepages_entry, &fi->writepages);
+		tree_insert(&fi->writepages, req);
 		spin_unlock(&fi->lock);
 
 		data->req = req;
@@ -3262,5 +3298,5 @@ void fuse_init_file_inode(struct inode *inode)
 	INIT_LIST_HEAD(&fi->queued_writes);
 	fi->writectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
-	INIT_LIST_HEAD(&fi->writepages);
+	fi->writepages = RB_ROOT;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 24dbca777775..bbb3ca395892 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -114,7 +114,7 @@ struct fuse_inode {
 			wait_queue_head_t page_waitq;
 
 			/* List of writepage requestst (pending or sent) */
-			struct list_head writepages;
+			struct rb_root writepages;
 		};
 
 		/* readdir cache (directory only) */
@@ -437,7 +437,7 @@ struct fuse_req {
 	struct fuse_io_priv *io;
 
 	/** Link on fi->writepages */
-	struct list_head writepages_entry;
+	struct rb_node writepages_entry;
 
 	/** Request completion callback */
 	void (*end)(struct fuse_conn *, struct fuse_req *);
-- 
2.17.1


             reply	other threads:[~2019-09-19 14:11 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 14:11 Vasily Averin [this message]
2020-06-11  8:53 ` [PATCH RFC] fuse: optimize writepages search Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b762fcc4-1ddf-70c2-3189-544779186d3d@virtuozzo.com \
    --to=vvs@virtuozzo.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).