linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Miklos Szeredi <miklos@szeredi.hu>
To: wangyan <wangyan122@huawei.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>,
	"virtio-fs@redhat.com" <virtio-fs@redhat.com>
Subject: Re: [Virtio-fs] [QUESTION] A performance problem for buffer write compared with 9p
Date: Mon, 26 Aug 2019 14:39:09 +0200	[thread overview]
Message-ID: <CAJfpegsd7v=DWwhAnNRq+L28xQFaw9EPhLyStnAG8V_hc_TZvg@mail.gmail.com> (raw)
In-Reply-To: <fd7a2791-d95c-3bd9-e387-b8778a9eca83@huawei.com>

[-- Attachment #1: Type: text/plain, Size: 884 bytes --]

On Sat, Aug 24, 2019 at 10:44 AM wangyan <wangyan122@huawei.com> wrote:

> According to the result, for "-size=1G", it may have exceeded the dirty
> pages' upper limit, and it frequently triggered pdflush for write-back.
> And for "-size=700M", it may not have exceeded the dirty pages' upper
> limit, so no extra pdflush was triggered.
>
> But for 9p using "-size=1G", the latency is 3.94 usec, and the bandwidth
> is 2305.5MB/s. It is better than virtiofs using "-size=1G". It seems
> that it is not affected by the dirty pages' upper limit.

I tried to reproduce these results, but failed to get decent
(>100MB/s) performance out of 9p.  I don't have fscache set up; does
that play a part in getting high-performance cached writes?

What you describe makes sense, and I have a new patch (attached), but
didn't see drastic improvement in performance of virtio-fs in my
tests.

Thanks,
Miklos

[-- Attachment #2: virtio-fs-nostrict-v4.patch --]
[-- Type: text/x-patch, Size: 7456 bytes --]

---
 fs/fuse/file.c      |  111 +++++++++++++++++++++++++++++++++++++---------------
 fs/fuse/fuse_i.h    |    3 +
 fs/fuse/inode.c     |    3 +
 fs/fuse/virtio_fs.c |    4 +
 4 files changed, 90 insertions(+), 31 deletions(-)

--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -891,6 +891,10 @@ static int virtio_fs_fill_super(struct s
 	if (err < 0)
 		goto err_free_init_req;
 
+	/* No strict accounting needed for virtio-fs */
+	sb->s_bdi->capabilities = 0;
+	bdi_set_max_ratio(sb->s_bdi, 100);
+
 	fc = fs->vqs[VQ_REQUEST].fud->fc;
 
 	/* TODO take fuse_mutex around this loop? */
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -695,6 +695,9 @@ struct fuse_conn {
 	/** cache READLINK responses in page cache */
 	unsigned cache_symlinks:1;
 
+	/** use temp pages for writeback */
+	unsigned writeback_tmp:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1244,6 +1244,9 @@ static int fuse_fill_super(struct super_
 	err = fuse_fill_super_common(sb, &d);
 	if (err < 0)
 		goto err_free_init_req;
+
+	get_fuse_conn_super(sb)->writeback_tmp = 1;
+
 	/*
 	 * atomic_dec_and_test() in fput() provides the necessary
 	 * memory barrier for file->private_data to be visible on all
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -363,11 +363,16 @@ static bool fuse_range_is_writeback(stru
 				   pgoff_t idx_to)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	bool found;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	bool found = false;
 
-	spin_lock(&fi->lock);
-	found = fuse_find_writeback(fi, idx_from, idx_to);
-	spin_unlock(&fi->lock);
+	if (fc->writeback_tmp) {
+		spin_lock(&fi->lock);
+		found = fuse_find_writeback(fi, idx_from, idx_to);
+		spin_unlock(&fi->lock);
+	} else {
+		WARN_ON(!list_empty(&fi->writepages));
+	}
 
 	return found;
 }
@@ -1514,7 +1519,7 @@ static void fuse_writepage_free(struct f
 	int i;
 
 	for (i = 0; i < req->num_pages; i++)
-		__free_page(req->pages[i]);
+		put_page(req->pages[i]);
 
 	if (req->ff)
 		fuse_file_put(req->ff, false, false);
@@ -1527,11 +1532,19 @@ static void fuse_writepage_finish(struct
 	struct backing_dev_info *bdi = inode_to_bdi(inode);
 	int i;
 
-	list_del(&req->writepages_entry);
+	if (fc->writeback_tmp)
+		list_del(&req->writepages_entry);
+	else
+		WARN_ON(!list_empty(&req->writepages_entry));
+
 	for (i = 0; i < req->num_pages; i++) {
-		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
-		dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
-		wb_writeout_inc(&bdi->wb);
+		if (fc->writeback_tmp) {
+			dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+			dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+			wb_writeout_inc(&bdi->wb);
+		} else {
+			end_page_writeback(req->pages[i]);
+		}
 	}
 	wake_up(&fi->page_waitq);
 }
@@ -1616,6 +1629,10 @@ static void fuse_writepage_end(struct fu
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_write_in *inarg = &req->misc.write.in;
 		struct fuse_req *next = req->misc.write.next;
+
+		if (WARN_ON(!fc->writeback_tmp))
+			break;
+
 		req->misc.write.next = next->misc.write.next;
 		next->misc.write.next = NULL;
 		next->ff = fuse_file_get(req->ff);
@@ -1709,9 +1726,16 @@ static int fuse_writepage_locked(struct
 
 	/* writeback always goes to bg_queue */
 	__set_bit(FR_BACKGROUND, &req->flags);
-	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
-	if (!tmp_page)
-		goto err_free;
+
+
+	if (fc->writeback_tmp) {
+		tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!tmp_page)
+			goto err_free;
+	} else {
+		tmp_page = page;
+		get_page(tmp_page);
+	}
 
 	error = -EIO;
 	req->ff = fuse_write_file_get(fc, fi);
@@ -1720,7 +1744,8 @@ static int fuse_writepage_locked(struct
 
 	fuse_write_fill(req, req->ff, page_offset(page), 0);
 
-	copy_highpage(tmp_page, page);
+	if (fc->writeback_tmp)
+		copy_highpage(tmp_page, page);
 	req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
 	req->misc.write.next = NULL;
 	req->in.argpages = 1;
@@ -1731,21 +1756,27 @@ static int fuse_writepage_locked(struct
 	req->end = fuse_writepage_end;
 	req->inode = inode;
 
-	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
-	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	if (fc->writeback_tmp) {
+		inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+		inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	}
 
 	spin_lock(&fi->lock);
-	list_add(&req->writepages_entry, &fi->writepages);
+	if (fc->writeback_tmp)
+		list_add(&req->writepages_entry, &fi->writepages);
+	else
+		INIT_LIST_HEAD(&req->writepages_entry);
 	list_add_tail(&req->list, &fi->queued_writes);
 	fuse_flush_writepages(inode);
 	spin_unlock(&fi->lock);
 
-	end_page_writeback(page);
+	if (fc->writeback_tmp)
+		end_page_writeback(page);
 
 	return 0;
 
 err_nofile:
-	__free_page(tmp_page);
+	put_page(tmp_page);
 err_free:
 	fuse_request_free(req);
 err:
@@ -1788,6 +1819,7 @@ static void fuse_writepages_send(struct
 	struct fuse_req *req = data->req;
 	struct inode *inode = data->inode;
 	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	int num_pages = req->num_pages;
 	int i;
 
@@ -1797,8 +1829,10 @@ static void fuse_writepages_send(struct
 	fuse_flush_writepages(inode);
 	spin_unlock(&fi->lock);
 
-	for (i = 0; i < num_pages; i++)
-		end_page_writeback(data->orig_pages[i]);
+	if (fc->writeback_tmp) {
+		for (i = 0; i < num_pages; i++)
+			end_page_writeback(data->orig_pages[i]);
+	}
 }
 
 /*
@@ -1816,6 +1850,9 @@ static bool fuse_writepage_in_flight(str
 	struct fuse_req *tmp;
 	struct fuse_req *old_req;
 
+	if (WARN_ON(!fc->writeback_tmp))
+		return false;
+
 	WARN_ON(new_req->num_pages != 0);
 
 	spin_lock(&fi->lock);
@@ -1901,10 +1938,15 @@ static int fuse_writepages_fill(struct p
 		}
 	}
 
-	err = -ENOMEM;
-	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
-	if (!tmp_page)
-		goto out_unlock;
+	if (fc->writeback_tmp) {
+		err = -ENOMEM;
+		tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!tmp_page)
+			goto out_unlock;
+	} else {
+		tmp_page = page;
+		get_page(tmp_page);
+	}
 
 	/*
 	 * The page must not be redirtied until the writeout is completed
@@ -1925,7 +1967,7 @@ static int fuse_writepages_fill(struct p
 		err = -ENOMEM;
 		req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
 		if (!req) {
-			__free_page(tmp_page);
+			put_page(tmp_page);
 			goto out_unlock;
 		}
 
@@ -1938,21 +1980,28 @@ static int fuse_writepages_fill(struct p
 		req->end = fuse_writepage_end;
 		req->inode = inode;
 
-		spin_lock(&fi->lock);
-		list_add(&req->writepages_entry, &fi->writepages);
-		spin_unlock(&fi->lock);
+		if (fc->writeback_tmp) {
+			spin_lock(&fi->lock);
+			list_add(&req->writepages_entry, &fi->writepages);
+			spin_unlock(&fi->lock);
+		} else {
+			INIT_LIST_HEAD(&req->writepages_entry);
+		}
 
 		data->req = req;
 	}
 	set_page_writeback(page);
 
-	copy_highpage(tmp_page, page);
+	if (fc->writeback_tmp)
+		copy_highpage(tmp_page, page);
 	req->pages[req->num_pages] = tmp_page;
 	req->page_descs[req->num_pages].offset = 0;
 	req->page_descs[req->num_pages].length = PAGE_SIZE;
 
-	inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
-	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	if (fc->writeback_tmp) {
+		inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+		inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	}
 
 	err = 0;
 	if (is_writeback && fuse_writepage_in_flight(req, page)) {

  parent reply	other threads:[~2019-08-26 12:39 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-15  0:30 [QUESTION] A performance problem for buffer write compared with 9p wangyan
2019-08-15  0:56 ` Gao Xiang
2019-08-20  9:16 ` [Virtio-fs] " Stefan Hajnoczi
2019-08-21  7:51   ` Miklos Szeredi
2019-08-21 16:05     ` Stefan Hajnoczi
2019-08-22  0:59       ` wangyan
2019-08-22 11:43         ` Miklos Szeredi
2019-08-22 12:48           ` wangyan
2019-08-22 13:07             ` Miklos Szeredi
2019-08-22 13:17               ` wangyan
2019-08-22 13:29                 ` Miklos Szeredi
2019-08-22 14:02                   ` Miklos Szeredi
     [not found]                     ` <fd7a2791-d95c-3bd9-e387-b8778a9eca83@huawei.com>
2019-08-26 12:39                       ` Miklos Szeredi [this message]
2019-08-28  0:57                         ` wangyan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAJfpegsd7v=DWwhAnNRq+L28xQFaw9EPhLyStnAG8V_hc_TZvg@mail.gmail.com' \
    --to=miklos@szeredi.hu \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=mszeredi@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtio-fs@redhat.com \
    --cc=wangyan122@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).