From: Miklos Szeredi <miklos@szeredi.hu>
To: wangyan <wangyan122@huawei.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>,
Stefan Hajnoczi <stefanha@redhat.com>,
linux-fsdevel <linux-fsdevel@vger.kernel.org>,
"virtio-fs@redhat.com" <virtio-fs@redhat.com>
Subject: Re: [Virtio-fs] [QUESTION] A performance problem for buffer write compared with 9p
Date: Mon, 26 Aug 2019 14:39:09 +0200 [thread overview]
Message-ID: <CAJfpegsd7v=DWwhAnNRq+L28xQFaw9EPhLyStnAG8V_hc_TZvg@mail.gmail.com> (raw)
In-Reply-To: <fd7a2791-d95c-3bd9-e387-b8778a9eca83@huawei.com>
[-- Attachment #1: Type: text/plain, Size: 884 bytes --]
On Sat, Aug 24, 2019 at 10:44 AM wangyan <wangyan122@huawei.com> wrote:
> According to the result, for "-size=1G", it may exceed the dirty pages'
> upper limit, and it frequently triggered pdflush for write-back. And for
> "-size=700M", it may not have exceeded the dirty pages' upper limit, so no
> extra pdflush was triggered.
>
> But for 9p using "-size=1G", the latency is 3.94 usec, and the bandwidth is
> 2305.5MB/s. It is better than virtiofs using "-size=1G". It seems that
> it is not affected by the dirty pages' upper limit.
I tried to reproduce these results, but failed to get decent
(>100MB/s) performance out of 9p. I don't have fscache set up, does
that play a part in getting high performance cached writes?
What you describe makes sense, and I have a new patch (attached), but
didn't see drastic improvement in performance of virtio-fs in my
tests.
Thanks,
Miklos
[-- Attachment #2: virtio-fs-nostrict-v4.patch --]
[-- Type: text/x-patch, Size: 7456 bytes --]
---
fs/fuse/file.c | 111 +++++++++++++++++++++++++++++++++++++---------------
fs/fuse/fuse_i.h | 3 +
fs/fuse/inode.c | 3 +
fs/fuse/virtio_fs.c | 4 +
4 files changed, 90 insertions(+), 31 deletions(-)
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -891,6 +891,10 @@ static int virtio_fs_fill_super(struct s
if (err < 0)
goto err_free_init_req;
+ /* No strict accounting needed for virtio-fs */
+ sb->s_bdi->capabilities = 0;
+ bdi_set_max_ratio(sb->s_bdi, 100);
+
fc = fs->vqs[VQ_REQUEST].fud->fc;
/* TODO take fuse_mutex around this loop? */
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -695,6 +695,9 @@ struct fuse_conn {
/** cache READLINK responses in page cache */
unsigned cache_symlinks:1;
+ /** use temp pages for writeback */
+ unsigned writeback_tmp:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1244,6 +1244,9 @@ static int fuse_fill_super(struct super_
err = fuse_fill_super_common(sb, &d);
if (err < 0)
goto err_free_init_req;
+
+ get_fuse_conn_super(sb)->writeback_tmp = 1;
+
/*
* atomic_dec_and_test() in fput() provides the necessary
* memory barrier for file->private_data to be visible on all
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -363,11 +363,16 @@ static bool fuse_range_is_writeback(stru
pgoff_t idx_to)
{
struct fuse_inode *fi = get_fuse_inode(inode);
- bool found;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ bool found = false;
- spin_lock(&fi->lock);
- found = fuse_find_writeback(fi, idx_from, idx_to);
- spin_unlock(&fi->lock);
+ if (fc->writeback_tmp) {
+ spin_lock(&fi->lock);
+ found = fuse_find_writeback(fi, idx_from, idx_to);
+ spin_unlock(&fi->lock);
+ } else {
+ WARN_ON(!list_empty(&fi->writepages));
+ }
return found;
}
@@ -1514,7 +1519,7 @@ static void fuse_writepage_free(struct f
int i;
for (i = 0; i < req->num_pages; i++)
- __free_page(req->pages[i]);
+ put_page(req->pages[i]);
if (req->ff)
fuse_file_put(req->ff, false, false);
@@ -1527,11 +1532,19 @@ static void fuse_writepage_finish(struct
struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- list_del(&req->writepages_entry);
+ if (fc->writeback_tmp)
+ list_del(&req->writepages_entry);
+ else
+ WARN_ON(!list_empty(&req->writepages_entry));
+
for (i = 0; i < req->num_pages; i++) {
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
+ if (fc->writeback_tmp) {
+ dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+ dec_node_page_state(req->pages[i], NR_WRITEBACK_TEMP);
+ wb_writeout_inc(&bdi->wb);
+ } else {
+ end_page_writeback(req->pages[i]);
+ }
}
wake_up(&fi->page_waitq);
}
@@ -1616,6 +1629,10 @@ static void fuse_writepage_end(struct fu
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_req *next = req->misc.write.next;
+
+ if (WARN_ON(!fc->writeback_tmp))
+ break;
+
req->misc.write.next = next->misc.write.next;
next->misc.write.next = NULL;
next->ff = fuse_file_get(req->ff);
@@ -1709,9 +1726,16 @@ static int fuse_writepage_locked(struct
/* writeback always goes to bg_queue */
__set_bit(FR_BACKGROUND, &req->flags);
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
- goto err_free;
+
+
+ if (fc->writeback_tmp) {
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (!tmp_page)
+ goto err_free;
+ } else {
+ tmp_page = page;
+ get_page(tmp_page);
+ }
error = -EIO;
req->ff = fuse_write_file_get(fc, fi);
@@ -1720,7 +1744,8 @@ static int fuse_writepage_locked(struct
fuse_write_fill(req, req->ff, page_offset(page), 0);
- copy_highpage(tmp_page, page);
+ if (fc->writeback_tmp)
+ copy_highpage(tmp_page, page);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->misc.write.next = NULL;
req->in.argpages = 1;
@@ -1731,21 +1756,27 @@ static int fuse_writepage_locked(struct
req->end = fuse_writepage_end;
req->inode = inode;
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ if (fc->writeback_tmp) {
+ inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+ inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ }
spin_lock(&fi->lock);
- list_add(&req->writepages_entry, &fi->writepages);
+ if (fc->writeback_tmp)
+ list_add(&req->writepages_entry, &fi->writepages);
+ else
+ INIT_LIST_HEAD(&req->writepages_entry);
list_add_tail(&req->list, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
- end_page_writeback(page);
+ if (fc->writeback_tmp)
+ end_page_writeback(page);
return 0;
err_nofile:
- __free_page(tmp_page);
+ put_page(tmp_page);
err_free:
fuse_request_free(req);
err:
@@ -1788,6 +1819,7 @@ static void fuse_writepages_send(struct
struct fuse_req *req = data->req;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
int num_pages = req->num_pages;
int i;
@@ -1797,8 +1829,10 @@ static void fuse_writepages_send(struct
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
- for (i = 0; i < num_pages; i++)
- end_page_writeback(data->orig_pages[i]);
+ if (fc->writeback_tmp) {
+ for (i = 0; i < num_pages; i++)
+ end_page_writeback(data->orig_pages[i]);
+ }
}
/*
@@ -1816,6 +1850,9 @@ static bool fuse_writepage_in_flight(str
struct fuse_req *tmp;
struct fuse_req *old_req;
+ if (WARN_ON(!fc->writeback_tmp))
+ return false;
+
WARN_ON(new_req->num_pages != 0);
spin_lock(&fi->lock);
@@ -1901,10 +1938,15 @@ static int fuse_writepages_fill(struct p
}
}
- err = -ENOMEM;
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
- goto out_unlock;
+ if (fc->writeback_tmp) {
+ err = -ENOMEM;
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (!tmp_page)
+ goto out_unlock;
+ } else {
+ tmp_page = page;
+ get_page(tmp_page);
+ }
/*
* The page must not be redirtied until the writeout is completed
@@ -1925,7 +1967,7 @@ static int fuse_writepages_fill(struct p
err = -ENOMEM;
req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
if (!req) {
- __free_page(tmp_page);
+ put_page(tmp_page);
goto out_unlock;
}
@@ -1938,21 +1980,28 @@ static int fuse_writepages_fill(struct p
req->end = fuse_writepage_end;
req->inode = inode;
- spin_lock(&fi->lock);
- list_add(&req->writepages_entry, &fi->writepages);
- spin_unlock(&fi->lock);
+ if (fc->writeback_tmp) {
+ spin_lock(&fi->lock);
+ list_add(&req->writepages_entry, &fi->writepages);
+ spin_unlock(&fi->lock);
+ } else {
+ INIT_LIST_HEAD(&req->writepages_entry);
+ }
data->req = req;
}
set_page_writeback(page);
- copy_highpage(tmp_page, page);
+ if (fc->writeback_tmp)
+ copy_highpage(tmp_page, page);
req->pages[req->num_pages] = tmp_page;
req->page_descs[req->num_pages].offset = 0;
req->page_descs[req->num_pages].length = PAGE_SIZE;
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ if (fc->writeback_tmp) {
+ inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+ inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ }
err = 0;
if (is_writeback && fuse_writepage_in_flight(req, page)) {
next prev parent reply other threads:[~2019-08-26 12:39 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-08-15 0:30 [QUESTION] A performance problem for buffer write compared with 9p wangyan
2019-08-15 0:56 ` Gao Xiang
2019-08-20 9:16 ` [Virtio-fs] " Stefan Hajnoczi
2019-08-21 7:51 ` Miklos Szeredi
2019-08-21 16:05 ` Stefan Hajnoczi
2019-08-22 0:59 ` wangyan
2019-08-22 11:43 ` Miklos Szeredi
2019-08-22 12:48 ` wangyan
2019-08-22 13:07 ` Miklos Szeredi
2019-08-22 13:17 ` wangyan
2019-08-22 13:29 ` Miklos Szeredi
2019-08-22 14:02 ` Miklos Szeredi
[not found] ` <fd7a2791-d95c-3bd9-e387-b8778a9eca83@huawei.com>
2019-08-26 12:39 ` Miklos Szeredi [this message]
2019-08-28 0:57 ` wangyan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAJfpegsd7v=DWwhAnNRq+L28xQFaw9EPhLyStnAG8V_hc_TZvg@mail.gmail.com' \
--to=miklos@szeredi.hu \
--cc=linux-fsdevel@vger.kernel.org \
--cc=mszeredi@redhat.com \
--cc=stefanha@redhat.com \
--cc=virtio-fs@redhat.com \
--cc=wangyan122@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).