If user supplied a large value with the 'msize' option, then client would silently limit that 'msize' value to the maximum value supported by transport. That's a bit confusing for users of not having any indication why the preferred 'msize' value could not be satisfied. Reported-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- net/9p/client.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 213f12ed76cd..fa2afeaf1394 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1044,8 +1044,14 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto put_trans; - if (clnt->msize > clnt->trans_mod->maxsize) + if (clnt->msize > clnt->trans_mod->maxsize) { clnt->msize = clnt->trans_mod->maxsize; + p9_debug(P9_DEBUG_ERROR, + "Limiting 'msize' to %d as this is the maximum " + "supported by transport %s\n", + clnt->msize, clnt->trans_mod->name + ); + } if (clnt->msize < 4096) { p9_debug(P9_DEBUG_ERROR, -- 2.20.1
The scatter gather list in struct virtio_chan currently resides as compile-time constant size array directly within the contiguous struct virtio_chan's memory space. Separate memory space and allocation of the scatter gather list from memory space and allocation of struct virtio_chan. Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- net/9p/trans_virtio.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 490a4c900339..1dbe2e921bb8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -77,7 +77,7 @@ struct virtio_chan { */ unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ - struct scatterlist sg[VIRTQUEUE_NUM]; + struct scatterlist *sg; /** * @tag: name to identify a mount null terminated */ @@ -574,6 +574,14 @@ static int p9_virtio_probe(struct virtio_device *vdev) goto fail; } + chan->sg = kmalloc_array(VIRTQUEUE_NUM, + sizeof(struct scatterlist), GFP_KERNEL); + if (!chan->sg) { + pr_err("Failed to allocate virtio 9P channel\n"); + err = -ENOMEM; + goto out_free_chan_shallow; + } + chan->vdev = vdev; /* We expect one virtqueue, for requests. */ @@ -635,6 +643,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) out_free_vq: vdev->config->del_vqs(vdev); out_free_chan: + kfree(chan->sg); +out_free_chan_shallow: kfree(chan); fail: return err; @@ -728,6 +738,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); kfree(chan->vc_wq); + kfree(chan->sg); kfree(chan); } -- 2.20.1
The size of scatter/gather lists used by the virtio transport is currently hard coded. Refactor this to using a runtime variable. Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- net/9p/trans_virtio.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 1dbe2e921bb8..3347d35a5e6e 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -36,7 +36,7 @@ #include <linux/virtio_9p.h> #include "trans_common.h" -#define VIRTQUEUE_NUM 128 +#define VIRTQUEUE_DEFAULT_NUM 128 /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); @@ -54,6 +54,7 @@ static atomic_t vp_pinned = ATOMIC_INIT(0); * @vc_wq: wait queue for waiting for thing to be added to ring buf * @p9_max_pages: maximum number of pinned pages * @sg: scatter gather list which is used to pack a request (protected?) + * @sg_n: amount of elements in sg array * @chan_list: linked list of channels * * We keep all per-channel information in a structure. @@ -78,6 +79,7 @@ struct virtio_chan { unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist *sg; + size_t sg_n; /** * @tag: name to identify a mount null terminated */ @@ -270,12 +272,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) out_sgs = in_sgs = 0; /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, - VIRTQUEUE_NUM, req->tc.sdata, req->tc.size); + chan->sg_n, req->tc.sdata, req->tc.size); if (out) sgs[out_sgs++] = chan->sg; in = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity); + chan->sg_n, req->rc.sdata, req->rc.capacity); if (in) sgs[out_sgs + in_sgs++] = chan->sg + out; @@ -447,14 +449,14 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, /* out data */ out = pack_sg_list(chan->sg, 0, - VIRTQUEUE_NUM, req->tc.sdata, req->tc.size); + chan->sg_n, req->tc.sdata, req->tc.size); if (out) sgs[out_sgs++] = chan->sg; if (out_pages) { sgs[out_sgs++] = chan->sg + out; - out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + out += pack_sg_list_p(chan->sg, out, chan->sg_n, out_pages, out_nr_pages, offs, outlen); } @@ -466,13 +468,13 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, * allocated memory and payload onto the user buffer. */ in = pack_sg_list(chan->sg, out, - VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len); + chan->sg_n, req->rc.sdata, in_hdr_len); if (in) sgs[out_sgs + in_sgs++] = chan->sg + out; if (in_pages) { sgs[out_sgs + in_sgs++] = chan->sg + out + in; - in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, + in += pack_sg_list_p(chan->sg, out + in, chan->sg_n, in_pages, in_nr_pages, offs, inlen); } @@ -574,13 +576,14 @@ static int p9_virtio_probe(struct virtio_device *vdev) goto fail; } - chan->sg = kmalloc_array(VIRTQUEUE_NUM, + chan->sg = kmalloc_array(VIRTQUEUE_DEFAULT_NUM, sizeof(struct scatterlist), GFP_KERNEL); if (!chan->sg) { pr_err("Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto out_free_chan_shallow; } + chan->sg_n = VIRTQUEUE_DEFAULT_NUM; chan->vdev = vdev; @@ -593,7 +596,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vq->vdev->priv = chan; spin_lock_init(&chan->lock); - sg_init_table(chan->sg, VIRTQUEUE_NUM); + sg_init_table(chan->sg, chan->sg_n); chan->inuse = false; if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { @@ -777,7 +780,7 @@ static struct p9_trans_module p9_virtio_trans = { * that are not at page boundary, that can result in an extra * page in zero copy. */ - .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), + .maxsize = PAGE_SIZE * (VIRTQUEUE_DEFAULT_NUM - 3), .def = 1, .owner = THIS_MODULE, }; -- 2.20.1
The amount of elements in a scatter/gather list is limited to approximately 128 elements. To allow going beyond that limit with subsequent patches, pave the way by turning the one- dimensional sg list array into a two-dimensional array, i.e: sg[128] becomes sgl[nsgl][SG_MAX_SINGLE_ALLOC] As the value of 'nsgl' is exactly (still) 1 in this commit and the compile-time (compiler and architecture dependent) value of 'SG_MAX_SINGLE_ALLOC' equals approximately the previous hard coded 128 elements, this commit is therefore more of a preparotory refactoring then actual behaviour change. A custom struct virtqueue_sg is defined instead of using shared API struct sg_table, because the latter would not allow to resize the table after allocation. sg_append_table API OTOH would not fit either, because it requires a list of pages beforehand upon allocation. And both APIs only support all-or-nothing allocation. Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- net/9p/trans_virtio.c | 185 +++++++++++++++++++++++++++++++----------- 1 file changed, 139 insertions(+), 46 deletions(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3347d35a5e6e..1a45e0df2336 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -36,7 +36,30 @@ #include <linux/virtio_9p.h> #include "trans_common.h" -#define VIRTQUEUE_DEFAULT_NUM 128 +/** + * (chained) scatter gather lists for virtqueue data transmission + */ +struct virtqueue_sg { + /** amount of elements in array field @sgl */ + unsigned int nsgl; + /** two-dimensional array, i.e. sgl[nsgl][SG_MAX_SINGLE_ALLOC] */ + struct scatterlist *sgl[]; +}; + +/* + * Default value for field nsgl in struct virtqueue_sg, which defines the + * initial virtio data transmission capacity when this virtio transport is + * probed. + */ +#define VIRTQUEUE_SG_NSGL_DEFAULT 1 + +/* maximum value for field nsgl in struct virtqueue_sg */ +#define VIRTQUEUE_SG_NSGL_MAX \ + ((PAGE_SIZE - sizeof(struct virtqueue_sg)) / \ + sizeof(struct scatterlist *)) \ + +/* last entry per sg list is used for chaining (pointer to next list) */ +#define SG_USER_PAGES_PER_LIST (SG_MAX_SINGLE_ALLOC - 1) /* a single mutex to manage channel initialization and attachment */ static DEFINE_MUTEX(virtio_9p_lock); @@ -53,8 +76,7 @@ static atomic_t vp_pinned = ATOMIC_INIT(0); * @ring_bufs_avail: flag to indicate there is some available in the ring buf * @vc_wq: wait queue for waiting for thing to be added to ring buf * @p9_max_pages: maximum number of pinned pages - * @sg: scatter gather list which is used to pack a request (protected?) - * @sg_n: amount of elements in sg array + * @vq_sg: table of scatter gather lists, which are used to pack a request * @chan_list: linked list of channels * * We keep all per-channel information in a structure. @@ -77,9 +99,7 @@ struct virtio_chan { * will be placing it in each channel. */ unsigned long p9_max_pages; - /* Scatterlist: can be too big for stack. */ - struct scatterlist *sg; - size_t sg_n; + struct virtqueue_sg *vq_sg; /** * @tag: name to identify a mount null terminated */ @@ -96,6 +116,85 @@ static unsigned int rest_of_page(void *data) return PAGE_SIZE - offset_in_page(data); } +/** + * Returns user page for given page index. + * @vq_sg: scatter gather lists used by this transport + * @page: user page index across all scatter gather lists + */ +static struct scatterlist *vq_sg_page(struct virtqueue_sg *vq_sg, size_t page) +{ + unsigned int node = page / SG_USER_PAGES_PER_LIST; + unsigned int leaf = page % SG_USER_PAGES_PER_LIST; + BUG_ON(node >= VIRTQUEUE_SG_NSGL_MAX); + return &vq_sg->sgl[node][leaf]; +} + +/** + * Returns total number of individual user pages in passed scatter gather + * lists. + */ +static size_t vq_sg_npages(struct virtqueue_sg *vq_sg) +{ + return vq_sg->nsgl * SG_USER_PAGES_PER_LIST; +} + +/** + * free all memory previously allocated for @vq_sg + */ +static void vq_sg_free(struct virtqueue_sg *vq_sg) +{ + unsigned int i; + + for (i = 0; i < vq_sg->nsgl; ++i) { + kfree(vq_sg->sgl[i]); + } + kfree(vq_sg); +} + +/** + * Allocates and returns @nsgl scatter gather lists, if @nsgl is larger than + * one then chained lists are used (if supported by architecture). + */ +static struct virtqueue_sg *vq_sg_alloc(unsigned int nsgl) +{ + struct virtqueue_sg *vq_sg; + unsigned int i; + + BUG_ON(!nsgl || nsgl > VIRTQUEUE_SG_NSGL_MAX); +#ifdef CONFIG_ARCH_NO_SG_CHAIN + if (WARN_ON_ONCE(nsgl > 1)) + return NULL; +#endif + + vq_sg = kzalloc(sizeof(struct virtqueue_sg) + + nsgl * sizeof(struct scatterlist *), + GFP_KERNEL); + + if (!vq_sg) + return NULL; + + vq_sg->nsgl = nsgl; + + for (i = 0; i < nsgl; ++i) { + vq_sg->sgl[i] = kmalloc_array( + SG_MAX_SINGLE_ALLOC, sizeof(struct scatterlist), + GFP_KERNEL + ); + if (!vq_sg->sgl[i]) { + vq_sg_free(vq_sg); + return NULL; + } + sg_init_table(vq_sg->sgl[i], SG_MAX_SINGLE_ALLOC); + if (i) { + /* chain the lists */ + sg_chain(vq_sg->sgl[i - 1], SG_MAX_SINGLE_ALLOC, + vq_sg->sgl[i]); + } + } + sg_mark_end(&vq_sg->sgl[nsgl - 1][SG_MAX_SINGLE_ALLOC - 1]); + return vq_sg; +} + /** * p9_virtio_close - reclaim resources of a channel * @client: client instance @@ -158,9 +257,8 @@ static void req_done(struct virtqueue *vq) /** * pack_sg_list - pack a scatter gather list from a linear buffer - * @sg: scatter/gather list to pack into + * @vq_sg: scatter/gather lists to pack into * @start: which segment of the sg_list to start at - * @limit: maximum segment to pack data to * @data: data to pack into scatter/gather list * @count: amount of data to pack into the scatter/gather list * @@ -170,11 +268,12 @@ static void req_done(struct virtqueue *vq) * */ -static int pack_sg_list(struct scatterlist *sg, int start, - int limit, char *data, int count) +static int pack_sg_list(struct virtqueue_sg *vq_sg, int start, + char *data, int count) { int s; int index = start; + size_t limit = vq_sg_npages(vq_sg); while (count) { s = rest_of_page(data); @@ -182,13 +281,13 @@ static int pack_sg_list(struct scatterlist *sg, int start, s = count; BUG_ON(index >= limit); /* Make sure we don't terminate early. */ - sg_unmark_end(&sg[index]); - sg_set_buf(&sg[index++], data, s); + sg_unmark_end(vq_sg_page(vq_sg, index)); + sg_set_buf(vq_sg_page(vq_sg, index++), data, s); count -= s; data += s; } if (index-start) - sg_mark_end(&sg[index - 1]); + sg_mark_end(vq_sg_page(vq_sg, index - 1)); return index-start; } @@ -208,21 +307,21 @@ static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req) /** * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, * this takes a list of pages. - * @sg: scatter/gather list to pack into + * @vq_sg: scatter/gather lists to pack into * @start: which segment of the sg_list to start at - * @limit: maximum number of pages in sg list. * @pdata: a list of pages to add into sg. * @nr_pages: number of pages to pack into the scatter/gather list * @offs: amount of data in the beginning of first page _not_ to pack * @count: amount of data to pack into the scatter/gather list */ static int -pack_sg_list_p(struct scatterlist *sg, int start, int limit, +pack_sg_list_p(struct virtqueue_sg *vq_sg, int start, struct page **pdata, int nr_pages, size_t offs, int count) { int i = 0, s; int data_off = offs; int index = start; + size_t limit = vq_sg_npages(vq_sg); BUG_ON(nr_pages > (limit - start)); /* @@ -235,15 +334,16 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, s = count; BUG_ON(index >= limit); /* Make sure we don't terminate early. */ - sg_unmark_end(&sg[index]); - sg_set_page(&sg[index++], pdata[i++], s, data_off); + sg_unmark_end(vq_sg_page(vq_sg, index)); + sg_set_page(vq_sg_page(vq_sg, index++), pdata[i++], s, + data_off); data_off = 0; count -= s; nr_pages--; } if (index-start) - sg_mark_end(&sg[index - 1]); + sg_mark_end(vq_sg_page(vq_sg, index - 1)); return index - start; } @@ -271,15 +371,13 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) out_sgs = in_sgs = 0; /* Handle out VirtIO ring buffers */ - out = pack_sg_list(chan->sg, 0, - chan->sg_n, req->tc.sdata, req->tc.size); + out = pack_sg_list(chan->vq_sg, 0, req->tc.sdata, req->tc.size); if (out) - sgs[out_sgs++] = chan->sg; + sgs[out_sgs++] = vq_sg_page(chan->vq_sg, 0); - in = pack_sg_list(chan->sg, out, - chan->sg_n, req->rc.sdata, req->rc.capacity); + in = pack_sg_list(chan->vq_sg, out, req->rc.sdata, req->rc.capacity); if (in) - sgs[out_sgs + in_sgs++] = chan->sg + out; + sgs[out_sgs + in_sgs++] = vq_sg_page(chan->vq_sg, out); err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); @@ -448,16 +546,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, out_sgs = in_sgs = 0; /* out data */ - out = pack_sg_list(chan->sg, 0, - chan->sg_n, req->tc.sdata, req->tc.size); + out = pack_sg_list(chan->vq_sg, 0, req->tc.sdata, req->tc.size); if (out) - sgs[out_sgs++] = chan->sg; + sgs[out_sgs++] = vq_sg_page(chan->vq_sg, 0); if (out_pages) { - sgs[out_sgs++] = chan->sg + out; - out += pack_sg_list_p(chan->sg, out, chan->sg_n, - out_pages, out_nr_pages, offs, outlen); + sgs[out_sgs++] = vq_sg_page(chan->vq_sg, out); + out += pack_sg_list_p(chan->vq_sg, out, out_pages, + out_nr_pages, offs, outlen); } /* @@ -467,15 +564,14 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, * Arrange in such a way that server places header in the * allocated memory and payload onto the user buffer. */ - in = pack_sg_list(chan->sg, out, - chan->sg_n, req->rc.sdata, in_hdr_len); + in = pack_sg_list(chan->vq_sg, out, req->rc.sdata, in_hdr_len); if (in) - sgs[out_sgs + in_sgs++] = chan->sg + out; + sgs[out_sgs + in_sgs++] = vq_sg_page(chan->vq_sg, out); if (in_pages) { - sgs[out_sgs + in_sgs++] = chan->sg + out + in; - in += pack_sg_list_p(chan->sg, out + in, chan->sg_n, - in_pages, in_nr_pages, offs, inlen); + sgs[out_sgs + in_sgs++] = vq_sg_page(chan->vq_sg, out + in); + in += pack_sg_list_p(chan->vq_sg, out + in, in_pages, + in_nr_pages, offs, inlen); } BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); @@ -576,14 +672,12 @@ static int p9_virtio_probe(struct virtio_device *vdev) goto fail; } - chan->sg = kmalloc_array(VIRTQUEUE_DEFAULT_NUM, - sizeof(struct scatterlist), GFP_KERNEL); - if (!chan->sg) { + chan->vq_sg = vq_sg_alloc(VIRTQUEUE_SG_NSGL_DEFAULT); + if (!chan->vq_sg) { pr_err("Failed to allocate virtio 9P channel\n"); err = -ENOMEM; goto out_free_chan_shallow; } - chan->sg_n = VIRTQUEUE_DEFAULT_NUM; chan->vdev = vdev; @@ -596,8 +690,6 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vq->vdev->priv = chan; spin_lock_init(&chan->lock); - sg_init_table(chan->sg, chan->sg_n); - chan->inuse = false; if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len); @@ -646,7 +738,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) out_free_vq: vdev->config->del_vqs(vdev); out_free_chan: - kfree(chan->sg); + vq_sg_free(chan->vq_sg); out_free_chan_shallow: kfree(chan); fail: @@ -741,7 +833,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); kfree(chan->vc_wq); - kfree(chan->sg); + vq_sg_free(chan->vq_sg); kfree(chan); } @@ -780,7 +872,8 @@ static struct p9_trans_module p9_virtio_trans = { * that are not at page boundary, that can result in an extra * page in zero copy. */ - .maxsize = PAGE_SIZE * (VIRTQUEUE_DEFAULT_NUM - 3), + .maxsize = PAGE_SIZE * + ((VIRTQUEUE_SG_NSGL_DEFAULT * SG_USER_PAGES_PER_LIST) - 3), .def = 1, .owner = THIS_MODULE, }; -- 2.20.1
This new field 'trans_maxsize' optionally allows transport to update it to reflect the actual maximum msize supported by allocated transport channel. Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- include/net/9p/client.h | 2 ++ net/9p/client.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/net/9p/client.h b/include/net/9p/client.h index e1c308d8d288..e48c4cdf9be0 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -89,6 +89,7 @@ struct p9_req_t { * struct p9_client - per client instance state * @lock: protect @fids and @reqs * @msize: maximum data size negotiated by protocol + * @trans_maxsize: actual maximum msize supported by transport channel * @proto_version: 9P protocol version to use * @trans_mod: module API instantiated with this client * @status: connection state @@ -103,6 +104,7 @@ struct p9_req_t { struct p9_client { spinlock_t lock; unsigned int msize; + unsigned int trans_maxsize; unsigned char proto_version; struct p9_trans_module *trans_mod; enum p9_trans_status status; diff --git a/net/9p/client.c b/net/9p/client.c index fa2afeaf1394..6e699ede069e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1037,6 +1037,14 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto free_client; } + /* + * transport will get a chance to increase trans_maxsize (if + * necessary) and it may update trans_maxsize in create() function + * below accordingly to reflect the actual maximum size supported by + * the allocated transport channel + */ + clnt->trans_maxsize = clnt->trans_mod->maxsize; + p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); @@ -1044,8 +1052,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto put_trans; - if (clnt->msize > clnt->trans_mod->maxsize) { - clnt->msize = clnt->trans_mod->maxsize; + if (clnt->msize > clnt->trans_maxsize) { + clnt->msize = clnt->trans_maxsize; p9_debug(P9_DEBUG_ERROR, "Limiting 'msize' to %d as this is the maximum " "supported by transport %s\n", -- 2.20.1
The virtio transport supports by default a 9p 'msize' of up to approximately 500 kB. This patches adds support for larger 'msize' values by resizing the amount of scatter/gather lists if required. Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- net/9p/trans_virtio.c | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 1a45e0df2336..1f9a0283d7b8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -195,6 +195,30 @@ static struct virtqueue_sg *vq_sg_alloc(unsigned int nsgl) return vq_sg; } +/** + * Resize passed virtqueue scatter/gather lists to the passed amount of + * list blocks. + * @_vq_sg: scatter/gather lists to be resized + * @nsgl: new amount of scatter/gather list blocks + */ +static int vq_sg_resize(struct virtqueue_sg **_vq_sg, unsigned int nsgl) +{ + struct virtqueue_sg *vq_sg; + + BUG_ON(!_vq_sg || !nsgl); + vq_sg = *_vq_sg; + if (vq_sg->nsgl == nsgl) + return 0; + + /* lazy resize implementation for now */ + vq_sg = vq_sg_alloc(nsgl); + if (!vq_sg) + return -ENOMEM; + + *_vq_sg = vq_sg; + return 0; +} + /** * p9_virtio_close - reclaim resources of a channel * @client: client instance @@ -766,6 +790,10 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) struct virtio_chan *chan; int ret = -ENOENT; int found = 0; +#if !defined(CONFIG_ARCH_NO_SG_CHAIN) + size_t npages; + size_t nsgl; +#endif if (devname == NULL) return -EINVAL; @@ -788,6 +816,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) return ret; } + /* + * if user supplied an 'msize' option that's larger than what this + * transport supports by default, then try to allocate more sg lists + */ + if (client->msize > client->trans_maxsize) { +#if !defined(CONFIG_ARCH_NO_SG_CHAIN) + npages = DIV_ROUND_UP(client->msize, PAGE_SIZE); + if (npages > chan->p9_max_pages) + npages = chan->p9_max_pages; + nsgl = DIV_ROUND_UP(npages, SG_USER_PAGES_PER_LIST); + if (nsgl > chan->vq_sg->nsgl) { + /* + * if resize fails, no big deal, then just + * continue with default msize instead + */ + if (!vq_sg_resize(&chan->vq_sg, nsgl)) { + client->trans_maxsize = + PAGE_SIZE * + ((nsgl * SG_USER_PAGES_PER_LIST) - 3); + } + } +#endif /* !defined(CONFIG_ARCH_NO_SG_CHAIN) */ + } + client->trans = (void *)chan; client->status = Connected; chan->client = client; -- 2.20.1
Right now vq_sg_resize() used a lazy implementation following the all-or-nothing princible. So it either resized exactly to the requested new sg lists size, or it did not resize at all. The problem with this is if a user supplies a very large msize value, resize would simply fail and the user would stick to the default maximum msize supported by the virtio transport. To resolve this potential issue, change vq_sg_resize() to resize the passed sg list to whatever is possible on the machine. Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> --- net/9p/trans_virtio.c | 64 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 1f9a0283d7b8..d81c0be475ba 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -200,23 +200,66 @@ static struct virtqueue_sg *vq_sg_alloc(unsigned int nsgl) * list blocks. * @_vq_sg: scatter/gather lists to be resized * @nsgl: new amount of scatter/gather list blocks + * + * Old scatter/gather lists are retained. Only growing the size is supported. + * If the requested amount cannot be satisfied, then lists are increased to + * whatever is possible. */ static int vq_sg_resize(struct virtqueue_sg **_vq_sg, unsigned int nsgl) { struct virtqueue_sg *vq_sg; + unsigned int i; + size_t sz; + int ret = 0; BUG_ON(!_vq_sg || !nsgl); vq_sg = *_vq_sg; + if (nsgl > VIRTQUEUE_SG_NSGL_MAX) + nsgl = VIRTQUEUE_SG_NSGL_MAX; if (vq_sg->nsgl == nsgl) return 0; + if (vq_sg->nsgl > nsgl) + return -ENOTSUPP; + + vq_sg = kzalloc(sizeof(struct virtqueue_sg) + + nsgl * sizeof(struct scatterlist *), + GFP_KERNEL); - /* lazy resize implementation for now */ - vq_sg = vq_sg_alloc(nsgl); if (!vq_sg) return -ENOMEM; + /* copy over old scatter gather lists */ + sz = sizeof(struct virtqueue_sg) + + (*_vq_sg)->nsgl * sizeof(struct scatterlist *); + memcpy(vq_sg, *_vq_sg, sz); + + vq_sg->nsgl = nsgl; + + for (i = (*_vq_sg)->nsgl; i < nsgl; ++i) { + vq_sg->sgl[i] = kmalloc_array( + SG_MAX_SINGLE_ALLOC, sizeof(struct scatterlist), + GFP_KERNEL + ); + /* + * handle failed allocation as soft error, we take whatever + * we get + */ + if (!vq_sg->sgl[i]) { + ret = -ENOMEM; + vq_sg->nsgl = nsgl = i; + break; + } + sg_init_table(vq_sg->sgl[i], SG_MAX_SINGLE_ALLOC); + if (i) { + /* chain the lists */ + sg_chain(vq_sg->sgl[i - 1], SG_MAX_SINGLE_ALLOC, + vq_sg->sgl[i]); + } + } + sg_mark_end(&vq_sg->sgl[nsgl - 1][SG_MAX_SINGLE_ALLOC - 1]); + *_vq_sg = vq_sg; - return 0; + return ret; } /** @@ -829,13 +872,16 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) if (nsgl > chan->vq_sg->nsgl) { /* * if resize fails, no big deal, then just - * continue with default msize instead + * continue with whatever we got */ - if (!vq_sg_resize(&chan->vq_sg, nsgl)) { - client->trans_maxsize = - PAGE_SIZE * - ((nsgl * SG_USER_PAGES_PER_LIST) - 3); - } + vq_sg_resize(&chan->vq_sg, nsgl); + /* + * actual allocation size might be less than + * requested, so use vq_sg->nsgl instead of nsgl + */ + client->trans_maxsize = + PAGE_SIZE * ((chan->vq_sg->nsgl * + SG_USER_PAGES_PER_LIST) - 3); } #endif /* !defined(CONFIG_ARCH_NO_SG_CHAIN) */ } -- 2.20.1
This is an initial draft for getting rid of the current 500k 'msize' limitation in the 9p virtio transport, which is currently a bottleneck for performance of Linux 9p mounts. This is a follow-up of the following series and discussion: https://lore.kernel.org/all/28bb651ae0349a7d57e8ddc92c1bd5e62924a912.1630770829.git.linux_oss@crudebyte.com/T/#eb647d0c013616cee3eb8ba9d87da7d8b1f476f37 Known limitation: With this series applied I can run QEMU host <-> 9P virtio <-> Linux guest with up to 3 MB msize. If I try to run it with 4 MB it seems to hit some limitation on QEMU side: qemu-system-x86_64: virtio: too many write descriptors in indirect table I haven't looked into this issue yet. Testing and feedback appreciated! Christian Schoenebeck (7): net/9p: show error message if user 'msize' cannot be satisfied 9p/trans_virtio: separate allocation of scatter gather list 9p/trans_virtio: turn amount of sg lists into runtime info 9p/trans_virtio: introduce struct virtqueue_sg net/9p: add trans_maxsize to struct p9_client 9p/trans_virtio: support larger msize values 9p/trans_virtio: resize sg lists to whatever is possible include/net/9p/client.h | 2 + net/9p/client.c | 18 ++- net/9p/trans_virtio.c | 281 ++++++++++++++++++++++++++++++++++------ 3 files changed, 261 insertions(+), 40 deletions(-) -- 2.20.1
On Thu, 16 Sep 2021 20:26:08 +0200 Christian Schoenebeck wrote:
> This is an initial draft for getting rid of the current 500k 'msize'
> limitation in the 9p virtio transport, which is currently a bottleneck for
> performance of Linux 9p mounts.
>
> This is a follow-up of the following series and discussion:
> https://lore.kernel.org/all/28bb651ae0349a7d57e8ddc92c1bd5e62924a912.1630770829.git.linux_oss@crudebyte.com/T/#eb647d0c013616cee3eb8ba9d87da7d8b1f476f37
>
> Known limitation: With this series applied I can run
>
> QEMU host <-> 9P virtio <-> Linux guest
>
> with up to 3 MB msize. If I try to run it with 4 MB it seems to hit some
> limitation on QEMU side:
>
> qemu-system-x86_64: virtio: too many write descriptors in indirect table
>
> I haven't looked into this issue yet.
>
> Testing and feedback appreciated!
nit - please run ./scripts/kernel-doc -none on files you're changing.
There seems to be a handful of warnings like this added by the series:
net/9p/trans_virtio.c:155: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
On Donnerstag, 16. September 2021 20:25:16 CEST Christian Schoenebeck wrote: > The virtio transport supports by default a 9p 'msize' of up to > approximately 500 kB. This patches adds support for larger 'msize' > values by resizing the amount of scatter/gather lists if required. > > Signed-off-by: Christian Schoenebeck <linux_oss@crudebyte.com> > --- s/patches/patch/ > net/9p/trans_virtio.c | 52 +++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 52 insertions(+) > > diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c > index 1a45e0df2336..1f9a0283d7b8 100644 > --- a/net/9p/trans_virtio.c > +++ b/net/9p/trans_virtio.c > @@ -195,6 +195,30 @@ static struct virtqueue_sg *vq_sg_alloc(unsigned int > nsgl) return vq_sg; > } > > +/** > + * Resize passed virtqueue scatter/gather lists to the passed amount of > + * list blocks. > + * @_vq_sg: scatter/gather lists to be resized > + * @nsgl: new amount of scatter/gather list blocks > + */ > +static int vq_sg_resize(struct virtqueue_sg **_vq_sg, unsigned int nsgl) > +{ > + struct virtqueue_sg *vq_sg; > + > + BUG_ON(!_vq_sg || !nsgl); > + vq_sg = *_vq_sg; > + if (vq_sg->nsgl == nsgl) > + return 0; > + > + /* lazy resize implementation for now */ > + vq_sg = vq_sg_alloc(nsgl); > + if (!vq_sg) > + return -ENOMEM; > + Missing kfree(*_vq_sg); here, and probably also a prior memcpy() as in patch 7, i.e.: /* copy over old scatter gather lists */ sz = sizeof(struct virtqueue_sg) + (*_vq_sg)->nsgl * sizeof(struct scatterlist *); memcpy(vq_sg, *_vq_sg, sz); > + *_vq_sg = vq_sg; > + return 0; > +} > + > /** > * p9_virtio_close - reclaim resources of a channel > * @client: client instance > @@ -766,6 +790,10 @@ p9_virtio_create(struct p9_client *client, const char > *devname, char *args) struct virtio_chan *chan; > int ret = -ENOENT; > int found = 0; > +#if !defined(CONFIG_ARCH_NO_SG_CHAIN) > + size_t npages; > + size_t nsgl; > +#endif > > if (devname == NULL) > return -EINVAL; > @@ -788,6 +816,30 @@ p9_virtio_create(struct p9_client *client, const char > *devname, char *args) return ret; > } > > + /* > + * if user supplied an 'msize' option that's larger than what this > + * transport supports by default, then try to allocate more sg lists > + */ > + if (client->msize > client->trans_maxsize) { > +#if !defined(CONFIG_ARCH_NO_SG_CHAIN) > + npages = DIV_ROUND_UP(client->msize, PAGE_SIZE); > + if (npages > chan->p9_max_pages) > + npages = chan->p9_max_pages; > + nsgl = DIV_ROUND_UP(npages, SG_USER_PAGES_PER_LIST); > + if (nsgl > chan->vq_sg->nsgl) { > + /* > + * if resize fails, no big deal, then just > + * continue with default msize instead > + */ > + if (!vq_sg_resize(&chan->vq_sg, nsgl)) { > + client->trans_maxsize = > + PAGE_SIZE * > + ((nsgl * SG_USER_PAGES_PER_LIST) - 3); > + } > + } > +#endif /* !defined(CONFIG_ARCH_NO_SG_CHAIN) */ > + } Probably an error/info message for architectures not supporting SG chains would be useful here, to let users on those system know why they cannot get beyond 500k msize. > + > client->trans = (void *)chan; > client->status = Connected; > chan->client = client;
On Freitag, 17. September 2021 04:09:08 CEST Jakub Kicinski wrote:
> On Thu, 16 Sep 2021 20:26:08 +0200 Christian Schoenebeck wrote:
> > This is an initial draft for getting rid of the current 500k 'msize'
> > limitation in the 9p virtio transport, which is currently a bottleneck for
> > performance of Linux 9p mounts.
> >
> > This is a follow-up of the following series and discussion:
> > https://lore.kernel.org/all/28bb651ae0349a7d57e8ddc92c1bd5e62924a912.16307
> > 70829.git.linux_oss@crudebyte.com/T/#eb647d0c013616cee3eb8ba9d87da7d8b1f47
> > 6f37
> >
> > Known limitation: With this series applied I can run
> >
> > QEMU host <-> 9P virtio <-> Linux guest
> >
> > with up to 3 MB msize. If I try to run it with 4 MB it seems to hit some
> >
> > limitation on QEMU side:
> > qemu-system-x86_64: virtio: too many write descriptors in indirect table
> >
> > I haven't looked into this issue yet.
> >
> > Testing and feedback appreciated!
>
> nit - please run ./scripts/kernel-doc -none on files you're changing.
> There seems to be a handful of warnings like this added by the series:
>
> net/9p/trans_virtio.c:155: warning: This comment starts with '/**', but
> isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
Sure, I'll take care about that in v2. Thanks!
Best regards,
Christian Schoenebeck