* [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-30 9:34 ` [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs Dominique Martinet
@ 2018-07-30 9:34 ` Dominique Martinet
2018-07-31 1:18 ` [V9fs-developer] " piaojun
` (2 more replies)
2018-07-31 0:55 ` [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs piaojun
` (2 subsequent siblings)
3 siblings, 3 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-07-30 9:34 UTC (permalink / raw)
To: v9fs-developer; +Cc: Greg Kurz, Matthew Wilcox, linux-fsdevel, linux-kernel
From: Dominique Martinet <dominique.martinet@cea.fr>
Having a specific cache for the fcall allocations helps speed up
allocations a bit, especially in case of non-"round" msizes.
The caches will automatically be merged if there are multiple caches
of items with the same size so we do not need to try to share a cache
between different clients of the same size.
Since the msize is negotiated with the server, only allocate the cache
after that negotiation has happened - previous allocations or
allocations of different sizes (e.g. zero-copy fcall) are made with
kmalloc directly.
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
include/net/9p/client.h | 2 ++
net/9p/client.c | 40 ++++++++++++++++++++++++++++++++--------
net/9p/trans_rdma.c | 2 +-
3 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4b4ac1362ad5..8d9bc7402a42 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -123,6 +123,7 @@ struct p9_client {
struct p9_trans_module *trans_mod;
enum p9_trans_status status;
void *trans;
+ struct kmem_cache *fcall_cache;
union {
struct {
@@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
kgid_t gid, struct p9_qid *);
int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc);
struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
diff --git a/net/9p/client.c b/net/9p/client.c
index ba99a94a12c9..215e3b1ed7b4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
return ret;
}
-static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
+static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
+ int alloc_msize)
{
- fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ if (c->fcall_cache && alloc_msize == c->msize)
+ fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+ else
+ fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
return 0;
}
+void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
+{
+ /* sdata can be NULL for interrupted requests in trans_rdma,
+ * and kmem_cache_free does not do NULL-check for us
+ */
+ if (unlikely(!fc->sdata))
+ return;
+
+ if (c->fcall_cache && fc->capacity == c->msize)
+ kmem_cache_free(c->fcall_cache, fc->sdata);
+ else
+ kfree(fc->sdata);
+}
+EXPORT_SYMBOL(p9_fcall_free);
+
static struct kmem_cache *p9_req_cache;
/**
@@ -261,9 +280,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
if (!req)
return NULL;
- if (p9_fcall_alloc(&req->tc, alloc_msize))
+ if (p9_fcall_alloc(c, &req->tc, alloc_msize))
goto free;
- if (p9_fcall_alloc(&req->rc, alloc_msize))
+ if (p9_fcall_alloc(c, &req->rc, alloc_msize))
goto free;
p9pdu_reset(&req->tc);
@@ -288,8 +307,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
return req;
free:
- kfree(req->tc.sdata);
- kfree(req->rc.sdata);
+ p9_fcall_free(c, &req->tc);
+ p9_fcall_free(c, &req->rc);
kmem_cache_free(p9_req_cache, req);
return ERR_PTR(-ENOMEM);
}
@@ -333,8 +352,8 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
spin_lock_irqsave(&c->lock, flags);
idr_remove(&c->reqs, tag);
spin_unlock_irqrestore(&c->lock, flags);
- kfree(r->tc.sdata);
- kfree(r->rc.sdata);
+ p9_fcall_free(c, &r->tc);
+ p9_fcall_free(c, &r->rc);
kmem_cache_free(p9_req_cache, r);
}
@@ -944,6 +963,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+ clnt->fcall_cache = NULL;
client_id = utsname()->nodename;
memcpy(clnt->name, client_id, strlen(client_id) + 1);
@@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto close_trans;
+ clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
+ 0, 0, NULL);
+
return clnt;
close_trans:
@@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
p9_tag_cleanup(clnt);
+ kmem_cache_destroy(clnt->fcall_cache);
kfree(clnt);
}
EXPORT_SYMBOL(p9_client_destroy);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index c5cac97df7f7..5e43f0a00b3a 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
/* Got one! */
- kfree(req->rc.sdata);
+ p9_fcall_free(client, &req->rc);
req->rc.sdata = NULL;
goto dont_need_post_recv;
} else {
--
2.17.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-30 9:34 ` [PATCH 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
@ 2018-07-31 1:18 ` piaojun
2018-07-31 1:35 ` Dominique Martinet
2018-07-31 2:46 ` Matthew Wilcox
2018-08-01 14:28 ` [V9fs-developer] " Greg Kurz
2 siblings, 1 reply; 53+ messages in thread
From: piaojun @ 2018-07-31 1:18 UTC (permalink / raw)
To: Dominique Martinet, v9fs-developer
Cc: linux-fsdevel, Greg Kurz, Matthew Wilcox, linux-kernel
Hi Dominique,
Could you help paste some test result before-and-after the patch applied?
And I have a little suggestion in comments below.
On 2018/7/30 17:34, Dominique Martinet wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> Having a specific cache for the fcall allocations helps speed up
> allocations a bit, especially in case of non-"round" msizes.
>
> The caches will automatically be merged if there are multiple caches
> of items with the same size so we do not need to try to share a cache
> between different clients of the same size.
>
> Since the msize is negotiated with the server, only allocate the cache
> after that negotiation has happened - previous allocations or
> allocations of different sizes (e.g. zero-copy fcall) are made with
> kmalloc directly.
>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
> include/net/9p/client.h | 2 ++
> net/9p/client.c | 40 ++++++++++++++++++++++++++++++++--------
> net/9p/trans_rdma.c | 2 +-
> 3 files changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index 4b4ac1362ad5..8d9bc7402a42 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -123,6 +123,7 @@ struct p9_client {
> struct p9_trans_module *trans_mod;
> enum p9_trans_status status;
> void *trans;
> + struct kmem_cache *fcall_cache;
>
> union {
> struct {
> @@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> kgid_t gid, struct p9_qid *);
> int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
> int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc);
> struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
> void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>
> diff --git a/net/9p/client.c b/net/9p/client.c
> index ba99a94a12c9..215e3b1ed7b4 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> return ret;
> }
>
> -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> + int alloc_msize)
> {
> - fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (c->fcall_cache && alloc_msize == c->msize)
> + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> + else
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> if (!fc->sdata)
> return -ENOMEM;
> fc->capacity = alloc_msize;
> return 0;
> }
>
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> +{
> + /* sdata can be NULL for interrupted requests in trans_rdma,
> + * and kmem_cache_free does not do NULL-check for us
> + */
> + if (unlikely(!fc->sdata))
> + return;
> +
> + if (c->fcall_cache && fc->capacity == c->msize)
> + kmem_cache_free(c->fcall_cache, fc->sdata);
> + else
> + kfree(fc->sdata);
> +}
> +EXPORT_SYMBOL(p9_fcall_free);
> +
> static struct kmem_cache *p9_req_cache;
>
> /**
> @@ -261,9 +280,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> if (!req)
> return NULL;
>
> - if (p9_fcall_alloc(&req->tc, alloc_msize))
> + if (p9_fcall_alloc(c, &req->tc, alloc_msize))
> goto free;
> - if (p9_fcall_alloc(&req->rc, alloc_msize))
> + if (p9_fcall_alloc(c, &req->rc, alloc_msize))
> goto free;
>
> p9pdu_reset(&req->tc);
> @@ -288,8 +307,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> return req;
>
> free:
> - kfree(req->tc.sdata);
> - kfree(req->rc.sdata);
> + p9_fcall_free(c, &req->tc);
> + p9_fcall_free(c, &req->rc);
> kmem_cache_free(p9_req_cache, req);
> return ERR_PTR(-ENOMEM);
> }
> @@ -333,8 +352,8 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
> spin_lock_irqsave(&c->lock, flags);
> idr_remove(&c->reqs, tag);
> spin_unlock_irqrestore(&c->lock, flags);
> - kfree(r->tc.sdata);
> - kfree(r->rc.sdata);
> + p9_fcall_free(c, &r->tc);
> + p9_fcall_free(c, &r->rc);
> kmem_cache_free(p9_req_cache, r);
> }
>
> @@ -944,6 +963,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>
> clnt->trans_mod = NULL;
> clnt->trans = NULL;
> + clnt->fcall_cache = NULL;
>
> client_id = utsname()->nodename;
> memcpy(clnt->name, client_id, strlen(client_id) + 1);
> @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
> if (err)
> goto close_trans;
>
> + clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> + 0, 0, NULL);
> +
> return clnt;
>
> close_trans:
> @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
>
> p9_tag_cleanup(clnt);
>
> + kmem_cache_destroy(clnt->fcall_cache);
We could set NULL for fcall_cache in case of use-after-free.
> kfree(clnt);
> }
> EXPORT_SYMBOL(p9_client_destroy);
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index c5cac97df7f7..5e43f0a00b3a 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> /* Got one! */
> - kfree(req->rc.sdata);
> + p9_fcall_free(client, &req->rc);
> req->rc.sdata = NULL;
> goto dont_need_post_recv;
> } else {
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-31 1:18 ` [V9fs-developer] " piaojun
@ 2018-07-31 1:35 ` Dominique Martinet
2018-07-31 1:45 ` piaojun
0 siblings, 1 reply; 53+ messages in thread
From: Dominique Martinet @ 2018-07-31 1:35 UTC (permalink / raw)
To: piaojun
Cc: v9fs-developer, linux-fsdevel, Greg Kurz, Matthew Wilcox, linux-kernel
piaojun wrote on Tue, Jul 31, 2018:
> Could you help paste some test result before-and-after the patch applied?
The only performance tests I did were sent to the list a couple of mails
earlier, you can find it here:
http://lkml.kernel.org/r/20180730093101.GA7894@nautica
In particular, the results for benchmark on small writes just before and
after this patch, without KASAN (these are the same numbers as in the
link, hardware/setup is described there):
- no alloc (4.18-rc7 request cache): 65.4k req/s
- non-power of two alloc, no patch: 61.6k req/s
- power of two alloc, no patch: 62.2k req/s
- non-power of two alloc, with patch: 64.7k req/s
- power of two alloc, with patch: 65.1k req/s
I'm rather happy with the result, I didn't expect using a dedicated
cache would bring this much back but it's certainly worth it.
> > @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
> >
> > p9_tag_cleanup(clnt);
> >
> > + kmem_cache_destroy(clnt->fcall_cache);
>
> We could set NULL for fcall_cache in case of use-after-free.
>
> > kfree(clnt);
Hmm, I understand where this comes from, but I'm not sure I agree.
If someone tries to access the client while/after it is freed things are
going to break anyway, I'd rather let things break as obviously as
possible than try to cover it up.
--
Dominique
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-31 1:35 ` Dominique Martinet
@ 2018-07-31 1:45 ` piaojun
0 siblings, 0 replies; 53+ messages in thread
From: piaojun @ 2018-07-31 1:45 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, linux-fsdevel, Greg Kurz, Matthew Wilcox, linux-kernel
On 2018/7/31 9:35, Dominique Martinet wrote:
> piaojun wrote on Tue, Jul 31, 2018:
>> Could you help paste some test result before-and-after the patch applied?
>
> The only performance tests I did were sent to the list a couple of mails
> earlier, you can find it here:
> http://lkml.kernel.org/r/20180730093101.GA7894@nautica
>
> In particular, the results for benchmark on small writes just before and
> after this patch, without KASAN (these are the same numbers as in the
> link, hardware/setup is described there):
> - no alloc (4.18-rc7 request cache): 65.4k req/s
> - non-power of two alloc, no patch: 61.6k req/s
> - power of two alloc, no patch: 62.2k req/s
> - non-power of two alloc, with patch: 64.7k req/s
> - power of two alloc, with patch: 65.1k req/s
>
> I'm rather happy with the result, I didn't expect using a dedicated
> cache would bring this much back but it's certainly worth it.
>
It looks like an obvious promotion.
>>> @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
>>>
>>> p9_tag_cleanup(clnt);
>>>
>>> + kmem_cache_destroy(clnt->fcall_cache);
>>
>> We could set NULL for fcall_cache in case of use-after-free.
>>
>>> kfree(clnt);
>
> Hmm, I understand where this comes from, but I'm not sure I agree.
> If someone tries to access the client while/after it is freed things are
> going to break anyway, I'd rather let things break as obviously as
> possible than try to cover it up.
>
Setting NULL is not a big matter, and I will hear others' opinion.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-30 9:34 ` [PATCH 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
2018-07-31 1:18 ` [V9fs-developer] " piaojun
@ 2018-07-31 2:46 ` Matthew Wilcox
2018-07-31 4:17 ` Dominique Martinet
2018-08-01 14:28 ` [V9fs-developer] " Greg Kurz
2 siblings, 1 reply; 53+ messages in thread
From: Matthew Wilcox @ 2018-07-31 2:46 UTC (permalink / raw)
To: Dominique Martinet; +Cc: v9fs-developer, Greg Kurz, linux-fsdevel, linux-kernel
On Mon, Jul 30, 2018 at 11:34:23AM +0200, Dominique Martinet wrote:
> -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> + int alloc_msize)
> {
> - fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (c->fcall_cache && alloc_msize == c->msize)
> + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> + else
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
Could you simplify this by initialising c->msize to 0 and then this
can simply be:
> + if (alloc_msize == c->msize)
...
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> +{
> + /* sdata can be NULL for interrupted requests in trans_rdma,
> + * and kmem_cache_free does not do NULL-check for us
> + */
> + if (unlikely(!fc->sdata))
> + return;
> +
> + if (c->fcall_cache && fc->capacity == c->msize)
> + kmem_cache_free(c->fcall_cache, fc->sdata);
> + else
> + kfree(fc->sdata);
> +}
Is it possible for fcall_cache to be allocated before fcall_free is
called? I'm concerned we might do this:
allocate message A
allocate message B
receive response A
allocate fcall_cache
receive response B
and then we'd call kmem_cache_free() for something allocated by kmalloc(),
which works with slab and slub, but doesn't work with slob (alas).
> @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
> if (err)
> goto close_trans;
>
> + clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> + 0, 0, NULL);
> +
If we have slab merging turned off, or we have two mounts from servers
with different msizes, we'll end up with two slabs called 9p-fcall-cache.
I'm OK with that, but are you?
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-31 2:46 ` Matthew Wilcox
@ 2018-07-31 4:17 ` Dominique Martinet
0 siblings, 0 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-07-31 4:17 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: v9fs-developer, Greg Kurz, linux-fsdevel, linux-kernel
Matthew Wilcox wrote on Mon, Jul 30, 2018:
> On Mon, Jul 30, 2018 at 11:34:23AM +0200, Dominique Martinet wrote:
> > -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> > +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> > + int alloc_msize)
> > {
> > - fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> > + if (c->fcall_cache && alloc_msize == c->msize)
> > + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> > + else
> > + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
>
> Could you simplify this by initialising c->msize to 0 and then this
> can simply be:
>
> > + if (alloc_msize == c->msize)
> ...
Hmm, this is rather tricky with the current flow of things;
p9_client_version() has multiple uses for that msize field.
Basically what happens is:
- init client struct, set clip msize to mount option/transport-specific
max
- p9_client_version() uses current c->msize to send a suggested value
to the server
- p9_client_rpc() uses current c->msize to allocate that first rpc,
this is pretty much hard-coded and will be quite intrusive to make an
exception for
- p9_client_version() looks at the msize the server suggested and clips
c->msize if the reply's is smaller than c->msize
I kind of agree it'd be nice to remove that check being done all the
time for just startup, but I don't see how to do this easily with the
current code.
Making p9_client_version take an extra argument would be easy but we'd
need to actually hardcode in p9_client_rpc that "if the message type is
TVERSION then use [page size or whatever] for allocation" and that kinds
of kills the point... The alternative being having p9_client_rpc takes
the actual size as argument itself but this once again is pretty
intrusive even if it could be done mechanically...
I'll think about this some more
> > +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> > +{
> > + /* sdata can be NULL for interrupted requests in trans_rdma,
> > + * and kmem_cache_free does not do NULL-check for us
> > + */
> > + if (unlikely(!fc->sdata))
> > + return;
> > +
> > + if (c->fcall_cache && fc->capacity == c->msize)
> > + kmem_cache_free(c->fcall_cache, fc->sdata);
> > + else
> > + kfree(fc->sdata);
> > +}
>
> Is it possible for fcall_cache to be allocated before fcall_free is
> called? I'm concerned we might do this:
>
> allocate message A
> allocate message B
> receive response A
> allocate fcall_cache
> receive response B
>
> and then we'd call kmem_cache_free() for something allocated by kmalloc(),
> which works with slab and slub, but doesn't work with slob (alas).
Bleh, I checked this would work for slab and didn't really check
others..
This cannot happen right now because we only return the client struct
from p9_client_create after the first message is done (and, right now,
freed) but when we start adding refcounting to requests it'd be possible
to free the very first response after fcall_cache is allocated with a
"bad" server like syzcaller does sending the version reply before the
request came in.
I can't see any work-around around this other than storing how the fcall
was allocated in the struct itself though...
I guess I might as well do that now, unless you have a better idea.
> > @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
> > if (err)
> > goto close_trans;
> >
> > + clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> > + 0, 0, NULL);
> > +
>
> If we have slab merging turned off, or we have two mounts from servers
> with different msizes, we'll end up with two slabs called 9p-fcall-cache.
> I'm OK with that, but are you?
Yeah, the reason I didn't make it global like p9_req_cache is precisely
to get two separate caches if the msizes are different.
I actually considered adding msize to the string with snprintf or
something but someone looking at it through slabinfo or similar will
have the sizes anyway so I don't think this would bring anything, do you
know if/think that tools will choke on multiple caches with the same
name?
I'm not sure about slab merging being disabled though, from the little I
understand I do not see why anyone would do that except for debugging,
and I'm fine with that.
Please let me know if I'm missing something though!
Thanks for the review,
--
Dominique Martinet
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-07-30 9:34 ` [PATCH 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
2018-07-31 1:18 ` [V9fs-developer] " piaojun
2018-07-31 2:46 ` Matthew Wilcox
@ 2018-08-01 14:28 ` Greg Kurz
2018-08-01 15:22 ` Dominique Martinet
2 siblings, 1 reply; 53+ messages in thread
From: Greg Kurz @ 2018-08-01 14:28 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, linux-fsdevel, Matthew Wilcox, linux-kernel
On Mon, 30 Jul 2018 11:34:23 +0200
Dominique Martinet <asmadeus@codewreck.org> wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> Having a specific cache for the fcall allocations helps speed up
> allocations a bit, especially in case of non-"round" msizes.
>
> The caches will automatically be merged if there are multiple caches
> of items with the same size so we do not need to try to share a cache
> between different clients of the same size.
>
> Since the msize is negotiated with the server, only allocate the cache
> after that negotiation has happened - previous allocations or
> allocations of different sizes (e.g. zero-copy fcall) are made with
> kmalloc directly.
>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
The patch looks good to me. It would need to be rebased when you have fixed
the potential kfree() of stale data in patch 1. Either with an extra goto
label in p9_tag_alloc or by turning p9_fcall_alloc into p9_fcall_alloc_sdata,
both solutions are equivalent.
Just one suggestion, see below.
> include/net/9p/client.h | 2 ++
> net/9p/client.c | 40 ++++++++++++++++++++++++++++++++--------
> net/9p/trans_rdma.c | 2 +-
> 3 files changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index 4b4ac1362ad5..8d9bc7402a42 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -123,6 +123,7 @@ struct p9_client {
> struct p9_trans_module *trans_mod;
> enum p9_trans_status status;
> void *trans;
> + struct kmem_cache *fcall_cache;
>
> union {
> struct {
> @@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> kgid_t gid, struct p9_qid *);
> int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
> int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc);
> struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
> void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>
> diff --git a/net/9p/client.c b/net/9p/client.c
> index ba99a94a12c9..215e3b1ed7b4 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> return ret;
> }
>
> -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> + int alloc_msize)
> {
> - fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (c->fcall_cache && alloc_msize == c->msize)
This is a presumably hot path for any request but the initial TVERSION,
you probably want likely() here...
> + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> + else
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> if (!fc->sdata)
> return -ENOMEM;
> fc->capacity = alloc_msize;
> return 0;
> }
>
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> +{
> + /* sdata can be NULL for interrupted requests in trans_rdma,
> + * and kmem_cache_free does not do NULL-check for us
> + */
> + if (unlikely(!fc->sdata))
> + return;
> +
> + if (c->fcall_cache && fc->capacity == c->msize)
... and here as well.
> + kmem_cache_free(c->fcall_cache, fc->sdata);
> + else
> + kfree(fc->sdata);
> +}
> +EXPORT_SYMBOL(p9_fcall_free);
> +
> static struct kmem_cache *p9_req_cache;
>
> /**
> @@ -261,9 +280,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> if (!req)
> return NULL;
>
> - if (p9_fcall_alloc(&req->tc, alloc_msize))
> + if (p9_fcall_alloc(c, &req->tc, alloc_msize))
> goto free;
> - if (p9_fcall_alloc(&req->rc, alloc_msize))
> + if (p9_fcall_alloc(c, &req->rc, alloc_msize))
> goto free;
>
> p9pdu_reset(&req->tc);
> @@ -288,8 +307,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> return req;
>
> free:
> - kfree(req->tc.sdata);
> - kfree(req->rc.sdata);
> + p9_fcall_free(c, &req->tc);
> + p9_fcall_free(c, &req->rc);
> kmem_cache_free(p9_req_cache, req);
> return ERR_PTR(-ENOMEM);
> }
> @@ -333,8 +352,8 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
> spin_lock_irqsave(&c->lock, flags);
> idr_remove(&c->reqs, tag);
> spin_unlock_irqrestore(&c->lock, flags);
> - kfree(r->tc.sdata);
> - kfree(r->rc.sdata);
> + p9_fcall_free(c, &r->tc);
> + p9_fcall_free(c, &r->rc);
> kmem_cache_free(p9_req_cache, r);
> }
>
> @@ -944,6 +963,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>
> clnt->trans_mod = NULL;
> clnt->trans = NULL;
> + clnt->fcall_cache = NULL;
>
> client_id = utsname()->nodename;
> memcpy(clnt->name, client_id, strlen(client_id) + 1);
> @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
> if (err)
> goto close_trans;
>
> + clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> + 0, 0, NULL);
> +
> return clnt;
>
> close_trans:
> @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
>
> p9_tag_cleanup(clnt);
>
> + kmem_cache_destroy(clnt->fcall_cache);
> kfree(clnt);
> }
> EXPORT_SYMBOL(p9_client_destroy);
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index c5cac97df7f7..5e43f0a00b3a 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> /* Got one! */
> - kfree(req->rc.sdata);
> + p9_fcall_free(client, &req->rc);
> req->rc.sdata = NULL;
> goto dont_need_post_recv;
> } else {
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-01 14:28 ` [V9fs-developer] " Greg Kurz
@ 2018-08-01 15:22 ` Dominique Martinet
0 siblings, 0 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-08-01 15:22 UTC (permalink / raw)
To: Greg Kurz; +Cc: v9fs-developer, linux-fsdevel, Matthew Wilcox, linux-kernel
Greg Kurz wrote on Wed, Aug 01, 2018:
> > diff --git a/net/9p/client.c b/net/9p/client.c
> > index ba99a94a12c9..215e3b1ed7b4 100644
> > --- a/net/9p/client.c
> > +++ b/net/9p/client.c
> > @@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> > return ret;
> > }
> >
> > -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> > +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> > + int alloc_msize)
> > {
> > - fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> > + if (c->fcall_cache && alloc_msize == c->msize)
>
> This is a presumably hot path for any request but the initial TVERSION,
> you probably want likely() here...
c->fcall_cache is indeed very likely, but alloc_size == c->msize not so
much, as zc requests will be quite common for virtio and will be 4k in
size.
Although with that cache I'm starting to wonder if we should always use
it... Speed-wise if there is no memory pressure the cache is likely
going to be faster.
If there is pressure and the items are reclaimed though that'll bring
some heavier slow-down as it'll need to find bigger memory regions.
I'm not sure which path we should favor, tbh. I'll keep these separate
for now.
For the first part of the check, Matthew suggested trying to trick msize
into a different value to fail this check for the initial TVERSION call,
but even after thinking it a bit I don't really see how to do that
cleanly. I can at least make -that- likely()...
>
> > + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> > + else
> > + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> > if (!fc->sdata)
> > return -ENOMEM;
> > fc->capacity = alloc_msize;
> > return 0;
> > }
> >
> > +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> > +{
> > + /* sdata can be NULL for interrupted requests in trans_rdma,
> > + * and kmem_cache_free does not do NULL-check for us
> > + */
> > + if (unlikely(!fc->sdata))
> > + return;
> > +
> > + if (c->fcall_cache && fc->capacity == c->msize)
>
> ... and here as well.
For this one I'll unfortunately need to store in the fc how it has been
allocated as slob doesn't allow to kmem_cache_free() a buffer allocated
by kmalloc and in prevision of refs being refcounted in a hostile world
the initial TVERSION req could be freed after fcall_cache is created :/
That's a bit of a burden but at least will reduce the checks to one here
> > + kmem_cache_free(c->fcall_cache, fc->sdata);
> > + else
> > + kfree(fc->sdata);
> > +}
> > +EXPORT_SYMBOL(p9_fcall_free);
> > +
> > static struct kmem_cache *p9_req_cache;
> >
> > /**
Anyway I've had as many comments as I could hope for, thanks everyone
for the quick review.
I'll send a v2 of both patches tomorrow
--
Dominique
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-07-30 9:34 ` [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs Dominique Martinet
2018-07-30 9:34 ` [PATCH 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
@ 2018-07-31 0:55 ` piaojun
2018-07-31 1:12 ` Dominique Martinet
2018-08-01 14:14 ` Greg Kurz
2018-08-02 2:37 ` [PATCH v2 " Dominique Martinet
3 siblings, 1 reply; 53+ messages in thread
From: piaojun @ 2018-07-31 0:55 UTC (permalink / raw)
To: Dominique Martinet, v9fs-developer
Cc: linux-fsdevel, Greg Kurz, Matthew Wilcox, linux-kernel
Hi Dominique,
This is really a *big* patch, but the modification seems no harm. And I
suggest running testcases to cover this. Please see my comments below.
On 2018/7/30 17:34, Dominique Martinet wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> 'msize' is often a power of two, or at least page-aligned, so avoiding
> an overhead of two dozen bytes for each allocation will help the
> allocator do its work and reduce memory fragmentation.
>
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
> include/net/9p/client.h | 4 +-
> net/9p/client.c | 160 ++++++++++++++++++++--------------------
> net/9p/trans_fd.c | 12 +--
> net/9p/trans_rdma.c | 29 ++++----
> net/9p/trans_virtio.c | 18 ++---
> net/9p/trans_xen.c | 12 +--
> 6 files changed, 117 insertions(+), 118 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index a4dc42c53d18..4b4ac1362ad5 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -95,8 +95,8 @@ struct p9_req_t {
> int status;
> int t_err;
> wait_queue_head_t wq;
> - struct p9_fcall *tc;
> - struct p9_fcall *rc;
> + struct p9_fcall tc;
> + struct p9_fcall rc;
> void *aux;
> struct list_head req_list;
> };
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 88db45966740..ba99a94a12c9 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,15 +231,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> return ret;
> }
>
> -static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
> +static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> {
> - struct p9_fcall *fc;
> - fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
> - if (!fc)
> - return NULL;
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (!fc->sdata)
> + return -ENOMEM;
> fc->capacity = alloc_msize;
> - fc->sdata = (char *) fc + sizeof(struct p9_fcall);
> - return fc;
> + return 0;
> }
>
> static struct kmem_cache *p9_req_cache;
> @@ -263,13 +261,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> if (!req)
> return NULL;
>
> - req->tc = p9_fcall_alloc(alloc_msize);
> - req->rc = p9_fcall_alloc(alloc_msize);
> - if (!req->tc || !req->rc)
> + if (p9_fcall_alloc(&req->tc, alloc_msize))
> + goto free;
> + if (p9_fcall_alloc(&req->rc, alloc_msize))
> goto free;
>
> - p9pdu_reset(req->tc);
> - p9pdu_reset(req->rc);
> + p9pdu_reset(&req->tc);
> + p9pdu_reset(&req->rc);
> req->status = REQ_STATUS_ALLOC;
> init_waitqueue_head(&req->wq);
> INIT_LIST_HEAD(&req->req_list);
> @@ -281,7 +279,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> GFP_NOWAIT);
> else
> tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
> - req->tc->tag = tag;
> + req->tc.tag = tag;
> spin_unlock_irq(&c->lock);
> idr_preload_end();
> if (tag < 0)
> @@ -290,8 +288,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> return req;
>
> free:
> - kfree(req->tc);
> - kfree(req->rc);
> + kfree(req->tc.sdata);
> + kfree(req->rc.sdata);
I wonder if we will free a wild pointer as 'sdata' has not been initialized NULL.
> kmem_cache_free(p9_req_cache, req);
> return ERR_PTR(-ENOMEM);
> }
> @@ -329,14 +327,14 @@ EXPORT_SYMBOL(p9_tag_lookup);
> static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
> {
> unsigned long flags;
> - u16 tag = r->tc->tag;
> + u16 tag = r->tc.tag;
>
> p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
> spin_lock_irqsave(&c->lock, flags);
> idr_remove(&c->reqs, tag);
> spin_unlock_irqrestore(&c->lock, flags);
> - kfree(r->tc);
> - kfree(r->rc);
> + kfree(r->tc.sdata);
> + kfree(r->rc.sdata);
> kmem_cache_free(p9_req_cache, r);
> }
>
> @@ -368,7 +366,7 @@ static void p9_tag_cleanup(struct p9_client *c)
> */
> void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
> {
> - p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
> + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
>
> /*
> * This barrier is needed to make sure any change made to req before
> @@ -378,7 +376,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
> req->status = status;
>
> wake_up(&req->wq);
> - p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
> + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
> }
> EXPORT_SYMBOL(p9_client_cb);
>
> @@ -449,18 +447,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
> int err;
> int ecode;
>
> - err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
> - if (req->rc->size >= c->msize) {
> + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
> + if (req->rc.size >= c->msize) {
> p9_debug(P9_DEBUG_ERROR,
> "requested packet size too big: %d\n",
> - req->rc->size);
> + req->rc.size);
> return -EIO;
> }
> /*
> * dump the response from server
> * This should be after check errors which poplulate pdu_fcall.
> */
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> if (err) {
> p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
> return err;
> @@ -470,7 +468,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
>
> if (!p9_is_proto_dotl(c)) {
> char *ename;
> - err = p9pdu_readf(req->rc, c->proto_version, "s?d",
> + err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
> &ename, &ecode);
> if (err)
> goto out_err;
> @@ -486,7 +484,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
> }
> kfree(ename);
> } else {
> - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
> + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
> err = -ecode;
>
> p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
> @@ -520,12 +518,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> int8_t type;
> char *ename = NULL;
>
> - err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
> + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
> /*
> * dump the response from server
> * This should be after parse_header which poplulate pdu_fcall.
> */
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> if (err) {
> p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
> return err;
> @@ -540,13 +538,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> /* 7 = header size for RERROR; */
> int inline_len = in_hdrlen - 7;
>
> - len = req->rc->size - req->rc->offset;
> + len = req->rc.size - req->rc.offset;
> if (len > (P9_ZC_HDR_SZ - 7)) {
> err = -EFAULT;
> goto out_err;
> }
>
> - ename = &req->rc->sdata[req->rc->offset];
> + ename = &req->rc.sdata[req->rc.offset];
> if (len > inline_len) {
> /* We have error in external buffer */
> if (!copy_from_iter_full(ename + inline_len,
> @@ -556,7 +554,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> }
> }
> ename = NULL;
> - err = p9pdu_readf(req->rc, c->proto_version, "s?d",
> + err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
> &ename, &ecode);
> if (err)
> goto out_err;
> @@ -572,7 +570,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> }
> kfree(ename);
> } else {
> - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
> + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
> err = -ecode;
>
> p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
> @@ -605,7 +603,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
> int16_t oldtag;
> int err;
>
> - err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
> + err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
> if (err)
> return err;
>
> @@ -649,12 +647,12 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
> return req;
>
> /* marshall the data */
> - p9pdu_prepare(req->tc, req->tc->tag, type);
> - err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
> + p9pdu_prepare(&req->tc, req->tc.tag, type);
> + err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
> if (err)
> goto reterr;
> - p9pdu_finalize(c, req->tc);
> - trace_9p_client_req(c, type, req->tc->tag);
> + p9pdu_finalize(c, &req->tc);
> + trace_9p_client_req(c, type, req->tc.tag);
> return req;
> reterr:
> p9_free_req(c, req);
> @@ -739,7 +737,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
> goto reterr;
>
> err = p9_check_errors(c, req);
> - trace_9p_client_res(c, type, req->rc->tag, err);
> + trace_9p_client_res(c, type, req->rc.tag, err);
> if (!err)
> return req;
> reterr:
> @@ -821,7 +819,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
> goto reterr;
>
> err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
> - trace_9p_client_res(c, type, req->rc->tag, err);
> + trace_9p_client_res(c, type, req->rc.tag, err);
> if (!err)
> return req;
> reterr:
> @@ -904,10 +902,10 @@ static int p9_client_version(struct p9_client *c)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
> + err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
> if (err) {
> p9_debug(P9_DEBUG_9P, "version error %d\n", err);
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> goto error;
> }
>
> @@ -1056,9 +1054,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1113,9 +1111,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto clunk_fid;
> }
> @@ -1180,9 +1178,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1224,9 +1222,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1269,9 +1267,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1308,9 +1306,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1506,10 +1504,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
> break;
> }
>
> - *err = p9pdu_readf(req->rc, clnt->proto_version,
> + *err = p9pdu_readf(&req->rc, clnt->proto_version,
> "D", &count, &dataptr);
> if (*err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> break;
> }
> @@ -1579,9 +1577,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
> break;
> }
>
> - *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
> + *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
> if (*err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> break;
> }
> @@ -1623,9 +1621,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1676,9 +1674,9 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1828,11 +1826,11 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
> - &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
> - &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
> + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
> + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1936,9 +1934,9 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
> err = PTR_ERR(req);
> goto error;
> }
> - err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto clunk_fid;
> }
> @@ -2024,9 +2022,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
> if (rsize < count) {
> @@ -2065,9 +2063,9 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
> @@ -2096,9 +2094,9 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
> @@ -2131,9 +2129,9 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
> @@ -2162,11 +2160,11 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
> - &glock->start, &glock->length, &glock->proc_id,
> - &glock->client_id);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
> + &glock->start, &glock->length, &glock->proc_id,
> + &glock->client_id);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
> @@ -2192,9 +2190,9 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index e2ef3c782c53..20f46f13fe83 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -354,7 +354,7 @@ static void p9_read_work(struct work_struct *work)
> goto error;
> }
>
> - if (m->req->rc == NULL) {
> + if (m->req->rc.sdata == NULL) {
> p9_debug(P9_DEBUG_ERROR,
> "No recv fcall for tag %d (req %p), disconnecting!\n",
> m->rc.tag, m->req);
> @@ -362,7 +362,7 @@ static void p9_read_work(struct work_struct *work)
> err = -EIO;
> goto error;
> }
> - m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
> + m->rc.sdata = m->req->rc.sdata;
> memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
> m->rc.capacity = m->rc.size;
> }
> @@ -372,7 +372,7 @@ static void p9_read_work(struct work_struct *work)
> */
> if ((m->req) && (m->rc.offset == m->rc.capacity)) {
> p9_debug(P9_DEBUG_TRANS, "got new packet\n");
> - m->req->rc->size = m->rc.offset;
> + m->req->rc.size = m->rc.offset;
> spin_lock(&m->client->lock);
> if (m->req->status != REQ_STATUS_ERROR)
> status = REQ_STATUS_RCVD;
> @@ -469,8 +469,8 @@ static void p9_write_work(struct work_struct *work)
> p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
> list_move_tail(&req->req_list, &m->req_list);
>
> - m->wbuf = req->tc->sdata;
> - m->wsize = req->tc->size;
> + m->wbuf = req->tc.sdata;
> + m->wsize = req->tc.size;
> m->wpos = 0;
> spin_unlock(&m->client->lock);
> }
> @@ -663,7 +663,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
> struct p9_conn *m = &ts->conn;
>
> p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
> - m, current, req->tc, req->tc->id);
> + m, current, &req->tc, req->tc.id);
> if (m->err < 0)
> return m->err;
>
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index 2ab4574183c9..c5cac97df7f7 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -122,7 +122,7 @@ struct p9_rdma_context {
> dma_addr_t busa;
> union {
> struct p9_req_t *req;
> - struct p9_fcall *rc;
> + struct p9_fcall rc;
> };
> };
>
> @@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
> if (wc->status != IB_WC_SUCCESS)
> goto err_out;
>
> - c->rc->size = wc->byte_len;
> - err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
> + c->rc.size = wc->byte_len;
> + err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
> if (err)
> goto err_out;
>
> @@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
>
> /* Check that we have not yet received a reply for this request.
> */
> - if (unlikely(req->rc)) {
> + if (unlikely(req->rc.sdata)) {
> pr_err("Duplicate reply for request %d", tag);
> goto err_out;
> }
>
> - req->rc = c->rc;
> + req->rc.size = c->rc.size;
> + req->rc.sdata = c->rc.sdata;
> p9_client_cb(client, req, REQ_STATUS_RCVD);
>
> out:
> @@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
> container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
>
> ib_dma_unmap_single(rdma->cm_id->device,
> - c->busa, c->req->tc->size,
> + c->busa, c->req->tc.size,
> DMA_TO_DEVICE);
> up(&rdma->sq_sem);
> kfree(c);
> @@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
> struct ib_sge sge;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->rc->sdata, client->msize,
> + c->rc.sdata, client->msize,
> DMA_FROM_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
> goto error;
> @@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> **/
> if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> - /* Got one ! */
> - kfree(req->rc);
> - req->rc = NULL;
> + /* Got one! */
> + kfree(req->rc.sdata);
> + req->rc.sdata = NULL;
> goto dont_need_post_recv;
> } else {
> /* We raced and lost. */
> @@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> err = -ENOMEM;
> goto recv_error;
> }
> - rpl_context->rc = req->rc;
> + rpl_context->rc.sdata = req->rc.sdata;
>
> /*
> * Post a receive buffer for this request. We need to ensure
> @@ -479,7 +480,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> goto recv_error;
> }
> /* remove posted receive buffer from request structure */
> - req->rc = NULL;
> + req->rc.sdata = NULL;
>
> dont_need_post_recv:
> /* Post the request */
> @@ -491,7 +492,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> c->req = req;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->req->tc->sdata, c->req->tc->size,
> + c->req->tc.sdata, c->req->tc.size,
> DMA_TO_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
> err = -EIO;
> @@ -501,7 +502,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> c->cqe.done = send_done;
>
> sge.addr = c->busa;
> - sge.length = c->req->tc->size;
> + sge.length = c->req->tc.size;
> sge.lkey = rdma->pd->local_dma_lkey;
>
> wr.next = NULL;
> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> index 6265d1d62749..80dd34d8a6af 100644
> --- a/net/9p/trans_virtio.c
> +++ b/net/9p/trans_virtio.c
> @@ -157,7 +157,7 @@ static void req_done(struct virtqueue *vq)
> }
>
> if (len) {
> - req->rc->size = len;
> + req->rc.size = len;
> p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
> }
> }
> @@ -274,12 +274,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
> out_sgs = in_sgs = 0;
> /* Handle out VirtIO ring buffers */
> out = pack_sg_list(chan->sg, 0,
> - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
> + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
> if (out)
> sgs[out_sgs++] = chan->sg;
>
> in = pack_sg_list(chan->sg, out,
> - VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
> + VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
> if (in)
> sgs[out_sgs + in_sgs++] = chan->sg + out;
>
> @@ -417,15 +417,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
> if (n != outlen) {
> __le32 v = cpu_to_le32(n);
> - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
> + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
> outlen = n;
> }
> /* The size field of the message must include the length of the
> * header and the length of the data. We didn't actually know
> * the length of the data until this point so add it in now.
> */
> - sz = cpu_to_le32(req->tc->size + outlen);
> - memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
> + sz = cpu_to_le32(req->tc.size + outlen);
> + memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
> } else if (uidata) {
> int n = p9_get_mapped_pages(chan, &in_pages, uidata,
> inlen, &offs, &need_drop);
> @@ -434,7 +434,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
> if (n != inlen) {
> __le32 v = cpu_to_le32(n);
> - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
> + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
> inlen = n;
> }
> }
> @@ -446,7 +446,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
>
> /* out data */
> out = pack_sg_list(chan->sg, 0,
> - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
> + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
>
> if (out)
> sgs[out_sgs++] = chan->sg;
> @@ -465,7 +465,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> * alloced memory and payload onto the user buffer.
> */
> in = pack_sg_list(chan->sg, out,
> - VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
> + VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
> if (in)
> sgs[out_sgs + in_sgs++] = chan->sg + out;
>
> diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
> index c2d54ac76bfd..1a5b38892eb4 100644
> --- a/net/9p/trans_xen.c
> +++ b/net/9p/trans_xen.c
> @@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> struct xen_9pfs_front_priv *priv = NULL;
> RING_IDX cons, prod, masked_cons, masked_prod;
> unsigned long flags;
> - u32 size = p9_req->tc->size;
> + u32 size = p9_req->tc.size;
> struct xen_9pfs_dataring *ring;
> int num;
>
> @@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> if (!priv || priv->client != client)
> return -EINVAL;
>
> - num = p9_req->tc->tag % priv->num_rings;
> + num = p9_req->tc.tag % priv->num_rings;
> ring = &priv->rings[num];
>
> again:
> @@ -176,7 +176,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
> masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
>
> - xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
> + xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
> &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
>
> p9_req->status = REQ_STATUS_SENT;
> @@ -229,12 +229,12 @@ static void p9_xen_response(struct work_struct *work)
> continue;
> }
>
> - memcpy(req->rc, &h, sizeof(h));
> - req->rc->offset = 0;
> + memcpy(&req->rc, &h, sizeof(h));
> + req->rc.offset = 0;
>
> masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
> /* Then, read the whole packet (including the header) */
> - xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
> + xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
> masked_prod, &masked_cons,
> XEN_9PFS_RING_SIZE);
>
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-07-31 0:55 ` [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs piaojun
@ 2018-07-31 1:12 ` Dominique Martinet
2018-07-31 1:28 ` piaojun
0 siblings, 1 reply; 53+ messages in thread
From: Dominique Martinet @ 2018-07-31 1:12 UTC (permalink / raw)
To: piaojun
Cc: v9fs-developer, linux-fsdevel, Greg Kurz, Matthew Wilcox, linux-kernel
piaojun wrote on Tue, Jul 31, 2018:
> This is really a *big* patch, but the modification seems no harm. And I
> suggest running testcases to cover this. Please see my comments below.
I'm always running tests, but more never hurt - please help ;)
For reference I'm running a subset of cthon04[1], ltp[2] and some custom
tests like these[3][4]
[1] https://fedorapeople.org/cgit/steved/public_git/cthon04.git/
[2] https://github.com/linux-test-project/ltp
[3] https://github.com/phdeniel/sigmund/blob/master/modules/allfs.inc#L208
[4] https://github.com/phdeniel/sigmund/blob/master/modules/allfs.inc#L251
> > [...]
> > @@ -263,13 +261,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> > if (!req)
> > return NULL;
> >
> > - req->tc = p9_fcall_alloc(alloc_msize);
> > - req->rc = p9_fcall_alloc(alloc_msize);
> > - if (!req->tc || !req->rc)
> > + if (p9_fcall_alloc(&req->tc, alloc_msize))
> > + goto free;
> > + if (p9_fcall_alloc(&req->rc, alloc_msize))
> > goto free;
> >
> > - p9pdu_reset(req->tc);
> > - p9pdu_reset(req->rc);
> > + p9pdu_reset(&req->tc);
> > + p9pdu_reset(&req->rc);
> > req->status = REQ_STATUS_ALLOC;
> > init_waitqueue_head(&req->wq);
> > INIT_LIST_HEAD(&req->req_list);
> > @@ -281,7 +279,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> > GFP_NOWAIT);
> > else
> > tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
> > - req->tc->tag = tag;
> > + req->tc.tag = tag;
> > spin_unlock_irq(&c->lock);
> > idr_preload_end();
> > if (tag < 0)
> > @@ -290,8 +288,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> > return req;
> >
> > free:
> > - kfree(req->tc);
> > - kfree(req->rc);
> > + kfree(req->tc.sdata);
> > + kfree(req->rc.sdata);
>
> I wonder if we will free a wild pointer as 'sdata' has not been initialized NULL.
Good point, it's possible to jump here if the first fcall_alloc failed
since this declustered the two allocations.
Please consider this added to the previous patch (I'll send a v2 after
this has had more time for review, you can find the amended commit in my
9p-test tree meanwhile):
-----8<-----------------------------
diff --git a/net/9p/client.c b/net/9p/client.c
index ba99a94a12c9..fe030ef1c076 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -262,7 +262,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
return NULL;
if (p9_fcall_alloc(&req->tc, alloc_msize))
- goto free;
+ goto free_req;
if (p9_fcall_alloc(&req->rc, alloc_msize))
goto free;
@@ -290,6 +290,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
free:
kfree(req->tc.sdata);
kfree(req->rc.sdata);
+free_req:
kmem_cache_free(p9_req_cache, req);
return ERR_PTR(-ENOMEM);
}
-----8<-----------------------------
The second goto doesn't need changing because rc.sdata will be set to
NULL if the allocation failed
--
Dominique
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-07-31 1:12 ` Dominique Martinet
@ 2018-07-31 1:28 ` piaojun
0 siblings, 0 replies; 53+ messages in thread
From: piaojun @ 2018-07-31 1:28 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, linux-fsdevel, Greg Kurz, Matthew Wilcox, linux-kernel
On 2018/7/31 9:12, Dominique Martinet wrote:
> piaojun wrote on Tue, Jul 31, 2018:
>> This is really a *big* patch, but the modification seems no harm. And I
>> suggest running testcases to cover this. Please see my comments below.
>
> I'm always running tests, but more never hurt - please help ;)
I'm glad to help testing, and actually I'm going to run some testcases in
xfs-test for 9p.
>
> For reference I'm running a subset of cthon04[1], ltp[2] and some custom
> tests like these[3][4]
>
> [1] https://fedorapeople.org/cgit/steved/public_git/cthon04.git/
> [2] https://github.com/linux-test-project/ltp
> [3] https://github.com/phdeniel/sigmund/blob/master/modules/allfs.inc#L208
> [4] https://github.com/phdeniel/sigmund/blob/master/modules/allfs.inc#L251
>
>>> [...]
>>> @@ -263,13 +261,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>>> if (!req)
>>> return NULL;
>>>
>>> - req->tc = p9_fcall_alloc(alloc_msize);
>>> - req->rc = p9_fcall_alloc(alloc_msize);
>>> - if (!req->tc || !req->rc)
>>> + if (p9_fcall_alloc(&req->tc, alloc_msize))
>>> + goto free;
>>> + if (p9_fcall_alloc(&req->rc, alloc_msize))
>>> goto free;
>>>
>>> - p9pdu_reset(req->tc);
>>> - p9pdu_reset(req->rc);
>>> + p9pdu_reset(&req->tc);
>>> + p9pdu_reset(&req->rc);
>>> req->status = REQ_STATUS_ALLOC;
>>> init_waitqueue_head(&req->wq);
>>> INIT_LIST_HEAD(&req->req_list);
>>> @@ -281,7 +279,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>>> GFP_NOWAIT);
>>> else
>>> tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
>>> - req->tc->tag = tag;
>>> + req->tc.tag = tag;
>>> spin_unlock_irq(&c->lock);
>>> idr_preload_end();
>>> if (tag < 0)
>>> @@ -290,8 +288,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>>> return req;
>>>
>>> free:
>>> - kfree(req->tc);
>>> - kfree(req->rc);
>>> + kfree(req->tc.sdata);
>>> + kfree(req->rc.sdata);
>>
>> I wonder if we will free a wild pointer as 'sdata' has not been initialized NULL.
>
> Good point, it's possible to jump here if the first fcall_alloc failed
> since this declustered the two allocations.
>
> Please consider this added to the previous patch (I'll send a v2 after
> this has had more time for review, you can find the amended commit in my
> 9p-test tree meanwhile):
> -----8<-----------------------------
> diff --git a/net/9p/client.c b/net/9p/client.c
> index ba99a94a12c9..fe030ef1c076 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -262,7 +262,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> return NULL;
>
> if (p9_fcall_alloc(&req->tc, alloc_msize))
> - goto free;
> + goto free_req;
> if (p9_fcall_alloc(&req->rc, alloc_msize))
> goto free;
>
> @@ -290,6 +290,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> free:
> kfree(req->tc.sdata);
> kfree(req->rc.sdata);
> +free_req:
> kmem_cache_free(p9_req_cache, req);
> return ERR_PTR(-ENOMEM);
> }
> -----8<-----------------------------
>
> The second goto doesn't need changing because rc.sdata will be set to
> NULL if the allocation failed
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-07-30 9:34 ` [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs Dominique Martinet
2018-07-30 9:34 ` [PATCH 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
2018-07-31 0:55 ` [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs piaojun
@ 2018-08-01 14:14 ` Greg Kurz
2018-08-01 14:38 ` Dominique Martinet
2018-08-02 2:37 ` [PATCH v2 " Dominique Martinet
3 siblings, 1 reply; 53+ messages in thread
From: Greg Kurz @ 2018-08-01 14:14 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, linux-fsdevel, Matthew Wilcox, linux-kernel
On Mon, 30 Jul 2018 11:34:22 +0200
Dominique Martinet <asmadeus@codewreck.org> wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> 'msize' is often a power of two, or at least page-aligned, so avoiding
> an overhead of two dozen bytes for each allocation will help the
> allocator do its work and reduce memory fragmentation.
>
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
> include/net/9p/client.h | 4 +-
> net/9p/client.c | 160 ++++++++++++++++++++--------------------
> net/9p/trans_fd.c | 12 +--
> net/9p/trans_rdma.c | 29 ++++----
> net/9p/trans_virtio.c | 18 ++---
> net/9p/trans_xen.c | 12 +--
> 6 files changed, 117 insertions(+), 118 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index a4dc42c53d18..4b4ac1362ad5 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -95,8 +95,8 @@ struct p9_req_t {
> int status;
> int t_err;
> wait_queue_head_t wq;
> - struct p9_fcall *tc;
> - struct p9_fcall *rc;
> + struct p9_fcall tc;
> + struct p9_fcall rc;
> void *aux;
> struct list_head req_list;
> };
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 88db45966740..ba99a94a12c9 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,15 +231,13 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> return ret;
> }
>
> -static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
> +static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> {
> - struct p9_fcall *fc;
> - fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
> - if (!fc)
> - return NULL;
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (!fc->sdata)
> + return -ENOMEM;
> fc->capacity = alloc_msize;
> - fc->sdata = (char *) fc + sizeof(struct p9_fcall);
> - return fc;
> + return 0;
> }
>
> static struct kmem_cache *p9_req_cache;
> @@ -263,13 +261,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> if (!req)
> return NULL;
>
> - req->tc = p9_fcall_alloc(alloc_msize);
> - req->rc = p9_fcall_alloc(alloc_msize);
> - if (!req->tc || !req->rc)
> + if (p9_fcall_alloc(&req->tc, alloc_msize))
> + goto free;
> + if (p9_fcall_alloc(&req->rc, alloc_msize))
> goto free;
Hmm... if the first allocation fails, we will kfree() req->rc.sdata.
Are we sure we won't have a stale pointer or uninitialized data in
there ?
And even if we don't with the current code base, this is fragile and
could be easily broken.
I think you should drop this hunk and rather rename p9_fcall_alloc() to
p9_fcall_alloc_sdata() instead, since this is what the function is
actually doing with this patch applied.
>
> - p9pdu_reset(req->tc);
> - p9pdu_reset(req->rc);
> + p9pdu_reset(&req->tc);
> + p9pdu_reset(&req->rc);
> req->status = REQ_STATUS_ALLOC;
> init_waitqueue_head(&req->wq);
> INIT_LIST_HEAD(&req->req_list);
> @@ -281,7 +279,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> GFP_NOWAIT);
> else
> tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
> - req->tc->tag = tag;
> + req->tc.tag = tag;
> spin_unlock_irq(&c->lock);
> idr_preload_end();
> if (tag < 0)
> @@ -290,8 +288,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> return req;
>
> free:
> - kfree(req->tc);
> - kfree(req->rc);
> + kfree(req->tc.sdata);
> + kfree(req->rc.sdata);
> kmem_cache_free(p9_req_cache, req);
> return ERR_PTR(-ENOMEM);
> }
> @@ -329,14 +327,14 @@ EXPORT_SYMBOL(p9_tag_lookup);
> static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
> {
> unsigned long flags;
> - u16 tag = r->tc->tag;
> + u16 tag = r->tc.tag;
>
> p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
> spin_lock_irqsave(&c->lock, flags);
> idr_remove(&c->reqs, tag);
> spin_unlock_irqrestore(&c->lock, flags);
> - kfree(r->tc);
> - kfree(r->rc);
> + kfree(r->tc.sdata);
> + kfree(r->rc.sdata);
> kmem_cache_free(p9_req_cache, r);
> }
>
> @@ -368,7 +366,7 @@ static void p9_tag_cleanup(struct p9_client *c)
> */
> void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
> {
> - p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
> + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
>
> /*
> * This barrier is needed to make sure any change made to req before
> @@ -378,7 +376,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
> req->status = status;
>
> wake_up(&req->wq);
> - p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
> + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
> }
> EXPORT_SYMBOL(p9_client_cb);
>
> @@ -449,18 +447,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
> int err;
> int ecode;
>
> - err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
> - if (req->rc->size >= c->msize) {
> + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
> + if (req->rc.size >= c->msize) {
> p9_debug(P9_DEBUG_ERROR,
> "requested packet size too big: %d\n",
> - req->rc->size);
> + req->rc.size);
> return -EIO;
> }
> /*
> * dump the response from server
> * This should be after check errors which poplulate pdu_fcall.
> */
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> if (err) {
> p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
> return err;
> @@ -470,7 +468,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
>
> if (!p9_is_proto_dotl(c)) {
> char *ename;
> - err = p9pdu_readf(req->rc, c->proto_version, "s?d",
> + err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
> &ename, &ecode);
> if (err)
> goto out_err;
> @@ -486,7 +484,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
> }
> kfree(ename);
> } else {
> - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
> + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
> err = -ecode;
>
> p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
> @@ -520,12 +518,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> int8_t type;
> char *ename = NULL;
>
> - err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
> + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
> /*
> * dump the response from server
> * This should be after parse_header which poplulate pdu_fcall.
> */
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> if (err) {
> p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
> return err;
> @@ -540,13 +538,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> /* 7 = header size for RERROR; */
> int inline_len = in_hdrlen - 7;
>
> - len = req->rc->size - req->rc->offset;
> + len = req->rc.size - req->rc.offset;
> if (len > (P9_ZC_HDR_SZ - 7)) {
> err = -EFAULT;
> goto out_err;
> }
>
> - ename = &req->rc->sdata[req->rc->offset];
> + ename = &req->rc.sdata[req->rc.offset];
> if (len > inline_len) {
> /* We have error in external buffer */
> if (!copy_from_iter_full(ename + inline_len,
> @@ -556,7 +554,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> }
> }
> ename = NULL;
> - err = p9pdu_readf(req->rc, c->proto_version, "s?d",
> + err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
> &ename, &ecode);
> if (err)
> goto out_err;
> @@ -572,7 +570,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> }
> kfree(ename);
> } else {
> - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
> + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
> err = -ecode;
>
> p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
> @@ -605,7 +603,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
> int16_t oldtag;
> int err;
>
> - err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
> + err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
> if (err)
> return err;
>
> @@ -649,12 +647,12 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
> return req;
>
> /* marshall the data */
> - p9pdu_prepare(req->tc, req->tc->tag, type);
> - err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
> + p9pdu_prepare(&req->tc, req->tc.tag, type);
> + err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
> if (err)
> goto reterr;
> - p9pdu_finalize(c, req->tc);
> - trace_9p_client_req(c, type, req->tc->tag);
> + p9pdu_finalize(c, &req->tc);
> + trace_9p_client_req(c, type, req->tc.tag);
> return req;
> reterr:
> p9_free_req(c, req);
> @@ -739,7 +737,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
> goto reterr;
>
> err = p9_check_errors(c, req);
> - trace_9p_client_res(c, type, req->rc->tag, err);
> + trace_9p_client_res(c, type, req->rc.tag, err);
> if (!err)
> return req;
> reterr:
> @@ -821,7 +819,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
> goto reterr;
>
> err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
> - trace_9p_client_res(c, type, req->rc->tag, err);
> + trace_9p_client_res(c, type, req->rc.tag, err);
> if (!err)
> return req;
> reterr:
> @@ -904,10 +902,10 @@ static int p9_client_version(struct p9_client *c)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
> + err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
> if (err) {
> p9_debug(P9_DEBUG_9P, "version error %d\n", err);
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> goto error;
> }
>
> @@ -1056,9 +1054,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1113,9 +1111,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto clunk_fid;
> }
> @@ -1180,9 +1178,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1224,9 +1222,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1269,9 +1267,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1308,9 +1306,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1506,10 +1504,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
> break;
> }
>
> - *err = p9pdu_readf(req->rc, clnt->proto_version,
> + *err = p9pdu_readf(&req->rc, clnt->proto_version,
> "D", &count, &dataptr);
> if (*err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> break;
> }
> @@ -1579,9 +1577,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
> break;
> }
>
> - *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
> + *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
> if (*err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> break;
> }
> @@ -1623,9 +1621,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1676,9 +1674,9 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1828,11 +1826,11 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
> - &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
> - &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
> + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
> + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1936,9 +1934,9 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
> err = PTR_ERR(req);
> goto error;
> }
> - err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto clunk_fid;
> }
> @@ -2024,9 +2022,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
> if (rsize < count) {
> @@ -2065,9 +2063,9 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
> @@ -2096,9 +2094,9 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
> @@ -2131,9 +2129,9 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
> @@ -2162,11 +2160,11 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
> - &glock->start, &glock->length, &glock->proc_id,
> - &glock->client_id);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
> + &glock->start, &glock->length, &glock->proc_id,
> + &glock->client_id);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
> @@ -2192,9 +2190,9 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index e2ef3c782c53..20f46f13fe83 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -354,7 +354,7 @@ static void p9_read_work(struct work_struct *work)
> goto error;
> }
>
> - if (m->req->rc == NULL) {
> + if (m->req->rc.sdata == NULL) {
> p9_debug(P9_DEBUG_ERROR,
> "No recv fcall for tag %d (req %p), disconnecting!\n",
> m->rc.tag, m->req);
> @@ -362,7 +362,7 @@ static void p9_read_work(struct work_struct *work)
> err = -EIO;
> goto error;
> }
> - m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
> + m->rc.sdata = m->req->rc.sdata;
> memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
> m->rc.capacity = m->rc.size;
> }
> @@ -372,7 +372,7 @@ static void p9_read_work(struct work_struct *work)
> */
> if ((m->req) && (m->rc.offset == m->rc.capacity)) {
> p9_debug(P9_DEBUG_TRANS, "got new packet\n");
> - m->req->rc->size = m->rc.offset;
> + m->req->rc.size = m->rc.offset;
> spin_lock(&m->client->lock);
> if (m->req->status != REQ_STATUS_ERROR)
> status = REQ_STATUS_RCVD;
> @@ -469,8 +469,8 @@ static void p9_write_work(struct work_struct *work)
> p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
> list_move_tail(&req->req_list, &m->req_list);
>
> - m->wbuf = req->tc->sdata;
> - m->wsize = req->tc->size;
> + m->wbuf = req->tc.sdata;
> + m->wsize = req->tc.size;
> m->wpos = 0;
> spin_unlock(&m->client->lock);
> }
> @@ -663,7 +663,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
> struct p9_conn *m = &ts->conn;
>
> p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
> - m, current, req->tc, req->tc->id);
> + m, current, &req->tc, req->tc.id);
> if (m->err < 0)
> return m->err;
>
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index 2ab4574183c9..c5cac97df7f7 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -122,7 +122,7 @@ struct p9_rdma_context {
> dma_addr_t busa;
> union {
> struct p9_req_t *req;
> - struct p9_fcall *rc;
> + struct p9_fcall rc;
> };
> };
>
> @@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
> if (wc->status != IB_WC_SUCCESS)
> goto err_out;
>
> - c->rc->size = wc->byte_len;
> - err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
> + c->rc.size = wc->byte_len;
> + err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
> if (err)
> goto err_out;
>
> @@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
>
> /* Check that we have not yet received a reply for this request.
> */
> - if (unlikely(req->rc)) {
> + if (unlikely(req->rc.sdata)) {
> pr_err("Duplicate reply for request %d", tag);
> goto err_out;
> }
>
> - req->rc = c->rc;
> + req->rc.size = c->rc.size;
> + req->rc.sdata = c->rc.sdata;
> p9_client_cb(client, req, REQ_STATUS_RCVD);
>
> out:
> @@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
> container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
>
> ib_dma_unmap_single(rdma->cm_id->device,
> - c->busa, c->req->tc->size,
> + c->busa, c->req->tc.size,
> DMA_TO_DEVICE);
> up(&rdma->sq_sem);
> kfree(c);
> @@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
> struct ib_sge sge;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->rc->sdata, client->msize,
> + c->rc.sdata, client->msize,
> DMA_FROM_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
> goto error;
> @@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> **/
> if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> - /* Got one ! */
> - kfree(req->rc);
> - req->rc = NULL;
> + /* Got one! */
> + kfree(req->rc.sdata);
> + req->rc.sdata = NULL;
> goto dont_need_post_recv;
> } else {
> /* We raced and lost. */
> @@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> err = -ENOMEM;
> goto recv_error;
> }
> - rpl_context->rc = req->rc;
> + rpl_context->rc.sdata = req->rc.sdata;
>
> /*
> * Post a receive buffer for this request. We need to ensure
> @@ -479,7 +480,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> goto recv_error;
> }
> /* remove posted receive buffer from request structure */
> - req->rc = NULL;
> + req->rc.sdata = NULL;
>
> dont_need_post_recv:
> /* Post the request */
> @@ -491,7 +492,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> c->req = req;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->req->tc->sdata, c->req->tc->size,
> + c->req->tc.sdata, c->req->tc.size,
> DMA_TO_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
> err = -EIO;
> @@ -501,7 +502,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> c->cqe.done = send_done;
>
> sge.addr = c->busa;
> - sge.length = c->req->tc->size;
> + sge.length = c->req->tc.size;
> sge.lkey = rdma->pd->local_dma_lkey;
>
> wr.next = NULL;
> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> index 6265d1d62749..80dd34d8a6af 100644
> --- a/net/9p/trans_virtio.c
> +++ b/net/9p/trans_virtio.c
> @@ -157,7 +157,7 @@ static void req_done(struct virtqueue *vq)
> }
>
> if (len) {
> - req->rc->size = len;
> + req->rc.size = len;
> p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
> }
> }
> @@ -274,12 +274,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
> out_sgs = in_sgs = 0;
> /* Handle out VirtIO ring buffers */
> out = pack_sg_list(chan->sg, 0,
> - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
> + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
> if (out)
> sgs[out_sgs++] = chan->sg;
>
> in = pack_sg_list(chan->sg, out,
> - VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
> + VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
> if (in)
> sgs[out_sgs + in_sgs++] = chan->sg + out;
>
> @@ -417,15 +417,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
> if (n != outlen) {
> __le32 v = cpu_to_le32(n);
> - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
> + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
> outlen = n;
> }
> /* The size field of the message must include the length of the
> * header and the length of the data. We didn't actually know
> * the length of the data until this point so add it in now.
> */
> - sz = cpu_to_le32(req->tc->size + outlen);
> - memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
> + sz = cpu_to_le32(req->tc.size + outlen);
> + memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
> } else if (uidata) {
> int n = p9_get_mapped_pages(chan, &in_pages, uidata,
> inlen, &offs, &need_drop);
> @@ -434,7 +434,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
> if (n != inlen) {
> __le32 v = cpu_to_le32(n);
> - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
> + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
> inlen = n;
> }
> }
> @@ -446,7 +446,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
>
> /* out data */
> out = pack_sg_list(chan->sg, 0,
> - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
> + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
>
> if (out)
> sgs[out_sgs++] = chan->sg;
> @@ -465,7 +465,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> * alloced memory and payload onto the user buffer.
> */
> in = pack_sg_list(chan->sg, out,
> - VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
> + VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
> if (in)
> sgs[out_sgs + in_sgs++] = chan->sg + out;
>
> diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
> index c2d54ac76bfd..1a5b38892eb4 100644
> --- a/net/9p/trans_xen.c
> +++ b/net/9p/trans_xen.c
> @@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> struct xen_9pfs_front_priv *priv = NULL;
> RING_IDX cons, prod, masked_cons, masked_prod;
> unsigned long flags;
> - u32 size = p9_req->tc->size;
> + u32 size = p9_req->tc.size;
> struct xen_9pfs_dataring *ring;
> int num;
>
> @@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> if (!priv || priv->client != client)
> return -EINVAL;
>
> - num = p9_req->tc->tag % priv->num_rings;
> + num = p9_req->tc.tag % priv->num_rings;
> ring = &priv->rings[num];
>
> again:
> @@ -176,7 +176,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
> masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
>
> - xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
> + xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
> &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
>
> p9_req->status = REQ_STATUS_SENT;
> @@ -229,12 +229,12 @@ static void p9_xen_response(struct work_struct *work)
> continue;
> }
>
> - memcpy(req->rc, &h, sizeof(h));
> - req->rc->offset = 0;
> + memcpy(&req->rc, &h, sizeof(h));
> + req->rc.offset = 0;
>
> masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
> /* Then, read the whole packet (including the header) */
> - xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
> + xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
> masked_prod, &masked_cons,
> XEN_9PFS_RING_SIZE);
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-08-01 14:14 ` Greg Kurz
@ 2018-08-01 14:38 ` Dominique Martinet
2018-08-01 15:03 ` Greg Kurz
0 siblings, 1 reply; 53+ messages in thread
From: Dominique Martinet @ 2018-08-01 14:38 UTC (permalink / raw)
To: Greg Kurz; +Cc: v9fs-developer, linux-fsdevel, Matthew Wilcox, linux-kernel
Greg Kurz wrote on Wed, Aug 01, 2018:
> > @@ -263,13 +261,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> > if (!req)
> > return NULL;
> >
> > - req->tc = p9_fcall_alloc(alloc_msize);
> > - req->rc = p9_fcall_alloc(alloc_msize);
> > - if (!req->tc || !req->rc)
> > + if (p9_fcall_alloc(&req->tc, alloc_msize))
> > + goto free;
> > + if (p9_fcall_alloc(&req->rc, alloc_msize))
> > goto free;
>
> Hmm... if the first allocation fails, we will kfree() req->rc.sdata.
>
> Are we sure we won't have a stale pointer or uninitialized data in
> there ?
Yeah, Jun pointed that out and I have a v2 that only frees as needed
with an extra goto (I sent an incremental diff in my reply to his
comment here[1])
[1] https://lkml.kernel.org/r/20180731011256.GA30388@nautica
> And even if we don't with the current code base, this is fragile and
> could be easily broken.
>
> I think you should drop this hunk and rather rename p9_fcall_alloc() to
> p9_fcall_alloc_sdata() instead, since this is what the function is
> actually doing with this patch applied.
Hmm. I agree the naming isn't accurate, but even if we rename it we'll
need to pass a pointer to fcall as argument as it inits its capacity.
p9_fcall_init(fc, msize) might be simpler?
(I'm not sure I follow what you mean by 'drop this hunk', to be honest,
did you want a single function call to init both maybe?)
--
Dominique
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-08-01 14:38 ` Dominique Martinet
@ 2018-08-01 15:03 ` Greg Kurz
0 siblings, 0 replies; 53+ messages in thread
From: Greg Kurz @ 2018-08-01 15:03 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, linux-fsdevel, Matthew Wilcox, linux-kernel
On Wed, 1 Aug 2018 16:38:40 +0200
Dominique Martinet <asmadeus@codewreck.org> wrote:
> Greg Kurz wrote on Wed, Aug 01, 2018:
> > > @@ -263,13 +261,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> > > if (!req)
> > > return NULL;
> > >
> > > + if (p9_fcall_alloc(&req->tc, alloc_msize))
> > > + goto free;
> > > + if (p9_fcall_alloc(&req->rc, alloc_msize))
> > > goto free;
> >
> > Hmm... if the first allocation fails, we will kfree() req->rc.sdata.
> >
> > Are we sure we won't have a stale pointer or uninitialized data in
> > there ?
>
> Yeah, Jun pointed that out and I have a v2 that only frees as needed
> with an extra goto (I sent an incremental diff in my reply to his
> comment here[1])
>
> [1] https://lkml.kernel.org/r/20180731011256.GA30388@nautica
>
> > And even if we don't with the current code base, this is fragile and
> > could be easily broken.
> >
> > I think you should drop this hunk and rather rename p9_fcall_alloc() to
> > p9_fcall_alloc_sdata() instead, since this is what the function is
> > actually doing with this patch applied.
>
> Hmm. I agree the naming isn't accurate, but even if we rename it we'll
> need to pass a pointer to fcall as argument as it inits its capacity.
> p9_fcall_init(fc, msize) might be simpler?
>
Ah yes you're right... alloc is a bit misleading then. I agree that
p9_fcall_init() is more appropriate in this case.
And maybe you should introduce p9_fcall_fini() or _release() for
completeness. It would only do kfree() for a start, but it would
then evolve to be like the p9_fcall_kfree() function from patch 2.
> (I'm not sure I follow what you mean by 'drop this hunk', to be honest,
> did you want a single function call to init both maybe?)
>
I was meaning "keep the same logic in p9_tag_alloc()", something like:
req->tc.sdata = p9_fcall_alloc_sdata(&req->tc, alloc_msize);
req->rc.sdata = p9_fcall_alloc_sdata(&req->tc, alloc_msize);
if (!req->tc.sdata || !req->rc.sdata)
But I agree the way you did is cleaner.
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH v2 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-07-30 9:34 ` [PATCH 1/2] net/9p: embed fcall in req to round down buffer allocs Dominique Martinet
` (2 preceding siblings ...)
2018-08-01 14:14 ` Greg Kurz
@ 2018-08-02 2:37 ` Dominique Martinet
2018-08-02 2:37 ` [PATCH v2 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
` (2 more replies)
3 siblings, 3 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-08-02 2:37 UTC (permalink / raw)
To: v9fs-developer
Cc: linux-fsdevel, linux-kernel, Dominique Martinet, Matthew Wilcox,
Greg Kurz, Jun Piao
From: Dominique Martinet <dominique.martinet@cea.fr>
'msize' is often a power of two, or at least page-aligned, so avoiding
an overhead of two dozen bytes for each allocation will help the
allocator do its work and reduce memory fragmentation.
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Greg Kurz <groug@kaod.org>
Cc: Jun Piao <piaojun@huawei.com>
---
v2:
- Add extra label to not free uninitialized memory on alloc failure
- Rename p9_fcall_alloc to 9p_fcall_init
- Add a p9_fcall_fini function to echo to init
include/net/9p/client.h | 4 +-
net/9p/client.c | 166 ++++++++++++++++++++--------------------
net/9p/trans_fd.c | 12 +--
net/9p/trans_rdma.c | 29 +++----
net/9p/trans_virtio.c | 18 ++---
net/9p/trans_xen.c | 12 +--
6 files changed, 123 insertions(+), 118 deletions(-)
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index a4dc42c53d18..4b4ac1362ad5 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -95,8 +95,8 @@ struct p9_req_t {
int status;
int t_err;
wait_queue_head_t wq;
- struct p9_fcall *tc;
- struct p9_fcall *rc;
+ struct p9_fcall tc;
+ struct p9_fcall rc;
void *aux;
struct list_head req_list;
};
diff --git a/net/9p/client.c b/net/9p/client.c
index 88db45966740..bc40bb11b832 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -231,15 +231,18 @@ static int parse_opts(char *opts, struct p9_client *clnt)
return ret;
}
-static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
{
- struct p9_fcall *fc;
- fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
- if (!fc)
- return NULL;
+ fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ if (!fc->sdata)
+ return -ENOMEM;
fc->capacity = alloc_msize;
- fc->sdata = (char *) fc + sizeof(struct p9_fcall);
- return fc;
+ return 0;
+}
+
+static void p9_fcall_fini(struct p9_fcall *fc)
+{
+ kfree(fc->sdata);
}
static struct kmem_cache *p9_req_cache;
@@ -263,13 +266,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
if (!req)
return NULL;
- req->tc = p9_fcall_alloc(alloc_msize);
- req->rc = p9_fcall_alloc(alloc_msize);
- if (!req->tc || !req->rc)
+ if (p9_fcall_init(&req->tc, alloc_msize))
+ goto free_req;
+ if (p9_fcall_init(&req->rc, alloc_msize))
goto free;
- p9pdu_reset(req->tc);
- p9pdu_reset(req->rc);
+ p9pdu_reset(&req->tc);
+ p9pdu_reset(&req->rc);
req->status = REQ_STATUS_ALLOC;
init_waitqueue_head(&req->wq);
INIT_LIST_HEAD(&req->req_list);
@@ -281,7 +284,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
GFP_NOWAIT);
else
tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
- req->tc->tag = tag;
+ req->tc.tag = tag;
spin_unlock_irq(&c->lock);
idr_preload_end();
if (tag < 0)
@@ -290,8 +293,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
return req;
free:
- kfree(req->tc);
- kfree(req->rc);
+ p9_fcall_fini(&req->tc);
+ p9_fcall_fini(&req->rc);
+free_req:
kmem_cache_free(p9_req_cache, req);
return ERR_PTR(-ENOMEM);
}
@@ -329,14 +333,14 @@ EXPORT_SYMBOL(p9_tag_lookup);
static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
{
unsigned long flags;
- u16 tag = r->tc->tag;
+ u16 tag = r->tc.tag;
p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
spin_lock_irqsave(&c->lock, flags);
idr_remove(&c->reqs, tag);
spin_unlock_irqrestore(&c->lock, flags);
- kfree(r->tc);
- kfree(r->rc);
+ p9_fcall_fini(&r->tc);
+ p9_fcall_fini(&r->rc);
kmem_cache_free(p9_req_cache, r);
}
@@ -368,7 +372,7 @@ static void p9_tag_cleanup(struct p9_client *c)
*/
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
- p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+ p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
/*
* This barrier is needed to make sure any change made to req before
@@ -378,7 +382,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
req->status = status;
wake_up(&req->wq);
- p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+ p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
}
EXPORT_SYMBOL(p9_client_cb);
@@ -449,18 +453,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
int err;
int ecode;
- err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
- if (req->rc->size >= c->msize) {
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
+ if (req->rc.size >= c->msize) {
p9_debug(P9_DEBUG_ERROR,
"requested packet size too big: %d\n",
- req->rc->size);
+ req->rc.size);
return -EIO;
}
/*
* dump the response from server
* This should be after check errors which poplulate pdu_fcall.
*/
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
@@ -470,7 +474,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
if (!p9_is_proto_dotl(c)) {
char *ename;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
goto out_err;
@@ -486,7 +490,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
}
kfree(ename);
} else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -520,12 +524,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
int8_t type;
char *ename = NULL;
- err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
/*
* dump the response from server
* This should be after parse_header which poplulate pdu_fcall.
*/
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
@@ -540,13 +544,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
/* 7 = header size for RERROR; */
int inline_len = in_hdrlen - 7;
- len = req->rc->size - req->rc->offset;
+ len = req->rc.size - req->rc.offset;
if (len > (P9_ZC_HDR_SZ - 7)) {
err = -EFAULT;
goto out_err;
}
- ename = &req->rc->sdata[req->rc->offset];
+ ename = &req->rc.sdata[req->rc.offset];
if (len > inline_len) {
/* We have error in external buffer */
if (!copy_from_iter_full(ename + inline_len,
@@ -556,7 +560,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
}
ename = NULL;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
goto out_err;
@@ -572,7 +576,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
kfree(ename);
} else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -605,7 +609,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
int16_t oldtag;
int err;
- err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+ err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
if (err)
return err;
@@ -649,12 +653,12 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
return req;
/* marshall the data */
- p9pdu_prepare(req->tc, req->tc->tag, type);
- err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
+ p9pdu_prepare(&req->tc, req->tc.tag, type);
+ err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
if (err)
goto reterr;
- p9pdu_finalize(c, req->tc);
- trace_9p_client_req(c, type, req->tc->tag);
+ p9pdu_finalize(c, &req->tc);
+ trace_9p_client_req(c, type, req->tc.tag);
return req;
reterr:
p9_free_req(c, req);
@@ -739,7 +743,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
goto reterr;
err = p9_check_errors(c, req);
- trace_9p_client_res(c, type, req->rc->tag, err);
+ trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
@@ -821,7 +825,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
goto reterr;
err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
- trace_9p_client_res(c, type, req->rc->tag, err);
+ trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
@@ -904,10 +908,10 @@ static int p9_client_version(struct p9_client *c)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
+ err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
if (err) {
p9_debug(P9_DEBUG_9P, "version error %d\n", err);
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
goto error;
}
@@ -1056,9 +1060,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1113,9 +1117,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto clunk_fid;
}
@@ -1180,9 +1184,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1224,9 +1228,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1269,9 +1273,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1308,9 +1312,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1506,10 +1510,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
break;
}
- *err = p9pdu_readf(req->rc, clnt->proto_version,
+ *err = p9pdu_readf(&req->rc, clnt->proto_version,
"D", &count, &dataptr);
if (*err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
break;
}
@@ -1579,9 +1583,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
break;
}
- *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+ *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
if (*err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
break;
}
@@ -1623,9 +1627,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1676,9 +1680,9 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1828,11 +1832,11 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
- &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
- &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+ &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+ &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1936,9 +1940,9 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
err = PTR_ERR(req);
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto clunk_fid;
}
@@ -2024,9 +2028,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
if (rsize < count) {
@@ -2065,9 +2069,9 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
@@ -2096,9 +2100,9 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
@@ -2131,9 +2135,9 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
@@ -2162,11 +2166,11 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
- &glock->start, &glock->length, &glock->proc_id,
- &glock->client_id);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
+ &glock->start, &glock->length, &glock->proc_id,
+ &glock->client_id);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
@@ -2192,9 +2196,9 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e2ef3c782c53..20f46f13fe83 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -354,7 +354,7 @@ static void p9_read_work(struct work_struct *work)
goto error;
}
- if (m->req->rc == NULL) {
+ if (m->req->rc.sdata == NULL) {
p9_debug(P9_DEBUG_ERROR,
"No recv fcall for tag %d (req %p), disconnecting!\n",
m->rc.tag, m->req);
@@ -362,7 +362,7 @@ static void p9_read_work(struct work_struct *work)
err = -EIO;
goto error;
}
- m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ m->rc.sdata = m->req->rc.sdata;
memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
m->rc.capacity = m->rc.size;
}
@@ -372,7 +372,7 @@ static void p9_read_work(struct work_struct *work)
*/
if ((m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
- m->req->rc->size = m->rc.offset;
+ m->req->rc.size = m->rc.offset;
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
status = REQ_STATUS_RCVD;
@@ -469,8 +469,8 @@ static void p9_write_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
list_move_tail(&req->req_list, &m->req_list);
- m->wbuf = req->tc->sdata;
- m->wsize = req->tc->size;
+ m->wbuf = req->tc.sdata;
+ m->wsize = req->tc.size;
m->wpos = 0;
spin_unlock(&m->client->lock);
}
@@ -663,7 +663,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
- m, current, req->tc, req->tc->id);
+ m, current, &req->tc, req->tc.id);
if (m->err < 0)
return m->err;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2ab4574183c9..c5cac97df7f7 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -122,7 +122,7 @@ struct p9_rdma_context {
dma_addr_t busa;
union {
struct p9_req_t *req;
- struct p9_fcall *rc;
+ struct p9_fcall rc;
};
};
@@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto err_out;
- c->rc->size = wc->byte_len;
- err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+ c->rc.size = wc->byte_len;
+ err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
if (err)
goto err_out;
@@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
/* Check that we have not yet received a reply for this request.
*/
- if (unlikely(req->rc)) {
+ if (unlikely(req->rc.sdata)) {
pr_err("Duplicate reply for request %d", tag);
goto err_out;
}
- req->rc = c->rc;
+ req->rc.size = c->rc.size;
+ req->rc.sdata = c->rc.sdata;
p9_client_cb(client, req, REQ_STATUS_RCVD);
out:
@@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
ib_dma_unmap_single(rdma->cm_id->device,
- c->busa, c->req->tc->size,
+ c->busa, c->req->tc.size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
kfree(c);
@@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
struct ib_sge sge;
c->busa = ib_dma_map_single(rdma->cm_id->device,
- c->rc->sdata, client->msize,
+ c->rc.sdata, client->msize,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
@@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
**/
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
- /* Got one ! */
- kfree(req->rc);
- req->rc = NULL;
+ /* Got one! */
+ kfree(req->rc.sdata);
+ req->rc.sdata = NULL;
goto dont_need_post_recv;
} else {
/* We raced and lost. */
@@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
err = -ENOMEM;
goto recv_error;
}
- rpl_context->rc = req->rc;
+ rpl_context->rc.sdata = req->rc.sdata;
/*
* Post a receive buffer for this request. We need to ensure
@@ -479,7 +480,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
goto recv_error;
}
/* remove posted receive buffer from request structure */
- req->rc = NULL;
+ req->rc.sdata = NULL;
dont_need_post_recv:
/* Post the request */
@@ -491,7 +492,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
c->req = req;
c->busa = ib_dma_map_single(rdma->cm_id->device,
- c->req->tc->sdata, c->req->tc->size,
+ c->req->tc.sdata, c->req->tc.size,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
err = -EIO;
@@ -501,7 +502,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
c->cqe.done = send_done;
sge.addr = c->busa;
- sge.length = c->req->tc->size;
+ sge.length = c->req->tc.size;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 6265d1d62749..80dd34d8a6af 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -157,7 +157,7 @@ static void req_done(struct virtqueue *vq)
}
if (len) {
- req->rc->size = len;
+ req->rc.size = len;
p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
@@ -274,12 +274,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
out_sgs = in_sgs = 0;
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0,
- VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
if (out)
sgs[out_sgs++] = chan->sg;
in = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+ VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
@@ -417,15 +417,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
if (n != outlen) {
__le32 v = cpu_to_le32(n);
- memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+ memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
outlen = n;
}
/* The size field of the message must include the length of the
* header and the length of the data. We didn't actually know
* the length of the data until this point so add it in now.
*/
- sz = cpu_to_le32(req->tc->size + outlen);
- memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
+ sz = cpu_to_le32(req->tc.size + outlen);
+ memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
} else if (uidata) {
int n = p9_get_mapped_pages(chan, &in_pages, uidata,
inlen, &offs, &need_drop);
@@ -434,7 +434,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
if (n != inlen) {
__le32 v = cpu_to_le32(n);
- memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+ memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
inlen = n;
}
}
@@ -446,7 +446,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
/* out data */
out = pack_sg_list(chan->sg, 0,
- VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
if (out)
sgs[out_sgs++] = chan->sg;
@@ -465,7 +465,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
* alloced memory and payload onto the user buffer.
*/
in = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+ VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index c2d54ac76bfd..1a5b38892eb4 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
struct xen_9pfs_front_priv *priv = NULL;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
- u32 size = p9_req->tc->size;
+ u32 size = p9_req->tc.size;
struct xen_9pfs_dataring *ring;
int num;
@@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
if (!priv || priv->client != client)
return -EINVAL;
- num = p9_req->tc->tag % priv->num_rings;
+ num = p9_req->tc.tag % priv->num_rings;
ring = &priv->rings[num];
again:
@@ -176,7 +176,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
- xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
+ xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
&masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
p9_req->status = REQ_STATUS_SENT;
@@ -229,12 +229,12 @@ static void p9_xen_response(struct work_struct *work)
continue;
}
- memcpy(req->rc, &h, sizeof(h));
- req->rc->offset = 0;
+ memcpy(&req->rc, &h, sizeof(h));
+ req->rc.offset = 0;
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
/* Then, read the whole packet (including the header) */
- xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
+ xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
masked_prod, &masked_cons,
XEN_9PFS_RING_SIZE);
--
2.17.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH v2 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-02 2:37 ` [PATCH v2 " Dominique Martinet
@ 2018-08-02 2:37 ` Dominique Martinet
2018-08-02 4:58 ` [V9fs-developer] " Dominique Martinet
2018-08-02 9:23 ` [PATCH v2 1/2] net/9p: embed fcall in req to round down buffer allocs Greg Kurz
2018-08-09 14:33 ` [PATCH v3 " Dominique Martinet
2 siblings, 1 reply; 53+ messages in thread
From: Dominique Martinet @ 2018-08-02 2:37 UTC (permalink / raw)
To: v9fs-developer
Cc: linux-fsdevel, linux-kernel, Dominique Martinet, Matthew Wilcox,
Greg Kurz, Jun Piao
From: Dominique Martinet <dominique.martinet@cea.fr>
Having a specific cache for the fcall allocations helps speed up
allocations a bit, especially in case of non-"round" msizes.
The caches will automatically be merged if there are multiple caches
of items with the same size so we do not need to try to share a cache
between different clients of the same size.
Since the msize is negotiated with the server, only allocate the cache
after that negotiation has happened - previous allocations or
allocations of different sizes (e.g. zero-copy fcall) are made with
kmalloc directly.
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Greg Kurz <groug@kaod.org>
Cc: Jun Piao <piaojun@huawei.com>
---
v2:
- Add a pointer to the cache in p9_fcall to make sure a buffer
allocated with kmalloc gets freed with kfree and vice-versa
This could have been smaller with a bool but this spares having
to look at the client so looked a bit cleaner, I'm expecting this
patch will need a v3 one way or another so I went for the bolder
approach - please say if you think a smaller item is better ; I *think*
nothing relies on this being ordered the same way as the data on the
wire (struct isn't packed anyway) so we can move id after tag and add
another u8 to not have any overhead
- added likely() to cache existence check in allocation, but nothing
for msize check or free because of zc request being of different size
include/net/9p/9p.h | 1 +
include/net/9p/client.h | 2 ++
net/9p/client.c | 34 ++++++++++++++++++++++++++++------
net/9p/trans_rdma.c | 2 +-
4 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index e23896116d9a..f1d2ed3cee61 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -558,6 +558,7 @@ struct p9_fcall {
size_t offset;
size_t capacity;
+ struct kmem_cache *cache;
u8 *sdata;
};
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4b4ac1362ad5..735f3979d559 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -123,6 +123,7 @@ struct p9_client {
struct p9_trans_module *trans_mod;
enum p9_trans_status status;
void *trans;
+ struct kmem_cache *fcall_cache;
union {
struct {
@@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
kgid_t gid, struct p9_qid *);
int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+void p9_fcall_fini(struct p9_fcall *fc);
struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
diff --git a/net/9p/client.c b/net/9p/client.c
index bc40bb11b832..0e0f8bb3fd3c 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -231,19 +231,36 @@ static int parse_opts(char *opts, struct p9_client *clnt)
return ret;
}
-static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
+static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
+ int alloc_msize)
{
- fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ if (likely(c->fcall_cache) && alloc_msize == c->msize) {
+ fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+ fc->cache = c->fcall_cache;
+ } else {
+ fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ fc->cache = NULL;
+ }
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
return 0;
}
-static void p9_fcall_fini(struct p9_fcall *fc)
+void p9_fcall_fini(struct p9_fcall *fc)
{
- kfree(fc->sdata);
+ /* sdata can be NULL for interrupted requests in trans_rdma,
+ * and kmem_cache_free does not do NULL-check for us
+ */
+ if (unlikely(!fc->sdata))
+ return;
+
+ if (fc->cache)
+ kmem_cache_free(fc->cache, fc->sdata);
+ else
+ kfree(fc->sdata);
}
+EXPORT_SYMBOL(p9_fcall_fini);
static struct kmem_cache *p9_req_cache;
@@ -266,9 +283,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
if (!req)
return NULL;
- if (p9_fcall_init(&req->tc, alloc_msize))
+ if (p9_fcall_init(c, &req->tc, alloc_msize))
goto free_req;
- if (p9_fcall_init(&req->rc, alloc_msize))
+ if (p9_fcall_init(c, &req->rc, alloc_msize))
goto free;
p9pdu_reset(&req->tc);
@@ -950,6 +967,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+ clnt->fcall_cache = NULL;
client_id = utsname()->nodename;
memcpy(clnt->name, client_id, strlen(client_id) + 1);
@@ -986,6 +1004,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto close_trans;
+ clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
+ 0, 0, NULL);
+
return clnt;
close_trans:
@@ -1017,6 +1038,7 @@ void p9_client_destroy(struct p9_client *clnt)
p9_tag_cleanup(clnt);
+ kmem_cache_destroy(clnt->fcall_cache);
kfree(clnt);
}
EXPORT_SYMBOL(p9_client_destroy);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index c5cac97df7f7..c60655c90c9e 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
/* Got one! */
- kfree(req->rc.sdata);
+ p9_fcall_fini(&req->rc);
req->rc.sdata = NULL;
goto dont_need_post_recv;
} else {
--
2.17.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [V9fs-developer] [PATCH v2 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-02 2:37 ` [PATCH v2 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
@ 2018-08-02 4:58 ` Dominique Martinet
0 siblings, 0 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-08-02 4:58 UTC (permalink / raw)
To: v9fs-developer
Cc: Dominique Martinet, linux-kernel, Matthew Wilcox, Greg Kurz,
linux-fsdevel
Dominique Martinet wrote on Thu, Aug 02, 2018:
> [...]
> + clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> + 0, 0, NULL);
Well, my gut feeling that I'd need a v3 was right, after a bit more time
testing (slightly different setup), I got this warning:
Bad or missing usercopy whitelist? Kernel memory overwrite attempt
detected to SLUB object '9p-fcall-cache' (offset 23, size 55078)!
So this kmem_cache_create needs to change to kmem_cache_create_usercopy,
but I'm not sure how to best specify the range -- this warning was about
writing data to the buffer so a TWRITE:
size[4] Twrite tag[2] fid[4] offset[8] count[4] data[count]
(the data[count] part comes from user - this matches 4+1+2+4+8+4 = 23)
Similarily RREAD has data that can be copied to userspace, which has a
similar check:
size[4] Rread tag[2] count[4] data[count]
So I could hardcode offset = 4+1+2+4=11, usercopy size = msize - 11...
We have some P9_*HDR*SZ macros but none are really appropriate:
---
/* ample room for Twrite/Rread header */
#define P9_IOHDRSZ 24
/* Room for readdir header */
#define P9_READDIRHDRSZ 24
/* size of header for zero copy read/write */
#define P9_ZC_HDR_SZ 4096
---
It's actually been bugging me for a while that I see hardcoded '7's for
9p main header size (len + msg type + tag) everywhere, I could add a
first P9_HDRSZ of 7 and use that + 4?
--
Dominique
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH v2 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-08-02 2:37 ` [PATCH v2 " Dominique Martinet
2018-08-02 2:37 ` [PATCH v2 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
@ 2018-08-02 9:23 ` Greg Kurz
2018-08-02 22:03 ` Dominique Martinet
2018-08-09 14:33 ` [PATCH v3 " Dominique Martinet
2 siblings, 1 reply; 53+ messages in thread
From: Greg Kurz @ 2018-08-02 9:23 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, linux-fsdevel, linux-kernel, Dominique Martinet,
Matthew Wilcox, Jun Piao
On Thu, 2 Aug 2018 04:37:31 +0200
Dominique Martinet <asmadeus@codewreck.org> wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> 'msize' is often a power of two, or at least page-aligned, so avoiding
> an overhead of two dozen bytes for each allocation will help the
> allocator do its work and reduce memory fragmentation.
>
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Greg Kurz <groug@kaod.org>
> Cc: Jun Piao <piaojun@huawei.com>
> ---
>
> v2:
> - Add extra label to not free uninitialized memory on alloc failure
> - Rename p9_fcall_alloc to 9p_fcall_init
> - Add a p9_fcall_fini function to echo to init
>
[...]
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index 2ab4574183c9..c5cac97df7f7 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -122,7 +122,7 @@ struct p9_rdma_context {
> dma_addr_t busa;
> union {
> struct p9_req_t *req;
> - struct p9_fcall *rc;
> + struct p9_fcall rc;
> };
> };
>
> @@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
> if (wc->status != IB_WC_SUCCESS)
> goto err_out;
>
> - c->rc->size = wc->byte_len;
> - err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
> + c->rc.size = wc->byte_len;
> + err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
> if (err)
> goto err_out;
>
> @@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
>
> /* Check that we have not yet received a reply for this request.
> */
> - if (unlikely(req->rc)) {
> + if (unlikely(req->rc.sdata)) {
> pr_err("Duplicate reply for request %d", tag);
> goto err_out;
> }
>
> - req->rc = c->rc;
> + req->rc.size = c->rc.size;
> + req->rc.sdata = c->rc.sdata;
> p9_client_cb(client, req, REQ_STATUS_RCVD);
>
> out:
> @@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
> container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
>
> ib_dma_unmap_single(rdma->cm_id->device,
> - c->busa, c->req->tc->size,
> + c->busa, c->req->tc.size,
> DMA_TO_DEVICE);
> up(&rdma->sq_sem);
> kfree(c);
> @@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
> struct ib_sge sge;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->rc->sdata, client->msize,
> + c->rc.sdata, client->msize,
> DMA_FROM_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
> goto error;
> @@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> **/
> if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> - /* Got one ! */
> - kfree(req->rc);
> - req->rc = NULL;
> + /* Got one! */
> + kfree(req->rc.sdata);
Shouldn't this be p9_fcall_fini(&req->rc) ?
The rest looks good, so, with that fixed, you can add:
Reviewed-by: Greg Kurz <groug@kaod.org>
> + req->rc.sdata = NULL;
> goto dont_need_post_recv;
> } else {
> /* We raced and lost. */
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH v2 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-08-02 9:23 ` [PATCH v2 1/2] net/9p: embed fcall in req to round down buffer allocs Greg Kurz
@ 2018-08-02 22:03 ` Dominique Martinet
0 siblings, 0 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-08-02 22:03 UTC (permalink / raw)
To: Greg Kurz
Cc: v9fs-developer, linux-fsdevel, linux-kernel, Dominique Martinet,
Matthew Wilcox, Jun Piao
Greg Kurz wrote on Thu, Aug 02, 2018:
> > @@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> > **/
> > if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> > if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> > - /* Got one ! */
> > - kfree(req->rc);
> > - req->rc = NULL;
> > + /* Got one! */
> > + kfree(req->rc.sdata);
>
> Shouldn't this be p9_fcall_fini(&req->rc) ?
Right, I failed at bookkeeping, I changed that in the next patch but it
should have been done now.
Will add p9_fcall_fini to headers/export in this patch instead of the
next
> The rest looks good, so, with that fixed, you can add:
>
> Reviewed-by: Greg Kurz <groug@kaod.org>
Thanks!
--
Dominique
^ permalink raw reply [flat|nested] 53+ messages in thread
* [PATCH v3 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-08-02 2:37 ` [PATCH v2 " Dominique Martinet
2018-08-02 2:37 ` [PATCH v2 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
2018-08-02 9:23 ` [PATCH v2 1/2] net/9p: embed fcall in req to round down buffer allocs Greg Kurz
@ 2018-08-09 14:33 ` Dominique Martinet
2018-08-09 14:33 ` [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
2018-08-10 0:47 ` [PATCH v3 1/2] net/9p: embed fcall in req to round down buffer allocs piaojun
2 siblings, 2 replies; 53+ messages in thread
From: Dominique Martinet @ 2018-08-09 14:33 UTC (permalink / raw)
To: v9fs-developer
Cc: Dominique Martinet, linux-kernel, linux-fsdevel, Greg Kurz,
Matthew Wilcox, Jun Piao
From: Dominique Martinet <dominique.martinet@cea.fr>
'msize' is often a power of two, or at least page-aligned, so avoiding
an overhead of two dozen bytes for each allocation will help the
allocator do its work and reduce memory fragmentation.
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Reviewed-by: Greg Kurz <groug@kaod.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jun Piao <piaojun@huawei.com>
---
v2:
- Add extra label to not free uninitialized memory on alloc failure
- Rename p9_fcall_alloc to 9p_fcall_init
- Add a p9_fcall_fini function to echo to init
v3:
- use p9_call_fini() in rdma_request() instead of kfree, the code
was in the following patch in previous version
include/net/9p/client.h | 5 +-
net/9p/client.c | 167 +++++++++++++++++++++-------------------
net/9p/trans_fd.c | 12 +--
net/9p/trans_rdma.c | 29 +++----
net/9p/trans_virtio.c | 18 ++---
net/9p/trans_xen.c | 12 +--
6 files changed, 125 insertions(+), 118 deletions(-)
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index a4dc42c53d18..c2671d40bb6b 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -95,8 +95,8 @@ struct p9_req_t {
int status;
int t_err;
wait_queue_head_t wq;
- struct p9_fcall *tc;
- struct p9_fcall *rc;
+ struct p9_fcall tc;
+ struct p9_fcall rc;
void *aux;
struct list_head req_list;
};
@@ -230,6 +230,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
kgid_t gid, struct p9_qid *);
int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+void p9_fcall_fini(struct p9_fcall *fc);
struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
diff --git a/net/9p/client.c b/net/9p/client.c
index 88db45966740..ed78751aee7c 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -231,16 +231,20 @@ static int parse_opts(char *opts, struct p9_client *clnt)
return ret;
}
-static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
{
- struct p9_fcall *fc;
- fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
- if (!fc)
- return NULL;
+ fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ if (!fc->sdata)
+ return -ENOMEM;
fc->capacity = alloc_msize;
- fc->sdata = (char *) fc + sizeof(struct p9_fcall);
- return fc;
+ return 0;
+}
+
+void p9_fcall_fini(struct p9_fcall *fc)
+{
+ kfree(fc->sdata);
}
+EXPORT_SYMBOL(p9_fcall_fini);
static struct kmem_cache *p9_req_cache;
@@ -263,13 +267,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
if (!req)
return NULL;
- req->tc = p9_fcall_alloc(alloc_msize);
- req->rc = p9_fcall_alloc(alloc_msize);
- if (!req->tc || !req->rc)
+ if (p9_fcall_init(&req->tc, alloc_msize))
+ goto free_req;
+ if (p9_fcall_init(&req->rc, alloc_msize))
goto free;
- p9pdu_reset(req->tc);
- p9pdu_reset(req->rc);
+ p9pdu_reset(&req->tc);
+ p9pdu_reset(&req->rc);
req->status = REQ_STATUS_ALLOC;
init_waitqueue_head(&req->wq);
INIT_LIST_HEAD(&req->req_list);
@@ -281,7 +285,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
GFP_NOWAIT);
else
tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
- req->tc->tag = tag;
+ req->tc.tag = tag;
spin_unlock_irq(&c->lock);
idr_preload_end();
if (tag < 0)
@@ -290,8 +294,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
return req;
free:
- kfree(req->tc);
- kfree(req->rc);
+ p9_fcall_fini(&req->tc);
+ p9_fcall_fini(&req->rc);
+free_req:
kmem_cache_free(p9_req_cache, req);
return ERR_PTR(-ENOMEM);
}
@@ -329,14 +334,14 @@ EXPORT_SYMBOL(p9_tag_lookup);
static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
{
unsigned long flags;
- u16 tag = r->tc->tag;
+ u16 tag = r->tc.tag;
p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
spin_lock_irqsave(&c->lock, flags);
idr_remove(&c->reqs, tag);
spin_unlock_irqrestore(&c->lock, flags);
- kfree(r->tc);
- kfree(r->rc);
+ p9_fcall_fini(&r->tc);
+ p9_fcall_fini(&r->rc);
kmem_cache_free(p9_req_cache, r);
}
@@ -368,7 +373,7 @@ static void p9_tag_cleanup(struct p9_client *c)
*/
void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
- p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+ p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
/*
* This barrier is needed to make sure any change made to req before
@@ -378,7 +383,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
req->status = status;
wake_up(&req->wq);
- p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
+ p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
}
EXPORT_SYMBOL(p9_client_cb);
@@ -449,18 +454,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
int err;
int ecode;
- err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
- if (req->rc->size >= c->msize) {
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
+ if (req->rc.size >= c->msize) {
p9_debug(P9_DEBUG_ERROR,
"requested packet size too big: %d\n",
- req->rc->size);
+ req->rc.size);
return -EIO;
}
/*
* dump the response from server
* This should be after check errors which poplulate pdu_fcall.
*/
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
@@ -470,7 +475,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
if (!p9_is_proto_dotl(c)) {
char *ename;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
goto out_err;
@@ -486,7 +491,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
}
kfree(ename);
} else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -520,12 +525,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
int8_t type;
char *ename = NULL;
- err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
+ err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
/*
* dump the response from server
* This should be after parse_header which poplulate pdu_fcall.
*/
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
if (err) {
p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
return err;
@@ -540,13 +545,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
/* 7 = header size for RERROR; */
int inline_len = in_hdrlen - 7;
- len = req->rc->size - req->rc->offset;
+ len = req->rc.size - req->rc.offset;
if (len > (P9_ZC_HDR_SZ - 7)) {
err = -EFAULT;
goto out_err;
}
- ename = &req->rc->sdata[req->rc->offset];
+ ename = &req->rc.sdata[req->rc.offset];
if (len > inline_len) {
/* We have error in external buffer */
if (!copy_from_iter_full(ename + inline_len,
@@ -556,7 +561,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
}
ename = NULL;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
&ename, &ecode);
if (err)
goto out_err;
@@ -572,7 +577,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
kfree(ename);
} else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -605,7 +610,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
int16_t oldtag;
int err;
- err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
+ err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
if (err)
return err;
@@ -649,12 +654,12 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
return req;
/* marshall the data */
- p9pdu_prepare(req->tc, req->tc->tag, type);
- err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
+ p9pdu_prepare(&req->tc, req->tc.tag, type);
+ err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
if (err)
goto reterr;
- p9pdu_finalize(c, req->tc);
- trace_9p_client_req(c, type, req->tc->tag);
+ p9pdu_finalize(c, &req->tc);
+ trace_9p_client_req(c, type, req->tc.tag);
return req;
reterr:
p9_free_req(c, req);
@@ -739,7 +744,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
goto reterr;
err = p9_check_errors(c, req);
- trace_9p_client_res(c, type, req->rc->tag, err);
+ trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
@@ -821,7 +826,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
goto reterr;
err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
- trace_9p_client_res(c, type, req->rc->tag, err);
+ trace_9p_client_res(c, type, req->rc.tag, err);
if (!err)
return req;
reterr:
@@ -904,10 +909,10 @@ static int p9_client_version(struct p9_client *c)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
+ err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
if (err) {
p9_debug(P9_DEBUG_9P, "version error %d\n", err);
- trace_9p_protocol_dump(c, req->rc);
+ trace_9p_protocol_dump(c, &req->rc);
goto error;
}
@@ -1056,9 +1061,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1113,9 +1118,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto clunk_fid;
}
@@ -1180,9 +1185,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1224,9 +1229,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1269,9 +1274,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1308,9 +1313,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
@@ -1506,10 +1511,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
break;
}
- *err = p9pdu_readf(req->rc, clnt->proto_version,
+ *err = p9pdu_readf(&req->rc, clnt->proto_version,
"D", &count, &dataptr);
if (*err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
break;
}
@@ -1579,9 +1584,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
break;
}
- *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
+ *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
if (*err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
break;
}
@@ -1623,9 +1628,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1676,9 +1681,9 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1828,11 +1833,11 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
- &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
- &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+ &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+ &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto error;
}
@@ -1936,9 +1941,9 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
err = PTR_ERR(req);
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
p9_free_req(clnt, req);
goto clunk_fid;
}
@@ -2024,9 +2029,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
goto error;
}
- err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto free_and_error;
}
if (rsize < count) {
@@ -2065,9 +2070,9 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
@@ -2096,9 +2101,9 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
@@ -2131,9 +2136,9 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
@@ -2162,11 +2167,11 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
- &glock->start, &glock->length, &glock->proc_id,
- &glock->client_id);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
+ &glock->start, &glock->length, &glock->proc_id,
+ &glock->client_id);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
@@ -2192,9 +2197,9 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
if (IS_ERR(req))
return PTR_ERR(req);
- err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
if (err) {
- trace_9p_protocol_dump(clnt, req->rc);
+ trace_9p_protocol_dump(clnt, &req->rc);
goto error;
}
p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index e2ef3c782c53..20f46f13fe83 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -354,7 +354,7 @@ static void p9_read_work(struct work_struct *work)
goto error;
}
- if (m->req->rc == NULL) {
+ if (m->req->rc.sdata == NULL) {
p9_debug(P9_DEBUG_ERROR,
"No recv fcall for tag %d (req %p), disconnecting!\n",
m->rc.tag, m->req);
@@ -362,7 +362,7 @@ static void p9_read_work(struct work_struct *work)
err = -EIO;
goto error;
}
- m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ m->rc.sdata = m->req->rc.sdata;
memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
m->rc.capacity = m->rc.size;
}
@@ -372,7 +372,7 @@ static void p9_read_work(struct work_struct *work)
*/
if ((m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
- m->req->rc->size = m->rc.offset;
+ m->req->rc.size = m->rc.offset;
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
status = REQ_STATUS_RCVD;
@@ -469,8 +469,8 @@ static void p9_write_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
list_move_tail(&req->req_list, &m->req_list);
- m->wbuf = req->tc->sdata;
- m->wsize = req->tc->size;
+ m->wbuf = req->tc.sdata;
+ m->wsize = req->tc.size;
m->wpos = 0;
spin_unlock(&m->client->lock);
}
@@ -663,7 +663,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
- m, current, req->tc, req->tc->id);
+ m, current, &req->tc, req->tc.id);
if (m->err < 0)
return m->err;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2ab4574183c9..c60655c90c9e 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -122,7 +122,7 @@ struct p9_rdma_context {
dma_addr_t busa;
union {
struct p9_req_t *req;
- struct p9_fcall *rc;
+ struct p9_fcall rc;
};
};
@@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS)
goto err_out;
- c->rc->size = wc->byte_len;
- err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+ c->rc.size = wc->byte_len;
+ err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
if (err)
goto err_out;
@@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
/* Check that we have not yet received a reply for this request.
*/
- if (unlikely(req->rc)) {
+ if (unlikely(req->rc.sdata)) {
pr_err("Duplicate reply for request %d", tag);
goto err_out;
}
- req->rc = c->rc;
+ req->rc.size = c->rc.size;
+ req->rc.sdata = c->rc.sdata;
p9_client_cb(client, req, REQ_STATUS_RCVD);
out:
@@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
ib_dma_unmap_single(rdma->cm_id->device,
- c->busa, c->req->tc->size,
+ c->busa, c->req->tc.size,
DMA_TO_DEVICE);
up(&rdma->sq_sem);
kfree(c);
@@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
struct ib_sge sge;
c->busa = ib_dma_map_single(rdma->cm_id->device,
- c->rc->sdata, client->msize,
+ c->rc.sdata, client->msize,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
goto error;
@@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
**/
if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
- /* Got one ! */
- kfree(req->rc);
- req->rc = NULL;
+ /* Got one! */
+ p9_fcall_fini(&req->rc);
+ req->rc.sdata = NULL;
goto dont_need_post_recv;
} else {
/* We raced and lost. */
@@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
err = -ENOMEM;
goto recv_error;
}
- rpl_context->rc = req->rc;
+ rpl_context->rc.sdata = req->rc.sdata;
/*
* Post a receive buffer for this request. We need to ensure
@@ -479,7 +480,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
goto recv_error;
}
/* remove posted receive buffer from request structure */
- req->rc = NULL;
+ req->rc.sdata = NULL;
dont_need_post_recv:
/* Post the request */
@@ -491,7 +492,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
c->req = req;
c->busa = ib_dma_map_single(rdma->cm_id->device,
- c->req->tc->sdata, c->req->tc->size,
+ c->req->tc.sdata, c->req->tc.size,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
err = -EIO;
@@ -501,7 +502,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
c->cqe.done = send_done;
sge.addr = c->busa;
- sge.length = c->req->tc->size;
+ sge.length = c->req->tc.size;
sge.lkey = rdma->pd->local_dma_lkey;
wr.next = NULL;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0acde09..3dd6ce1c0f2d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -155,7 +155,7 @@ static void req_done(struct virtqueue *vq)
}
if (len) {
- req->rc->size = len;
+ req->rc.size = len;
p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
@@ -273,12 +273,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
out_sgs = in_sgs = 0;
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0,
- VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
if (out)
sgs[out_sgs++] = chan->sg;
in = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+ VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
@@ -416,15 +416,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
if (n != outlen) {
__le32 v = cpu_to_le32(n);
- memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+ memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
outlen = n;
}
/* The size field of the message must include the length of the
* header and the length of the data. We didn't actually know
* the length of the data until this point so add it in now.
*/
- sz = cpu_to_le32(req->tc->size + outlen);
- memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
+ sz = cpu_to_le32(req->tc.size + outlen);
+ memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
} else if (uidata) {
int n = p9_get_mapped_pages(chan, &in_pages, uidata,
inlen, &offs, &need_drop);
@@ -433,7 +433,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
if (n != inlen) {
__le32 v = cpu_to_le32(n);
- memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
+ memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
inlen = n;
}
}
@@ -445,7 +445,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
/* out data */
out = pack_sg_list(chan->sg, 0,
- VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
if (out)
sgs[out_sgs++] = chan->sg;
@@ -464,7 +464,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
* alloced memory and payload onto the user buffer.
*/
in = pack_sg_list(chan->sg, out,
- VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
+ VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
if (in)
sgs[out_sgs + in_sgs++] = chan->sg + out;
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index c2d54ac76bfd..1a5b38892eb4 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
struct xen_9pfs_front_priv *priv = NULL;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
- u32 size = p9_req->tc->size;
+ u32 size = p9_req->tc.size;
struct xen_9pfs_dataring *ring;
int num;
@@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
if (!priv || priv->client != client)
return -EINVAL;
- num = p9_req->tc->tag % priv->num_rings;
+ num = p9_req->tc.tag % priv->num_rings;
ring = &priv->rings[num];
again:
@@ -176,7 +176,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
- xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
+ xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
&masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
p9_req->status = REQ_STATUS_SENT;
@@ -229,12 +229,12 @@ static void p9_xen_response(struct work_struct *work)
continue;
}
- memcpy(req->rc, &h, sizeof(h));
- req->rc->offset = 0;
+ memcpy(&req->rc, &h, sizeof(h));
+ req->rc.offset = 0;
masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
/* Then, read the whole packet (including the header) */
- xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
+ xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
masked_prod, &masked_cons,
XEN_9PFS_RING_SIZE);
--
2.17.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-09 14:33 ` [PATCH v3 " Dominique Martinet
@ 2018-08-09 14:33 ` Dominique Martinet
2018-08-10 1:23 ` piaojun
2018-08-10 0:47 ` [PATCH v3 1/2] net/9p: embed fcall in req to round down buffer allocs piaojun
1 sibling, 1 reply; 53+ messages in thread
From: Dominique Martinet @ 2018-08-09 14:33 UTC (permalink / raw)
To: v9fs-developer
Cc: Dominique Martinet, linux-kernel, linux-fsdevel, Matthew Wilcox,
Greg Kurz, Jun Piao
From: Dominique Martinet <dominique.martinet@cea.fr>
Having a specific cache for the fcall allocations helps speed up
allocations a bit, especially in case of non-"round" msizes.
The caches will automatically be merged if there are multiple caches
of items with the same size so we do not need to try to share a cache
between different clients of the same size.
Since the msize is negotiated with the server, only allocate the cache
after that negotiation has happened - previous allocations or
allocations of different sizes (e.g. zero-copy fcall) are made with
kmalloc directly.
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Greg Kurz <groug@kaod.org>
Cc: Jun Piao <piaojun@huawei.com>
---
v2:
- Add a pointer to the cache in p9_fcall to make sure a buffer
allocated with kmalloc gets freed with kfree and vice-versa
This could have been smaller with a bool but this spares having
to look at the client so looked a bit cleaner, I'm expecting this
patch will need a v3 one way or another so I went for the bolder
approach - please say if you think a smaller item is better ; I *think*
nothing relies on this being ordered the same way as the data on the
wire (struct isn't packed anyway) so we can move id after tag and add
another u8 to not have any overhead
- added likely() to cache existence check in allocation, but nothing
for msize check or free because of zc request being of different size
v3:
- defined the userdata region in the cache, as read or write calls can
access the buffer directly (lead to warnings with HARDENED_USERCOPY=y)
I didn't get any comment on v2 for this patch, but I'm still not fully
happy with this in all honesty.
Part of me believes we might be better off always allocating from the
cache even for zero-copy headers, it's a waste of space but the buffers
are there and being reused constantly for non-zc calls, and mixing
kmallocs in could lead to these buffers being really freed and
reallocated all the time instead of reusing the slab buffers
continuously.
That would let me move the likely() for the fast path, with the only
exception being the TVERSION initial call on mount, for which I still
don't have any nice idea on how to handle, using a different size in
p9_client_rpc or prepare_req if the type is TVERSION and I'm really not
happy with that...
include/net/9p/9p.h | 4 ++++
include/net/9p/client.h | 1 +
net/9p/client.c | 40 +++++++++++++++++++++++++++++++++++-----
3 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index e23896116d9a..645266b74652 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -336,6 +336,9 @@ enum p9_qid_t {
#define P9_NOFID (u32)(~0)
#define P9_MAXWELEM 16
+/* Minimal header size: len + id + tag */
+#define P9_HDRSZ 7
+
/* ample room for Twrite/Rread header */
#define P9_IOHDRSZ 24
@@ -558,6 +561,7 @@ struct p9_fcall {
size_t offset;
size_t capacity;
+ struct kmem_cache *cache;
u8 *sdata;
};
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c2671d40bb6b..735f3979d559 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -123,6 +123,7 @@ struct p9_client {
struct p9_trans_module *trans_mod;
enum p9_trans_status status;
void *trans;
+ struct kmem_cache *fcall_cache;
union {
struct {
diff --git a/net/9p/client.c b/net/9p/client.c
index ed78751aee7c..6c57ab1294d7 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -192,6 +192,9 @@ static int parse_opts(char *opts, struct p9_client *clnt)
goto free_and_return;
}
+ if (clnt->trans_mod) {
+ pr_warn("Had trans %s\n", clnt->trans_mod->name);
+ }
v9fs_put_trans(clnt->trans_mod);
clnt->trans_mod = v9fs_get_trans_by_name(s);
if (clnt->trans_mod == NULL) {
@@ -231,9 +234,16 @@ static int parse_opts(char *opts, struct p9_client *clnt)
return ret;
}
-static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
+static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
+ int alloc_msize)
{
- fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ if (likely(c->fcall_cache) && alloc_msize == c->msize) {
+ fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+ fc->cache = c->fcall_cache;
+ } else {
+ fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+ fc->cache = NULL;
+ }
if (!fc->sdata)
return -ENOMEM;
fc->capacity = alloc_msize;
@@ -242,7 +252,16 @@ static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
void p9_fcall_fini(struct p9_fcall *fc)
{
- kfree(fc->sdata);
+ /* sdata can be NULL for interrupted requests in trans_rdma,
+ * and kmem_cache_free does not do NULL-check for us
+ */
+ if (unlikely(!fc->sdata))
+ return;
+
+ if (fc->cache)
+ kmem_cache_free(fc->cache, fc->sdata);
+ else
+ kfree(fc->sdata);
}
EXPORT_SYMBOL(p9_fcall_fini);
@@ -267,9 +286,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
if (!req)
return NULL;
- if (p9_fcall_init(&req->tc, alloc_msize))
+ if (p9_fcall_init(c, &req->tc, alloc_msize))
goto free_req;
- if (p9_fcall_init(&req->rc, alloc_msize))
+ if (p9_fcall_init(c, &req->rc, alloc_msize))
goto free;
p9pdu_reset(&req->tc);
@@ -951,6 +970,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+ clnt->fcall_cache = NULL;
client_id = utsname()->nodename;
memcpy(clnt->name, client_id, strlen(client_id) + 1);
@@ -987,6 +1007,15 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
if (err)
goto close_trans;
+ /* P9_HDRSZ + 4 is the smallest packet header we can have that is
+ * followed by data accessed from userspace by read
+ */
+ clnt->fcall_cache =
+ kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
+ 0, 0, P9_HDRSZ + 4,
+ clnt->msize - (P9_HDRSZ + 4),
+ NULL);
+
return clnt;
close_trans:
@@ -1018,6 +1047,7 @@ void p9_client_destroy(struct p9_client *clnt)
p9_tag_cleanup(clnt);
+ kmem_cache_destroy(clnt->fcall_cache);
kfree(clnt);
}
EXPORT_SYMBOL(p9_client_destroy);
--
2.17.1
^ permalink raw reply related [flat|nested] 53+ messages in thread
* Re: [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-09 14:33 ` [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
@ 2018-08-10 1:23 ` piaojun
2018-08-10 1:41 ` Dominique Martinet
0 siblings, 1 reply; 53+ messages in thread
From: piaojun @ 2018-08-10 1:23 UTC (permalink / raw)
To: Dominique Martinet, v9fs-developer
Cc: Dominique Martinet, linux-kernel, linux-fsdevel, Matthew Wilcox,
Greg Kurz
Hi Dominique,
Could you help paste the test result of before-after-applied this patch in
comment? And please see my comments below.
On 2018/8/9 22:33, Dominique Martinet wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> Having a specific cache for the fcall allocations helps speed up
> allocations a bit, especially in case of non-"round" msizes.
>
> The caches will automatically be merged if there are multiple caches
> of items with the same size so we do not need to try to share a cache
> between different clients of the same size.
>
> Since the msize is negotiated with the server, only allocate the cache
> after that negotiation has happened - previous allocations or
> allocations of different sizes (e.g. zero-copy fcall) are made with
> kmalloc directly.
>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Greg Kurz <groug@kaod.org>
> Cc: Jun Piao <piaojun@huawei.com>
> ---
> v2:
> - Add a pointer to the cache in p9_fcall to make sure a buffer
> allocated with kmalloc gets freed with kfree and vice-versa
> This could have been smaller with a bool but this spares having
> to look at the client so looked a bit cleaner, I'm expecting this
> patch will need a v3 one way or another so I went for the bolder
> approach - please say if you think a smaller item is better ; I *think*
> nothing relies on this being ordered the same way as the data on the
> wire (struct isn't packed anyway) so we can move id after tag and add
> another u8 to not have any overhead
>
> - added likely() to cache existence check in allocation, but nothing
> for msize check or free because of zc request being of different size
>
> v3:
> - defined the userdata region in the cache, as read or write calls can
> access the buffer directly (lead to warnings with HARDENED_USERCOPY=y)
>
>
>
> I didn't get any comment on v2 for this patch, but I'm still not fully
> happy with this in all honesty.
>
> Part of me believes we might be better off always allocating from the
> cache even for zero-copy headers, it's a waste of space but the buffers
> are there and being reused constantly for non-zc calls, and mixing
> kmallocs in could lead to these buffers being really freed and
> reallocated all the time instead of reusing the slab buffers
> continuously.
>
> That would let me move the likely() for the fast path, with the only
> exception being the TVERSION initial call on mount, for which I still
> don't have any nice idea on how to handle, using a different size in
> p9_client_rpc or prepare_req if the type is TVERSION and I'm really not
> happy with that...
>
>
> include/net/9p/9p.h | 4 ++++
> include/net/9p/client.h | 1 +
> net/9p/client.c | 40 +++++++++++++++++++++++++++++++++++-----
> 3 files changed, 40 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
> index e23896116d9a..645266b74652 100644
> --- a/include/net/9p/9p.h
> +++ b/include/net/9p/9p.h
> @@ -336,6 +336,9 @@ enum p9_qid_t {
> #define P9_NOFID (u32)(~0)
> #define P9_MAXWELEM 16
>
> +/* Minimal header size: len + id + tag */
Here should be 'size + id + tag'.
> +#define P9_HDRSZ 7
> +
> /* ample room for Twrite/Rread header */
> #define P9_IOHDRSZ 24
>
> @@ -558,6 +561,7 @@ struct p9_fcall {
> size_t offset;
> size_t capacity;
>
> + struct kmem_cache *cache;
> u8 *sdata;
> };
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index c2671d40bb6b..735f3979d559 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -123,6 +123,7 @@ struct p9_client {
> struct p9_trans_module *trans_mod;
> enum p9_trans_status status;
> void *trans;
> + struct kmem_cache *fcall_cache;
>
> union {
> struct {
> diff --git a/net/9p/client.c b/net/9p/client.c
> index ed78751aee7c..6c57ab1294d7 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -192,6 +192,9 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> goto free_and_return;
> }
>
> + if (clnt->trans_mod) {
> + pr_warn("Had trans %s\n", clnt->trans_mod->name);
> + }
> v9fs_put_trans(clnt->trans_mod);
> clnt->trans_mod = v9fs_get_trans_by_name(s);
> if (clnt->trans_mod == NULL) {
> @@ -231,9 +234,16 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> return ret;
> }
>
> -static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
> + int alloc_msize)
> {
> - fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (likely(c->fcall_cache) && alloc_msize == c->msize) {
> + fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> + fc->cache = c->fcall_cache;
> + } else {
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + fc->cache = NULL;
> + }
> if (!fc->sdata)
> return -ENOMEM;
> fc->capacity = alloc_msize;
> @@ -242,7 +252,16 @@ static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
>
> void p9_fcall_fini(struct p9_fcall *fc)
> {
> - kfree(fc->sdata);
> + /* sdata can be NULL for interrupted requests in trans_rdma,
> + * and kmem_cache_free does not do NULL-check for us
> + */
> + if (unlikely(!fc->sdata))
> + return;
> +
> + if (fc->cache)
> + kmem_cache_free(fc->cache, fc->sdata);
> + else
> + kfree(fc->sdata);
> }
> EXPORT_SYMBOL(p9_fcall_fini);
>
> @@ -267,9 +286,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> if (!req)
> return NULL;
>
> - if (p9_fcall_init(&req->tc, alloc_msize))
> + if (p9_fcall_init(c, &req->tc, alloc_msize))
> goto free_req;
> - if (p9_fcall_init(&req->rc, alloc_msize))
> + if (p9_fcall_init(c, &req->rc, alloc_msize))
> goto free;
>
> p9pdu_reset(&req->tc);
> @@ -951,6 +970,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>
> clnt->trans_mod = NULL;
> clnt->trans = NULL;
> + clnt->fcall_cache = NULL;
>
> client_id = utsname()->nodename;
> memcpy(clnt->name, client_id, strlen(client_id) + 1);
> @@ -987,6 +1007,15 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
> if (err)
> goto close_trans;
>
> + /* P9_HDRSZ + 4 is the smallest packet header we can have that is
> + * followed by data accessed from userspace by read
> + */
> + clnt->fcall_cache =
> + kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize,
> + 0, 0, P9_HDRSZ + 4,
> + clnt->msize - (P9_HDRSZ + 4),
> + NULL);
> +
> return clnt;
>
> close_trans:
> @@ -1018,6 +1047,7 @@ void p9_client_destroy(struct p9_client *clnt)
>
> p9_tag_cleanup(clnt);
>
> + kmem_cache_destroy(clnt->fcall_cache);
I'm afraid that we should check NULL for clnt->fcall_cache.
Thanks,
Jun
> kfree(clnt);
> }
> EXPORT_SYMBOL(p9_client_destroy);
>
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-10 1:23 ` piaojun
@ 2018-08-10 1:41 ` Dominique Martinet
2018-08-10 1:49 ` piaojun
0 siblings, 1 reply; 53+ messages in thread
From: Dominique Martinet @ 2018-08-10 1:41 UTC (permalink / raw)
To: piaojun
Cc: v9fs-developer, Dominique Martinet, linux-kernel, linux-fsdevel,
Matthew Wilcox, Greg Kurz
piaojun wrote on Fri, Aug 10, 2018:
> Could you help paste the test result of before-after-applied this patch in
> comment? And please see my comments below.
Thanks the the review, do you mean the commit message?
I'll add the summary I wrote in reply to your question a few mails
before.
> > diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
> > index e23896116d9a..645266b74652 100644
> > --- a/include/net/9p/9p.h
> > +++ b/include/net/9p/9p.h
> > @@ -336,6 +336,9 @@ enum p9_qid_t {
> > #define P9_NOFID (u32)(~0)
> > #define P9_MAXWELEM 16
> >
> > +/* Minimal header size: len + id + tag */
>
> Here should be 'size + id + tag'.
hm I didn't want to repeat size, but I guess people do refer to that
field as size.
I'll actually rewrite it as:
Minimal header size: size[4] type[1] tag[2]
> > + kmem_cache_destroy(clnt->fcall_cache);
>
> I'm afraid that we should check NULL for clnt->fcall_cache.
kmem_cache_destroy() in mm/slab_common.c does the null check for us:
------
void kmem_cache_destroy(struct kmem_cache *s)
{
int err;
if (unlikely(!s))
return;
------
--
Dominique
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache
2018-08-10 1:41 ` Dominique Martinet
@ 2018-08-10 1:49 ` piaojun
0 siblings, 0 replies; 53+ messages in thread
From: piaojun @ 2018-08-10 1:49 UTC (permalink / raw)
To: Dominique Martinet
Cc: v9fs-developer, Dominique Martinet, linux-kernel, linux-fsdevel,
Matthew Wilcox, Greg Kurz
Thanks for clearing my doubt, and you can add:
Acked-by: Jun Piao <piaojun@huawei.com>
On 2018/8/10 9:41, Dominique Martinet wrote:
> piaojun wrote on Fri, Aug 10, 2018:
>> Could you help paste the test result of before-after-applied this patch in
>> comment? And please see my comments below.
>
> Thanks the the review, do you mean the commit message?
>
> I'll add the summary I wrote in reply to your question a few mails
> before.
>
Yes, I mean the commit message.
>
>>> diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
>>> index e23896116d9a..645266b74652 100644
>>> --- a/include/net/9p/9p.h
>>> +++ b/include/net/9p/9p.h
>>> @@ -336,6 +336,9 @@ enum p9_qid_t {
>>> #define P9_NOFID (u32)(~0)
>>> #define P9_MAXWELEM 16
>>>
>>> +/* Minimal header size: len + id + tag */
>>
>> Here should be 'size + id + tag'.
>
> hm I didn't want to repeat size, but I guess people do refer to that
> field as size.
> I'll actually rewrite it as:
> Minimal header size: size[4] type[1] tag[2]
>
It looks better.
>>> + kmem_cache_destroy(clnt->fcall_cache);
>>
>> I'm afraid that we should check NULL for clnt->fcall_cache.
>
> kmem_cache_destroy() in mm/slab_common.c does the null check for us:
> ------
> void kmem_cache_destroy(struct kmem_cache *s)
> {
> int err;
>
> if (unlikely(!s))
> return;
> ------
>
OK, it makes sense.
^ permalink raw reply [flat|nested] 53+ messages in thread
* Re: [PATCH v3 1/2] net/9p: embed fcall in req to round down buffer allocs
2018-08-09 14:33 ` [PATCH v3 " Dominique Martinet
2018-08-09 14:33 ` [PATCH v3 2/2] net/9p: add a per-client fcall kmem_cache Dominique Martinet
@ 2018-08-10 0:47 ` piaojun
1 sibling, 0 replies; 53+ messages in thread
From: piaojun @ 2018-08-10 0:47 UTC (permalink / raw)
To: Dominique Martinet, v9fs-developer
Cc: Dominique Martinet, linux-kernel, linux-fsdevel, Greg Kurz,
Matthew Wilcox
LGTM
On 2018/8/9 22:33, Dominique Martinet wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> 'msize' is often a power of two, or at least page-aligned, so avoiding
> an overhead of two dozen bytes for each allocation will help the
> allocator do its work and reduce memory fragmentation.
>
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> Reviewed-by: Greg Kurz <groug@kaod.org>
Acked-by: Jun Piao <piaojun@huawei.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Jun Piao <piaojun@huawei.com>
> ---
> v2:
> - Add extra label to not free uninitialized memory on alloc failure
> - Rename p9_fcall_alloc to 9p_fcall_init
> - Add a p9_fcall_fini function to echo to init
>
> v3:
> - use p9_call_fini() in rdma_request() instead of kfree, the code
> was in the following patch in previous version
>
> include/net/9p/client.h | 5 +-
> net/9p/client.c | 167 +++++++++++++++++++++-------------------
> net/9p/trans_fd.c | 12 +--
> net/9p/trans_rdma.c | 29 +++----
> net/9p/trans_virtio.c | 18 ++---
> net/9p/trans_xen.c | 12 +--
> 6 files changed, 125 insertions(+), 118 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index a4dc42c53d18..c2671d40bb6b 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -95,8 +95,8 @@ struct p9_req_t {
> int status;
> int t_err;
> wait_queue_head_t wq;
> - struct p9_fcall *tc;
> - struct p9_fcall *rc;
> + struct p9_fcall tc;
> + struct p9_fcall rc;
> void *aux;
> struct list_head req_list;
> };
> @@ -230,6 +230,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> kgid_t gid, struct p9_qid *);
> int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
> int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
> +void p9_fcall_fini(struct p9_fcall *fc);
> struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
> void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>
> diff --git a/net/9p/client.c b/net/9p/client.c
> index 88db45966740..ed78751aee7c 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,16 +231,20 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> return ret;
> }
>
> -static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
> +static int p9_fcall_init(struct p9_fcall *fc, int alloc_msize)
> {
> - struct p9_fcall *fc;
> - fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
> - if (!fc)
> - return NULL;
> + fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> + if (!fc->sdata)
> + return -ENOMEM;
> fc->capacity = alloc_msize;
> - fc->sdata = (char *) fc + sizeof(struct p9_fcall);
> - return fc;
> + return 0;
> +}
> +
> +void p9_fcall_fini(struct p9_fcall *fc)
> +{
> + kfree(fc->sdata);
> }
> +EXPORT_SYMBOL(p9_fcall_fini);
>
> static struct kmem_cache *p9_req_cache;
>
> @@ -263,13 +267,13 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> if (!req)
> return NULL;
>
> - req->tc = p9_fcall_alloc(alloc_msize);
> - req->rc = p9_fcall_alloc(alloc_msize);
> - if (!req->tc || !req->rc)
> + if (p9_fcall_init(&req->tc, alloc_msize))
> + goto free_req;
> + if (p9_fcall_init(&req->rc, alloc_msize))
> goto free;
>
> - p9pdu_reset(req->tc);
> - p9pdu_reset(req->rc);
> + p9pdu_reset(&req->tc);
> + p9pdu_reset(&req->rc);
> req->status = REQ_STATUS_ALLOC;
> init_waitqueue_head(&req->wq);
> INIT_LIST_HEAD(&req->req_list);
> @@ -281,7 +285,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> GFP_NOWAIT);
> else
> tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT);
> - req->tc->tag = tag;
> + req->tc.tag = tag;
> spin_unlock_irq(&c->lock);
> idr_preload_end();
> if (tag < 0)
> @@ -290,8 +294,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> return req;
>
> free:
> - kfree(req->tc);
> - kfree(req->rc);
> + p9_fcall_fini(&req->tc);
> + p9_fcall_fini(&req->rc);
> +free_req:
> kmem_cache_free(p9_req_cache, req);
> return ERR_PTR(-ENOMEM);
> }
> @@ -329,14 +334,14 @@ EXPORT_SYMBOL(p9_tag_lookup);
> static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
> {
> unsigned long flags;
> - u16 tag = r->tc->tag;
> + u16 tag = r->tc.tag;
>
> p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
> spin_lock_irqsave(&c->lock, flags);
> idr_remove(&c->reqs, tag);
> spin_unlock_irqrestore(&c->lock, flags);
> - kfree(r->tc);
> - kfree(r->rc);
> + p9_fcall_fini(&r->tc);
> + p9_fcall_fini(&r->rc);
> kmem_cache_free(p9_req_cache, r);
> }
>
> @@ -368,7 +373,7 @@ static void p9_tag_cleanup(struct p9_client *c)
> */
> void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
> {
> - p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
> + p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag);
>
> /*
> * This barrier is needed to make sure any change made to req before
> @@ -378,7 +383,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
> req->status = status;
>
> wake_up(&req->wq);
> - p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
> + p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
> }
> EXPORT_SYMBOL(p9_client_cb);
>
> @@ -449,18 +454,18 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
> int err;
> int ecode;
>
> - err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
> - if (req->rc->size >= c->msize) {
> + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
> + if (req->rc.size >= c->msize) {
> p9_debug(P9_DEBUG_ERROR,
> "requested packet size too big: %d\n",
> - req->rc->size);
> + req->rc.size);
> return -EIO;
> }
> /*
> * dump the response from server
> * This should be after check errors which poplulate pdu_fcall.
> */
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> if (err) {
> p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
> return err;
> @@ -470,7 +475,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
>
> if (!p9_is_proto_dotl(c)) {
> char *ename;
> - err = p9pdu_readf(req->rc, c->proto_version, "s?d",
> + err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
> &ename, &ecode);
> if (err)
> goto out_err;
> @@ -486,7 +491,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
> }
> kfree(ename);
> } else {
> - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
> + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
> err = -ecode;
>
> p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
> @@ -520,12 +525,12 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> int8_t type;
> char *ename = NULL;
>
> - err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
> + err = p9_parse_header(&req->rc, NULL, &type, NULL, 0);
> /*
> * dump the response from server
> * This should be after parse_header which poplulate pdu_fcall.
> */
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> if (err) {
> p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
> return err;
> @@ -540,13 +545,13 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> /* 7 = header size for RERROR; */
> int inline_len = in_hdrlen - 7;
>
> - len = req->rc->size - req->rc->offset;
> + len = req->rc.size - req->rc.offset;
> if (len > (P9_ZC_HDR_SZ - 7)) {
> err = -EFAULT;
> goto out_err;
> }
>
> - ename = &req->rc->sdata[req->rc->offset];
> + ename = &req->rc.sdata[req->rc.offset];
> if (len > inline_len) {
> /* We have error in external buffer */
> if (!copy_from_iter_full(ename + inline_len,
> @@ -556,7 +561,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> }
> }
> ename = NULL;
> - err = p9pdu_readf(req->rc, c->proto_version, "s?d",
> + err = p9pdu_readf(&req->rc, c->proto_version, "s?d",
> &ename, &ecode);
> if (err)
> goto out_err;
> @@ -572,7 +577,7 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
> }
> kfree(ename);
> } else {
> - err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
> + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
> err = -ecode;
>
> p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
> @@ -605,7 +610,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
> int16_t oldtag;
> int err;
>
> - err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1);
> + err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1);
> if (err)
> return err;
>
> @@ -649,12 +654,12 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
> return req;
>
> /* marshall the data */
> - p9pdu_prepare(req->tc, req->tc->tag, type);
> - err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
> + p9pdu_prepare(&req->tc, req->tc.tag, type);
> + err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap);
> if (err)
> goto reterr;
> - p9pdu_finalize(c, req->tc);
> - trace_9p_client_req(c, type, req->tc->tag);
> + p9pdu_finalize(c, &req->tc);
> + trace_9p_client_req(c, type, req->tc.tag);
> return req;
> reterr:
> p9_free_req(c, req);
> @@ -739,7 +744,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
> goto reterr;
>
> err = p9_check_errors(c, req);
> - trace_9p_client_res(c, type, req->rc->tag, err);
> + trace_9p_client_res(c, type, req->rc.tag, err);
> if (!err)
> return req;
> reterr:
> @@ -821,7 +826,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
> goto reterr;
>
> err = p9_check_zc_errors(c, req, uidata, in_hdrlen);
> - trace_9p_client_res(c, type, req->rc->tag, err);
> + trace_9p_client_res(c, type, req->rc.tag, err);
> if (!err)
> return req;
> reterr:
> @@ -904,10 +909,10 @@ static int p9_client_version(struct p9_client *c)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);
> + err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version);
> if (err) {
> p9_debug(P9_DEBUG_9P, "version error %d\n", err);
> - trace_9p_protocol_dump(c, req->rc);
> + trace_9p_protocol_dump(c, &req->rc);
> goto error;
> }
>
> @@ -1056,9 +1061,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1113,9 +1118,9 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto clunk_fid;
> }
> @@ -1180,9 +1185,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1224,9 +1229,9 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1269,9 +1274,9 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1308,9 +1313,9 @@ int p9_client_symlink(struct p9_fid *dfid, const char *name,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
>
> @@ -1506,10 +1511,10 @@ p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err)
> break;
> }
>
> - *err = p9pdu_readf(req->rc, clnt->proto_version,
> + *err = p9pdu_readf(&req->rc, clnt->proto_version,
> "D", &count, &dataptr);
> if (*err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> break;
> }
> @@ -1579,9 +1584,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
> break;
> }
>
> - *err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);
> + *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &count);
> if (*err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> break;
> }
> @@ -1623,9 +1628,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1676,9 +1681,9 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1828,11 +1833,11 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
> - &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
> - &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
> + &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
> + &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto error;
> }
> @@ -1936,9 +1941,9 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
> err = PTR_ERR(req);
> goto error;
> }
> - err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> p9_free_req(clnt, req);
> goto clunk_fid;
> }
> @@ -2024,9 +2029,9 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
> goto error;
> }
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto free_and_error;
> }
> if (rsize < count) {
> @@ -2065,9 +2070,9 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
> @@ -2096,9 +2101,9 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
> @@ -2131,9 +2136,9 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);
> @@ -2162,11 +2167,11 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "bqqds", &glock->type,
> - &glock->start, &glock->length, &glock->proc_id,
> - &glock->client_id);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type,
> + &glock->start, &glock->length, &glock->proc_id,
> + &glock->client_id);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "
> @@ -2192,9 +2197,9 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> - err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);
> + err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target);
> if (err) {
> - trace_9p_protocol_dump(clnt, req->rc);
> + trace_9p_protocol_dump(clnt, &req->rc);
> goto error;
> }
> p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);
> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
> index e2ef3c782c53..20f46f13fe83 100644
> --- a/net/9p/trans_fd.c
> +++ b/net/9p/trans_fd.c
> @@ -354,7 +354,7 @@ static void p9_read_work(struct work_struct *work)
> goto error;
> }
>
> - if (m->req->rc == NULL) {
> + if (m->req->rc.sdata == NULL) {
> p9_debug(P9_DEBUG_ERROR,
> "No recv fcall for tag %d (req %p), disconnecting!\n",
> m->rc.tag, m->req);
> @@ -362,7 +362,7 @@ static void p9_read_work(struct work_struct *work)
> err = -EIO;
> goto error;
> }
> - m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
> + m->rc.sdata = m->req->rc.sdata;
> memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
> m->rc.capacity = m->rc.size;
> }
> @@ -372,7 +372,7 @@ static void p9_read_work(struct work_struct *work)
> */
> if ((m->req) && (m->rc.offset == m->rc.capacity)) {
> p9_debug(P9_DEBUG_TRANS, "got new packet\n");
> - m->req->rc->size = m->rc.offset;
> + m->req->rc.size = m->rc.offset;
> spin_lock(&m->client->lock);
> if (m->req->status != REQ_STATUS_ERROR)
> status = REQ_STATUS_RCVD;
> @@ -469,8 +469,8 @@ static void p9_write_work(struct work_struct *work)
> p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
> list_move_tail(&req->req_list, &m->req_list);
>
> - m->wbuf = req->tc->sdata;
> - m->wsize = req->tc->size;
> + m->wbuf = req->tc.sdata;
> + m->wsize = req->tc.size;
> m->wpos = 0;
> spin_unlock(&m->client->lock);
> }
> @@ -663,7 +663,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
> struct p9_conn *m = &ts->conn;
>
> p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
> - m, current, req->tc, req->tc->id);
> + m, current, &req->tc, req->tc.id);
> if (m->err < 0)
> return m->err;
>
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index 2ab4574183c9..c60655c90c9e 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -122,7 +122,7 @@ struct p9_rdma_context {
> dma_addr_t busa;
> union {
> struct p9_req_t *req;
> - struct p9_fcall *rc;
> + struct p9_fcall rc;
> };
> };
>
> @@ -320,8 +320,8 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
> if (wc->status != IB_WC_SUCCESS)
> goto err_out;
>
> - c->rc->size = wc->byte_len;
> - err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
> + c->rc.size = wc->byte_len;
> + err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
> if (err)
> goto err_out;
>
> @@ -331,12 +331,13 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc)
>
> /* Check that we have not yet received a reply for this request.
> */
> - if (unlikely(req->rc)) {
> + if (unlikely(req->rc.sdata)) {
> pr_err("Duplicate reply for request %d", tag);
> goto err_out;
> }
>
> - req->rc = c->rc;
> + req->rc.size = c->rc.size;
> + req->rc.sdata = c->rc.sdata;
> p9_client_cb(client, req, REQ_STATUS_RCVD);
>
> out:
> @@ -361,7 +362,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
> container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
>
> ib_dma_unmap_single(rdma->cm_id->device,
> - c->busa, c->req->tc->size,
> + c->busa, c->req->tc.size,
> DMA_TO_DEVICE);
> up(&rdma->sq_sem);
> kfree(c);
> @@ -401,7 +402,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
> struct ib_sge sge;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->rc->sdata, client->msize,
> + c->rc.sdata, client->msize,
> DMA_FROM_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
> goto error;
> @@ -443,9 +444,9 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> **/
> if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
> if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
> - /* Got one ! */
> - kfree(req->rc);
> - req->rc = NULL;
> + /* Got one! */
> + p9_fcall_fini(&req->rc);
> + req->rc.sdata = NULL;
> goto dont_need_post_recv;
> } else {
> /* We raced and lost. */
> @@ -459,7 +460,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> err = -ENOMEM;
> goto recv_error;
> }
> - rpl_context->rc = req->rc;
> + rpl_context->rc.sdata = req->rc.sdata;
>
> /*
> * Post a receive buffer for this request. We need to ensure
> @@ -479,7 +480,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> goto recv_error;
> }
> /* remove posted receive buffer from request structure */
> - req->rc = NULL;
> + req->rc.sdata = NULL;
>
> dont_need_post_recv:
> /* Post the request */
> @@ -491,7 +492,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> c->req = req;
>
> c->busa = ib_dma_map_single(rdma->cm_id->device,
> - c->req->tc->sdata, c->req->tc->size,
> + c->req->tc.sdata, c->req->tc.size,
> DMA_TO_DEVICE);
> if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
> err = -EIO;
> @@ -501,7 +502,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
> c->cqe.done = send_done;
>
> sge.addr = c->busa;
> - sge.length = c->req->tc->size;
> + sge.length = c->req->tc.size;
> sge.lkey = rdma->pd->local_dma_lkey;
>
> wr.next = NULL;
> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> index 7728b0acde09..3dd6ce1c0f2d 100644
> --- a/net/9p/trans_virtio.c
> +++ b/net/9p/trans_virtio.c
> @@ -155,7 +155,7 @@ static void req_done(struct virtqueue *vq)
> }
>
> if (len) {
> - req->rc->size = len;
> + req->rc.size = len;
> p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
> }
> }
> @@ -273,12 +273,12 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
> out_sgs = in_sgs = 0;
> /* Handle out VirtIO ring buffers */
> out = pack_sg_list(chan->sg, 0,
> - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
> + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
> if (out)
> sgs[out_sgs++] = chan->sg;
>
> in = pack_sg_list(chan->sg, out,
> - VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
> + VIRTQUEUE_NUM, req->rc.sdata, req->rc.capacity);
> if (in)
> sgs[out_sgs + in_sgs++] = chan->sg + out;
>
> @@ -416,15 +416,15 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
> if (n != outlen) {
> __le32 v = cpu_to_le32(n);
> - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
> + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
> outlen = n;
> }
> /* The size field of the message must include the length of the
> * header and the length of the data. We didn't actually know
> * the length of the data until this point so add it in now.
> */
> - sz = cpu_to_le32(req->tc->size + outlen);
> - memcpy(&req->tc->sdata[0], &sz, sizeof(sz));
> + sz = cpu_to_le32(req->tc.size + outlen);
> + memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
> } else if (uidata) {
> int n = p9_get_mapped_pages(chan, &in_pages, uidata,
> inlen, &offs, &need_drop);
> @@ -433,7 +433,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
> if (n != inlen) {
> __le32 v = cpu_to_le32(n);
> - memcpy(&req->tc->sdata[req->tc->size - 4], &v, 4);
> + memcpy(&req->tc.sdata[req->tc.size - 4], &v, 4);
> inlen = n;
> }
> }
> @@ -445,7 +445,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
>
> /* out data */
> out = pack_sg_list(chan->sg, 0,
> - VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
> + VIRTQUEUE_NUM, req->tc.sdata, req->tc.size);
>
> if (out)
> sgs[out_sgs++] = chan->sg;
> @@ -464,7 +464,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
> * alloced memory and payload onto the user buffer.
> */
> in = pack_sg_list(chan->sg, out,
> - VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
> + VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
> if (in)
> sgs[out_sgs + in_sgs++] = chan->sg + out;
>
> diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
> index c2d54ac76bfd..1a5b38892eb4 100644
> --- a/net/9p/trans_xen.c
> +++ b/net/9p/trans_xen.c
> @@ -141,7 +141,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> struct xen_9pfs_front_priv *priv = NULL;
> RING_IDX cons, prod, masked_cons, masked_prod;
> unsigned long flags;
> - u32 size = p9_req->tc->size;
> + u32 size = p9_req->tc.size;
> struct xen_9pfs_dataring *ring;
> int num;
>
> @@ -154,7 +154,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> if (!priv || priv->client != client)
> return -EINVAL;
>
> - num = p9_req->tc->tag % priv->num_rings;
> + num = p9_req->tc.tag % priv->num_rings;
> ring = &priv->rings[num];
>
> again:
> @@ -176,7 +176,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
> masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
> masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
>
> - xen_9pfs_write_packet(ring->data.out, p9_req->tc->sdata, size,
> + xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
> &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
>
> p9_req->status = REQ_STATUS_SENT;
> @@ -229,12 +229,12 @@ static void p9_xen_response(struct work_struct *work)
> continue;
> }
>
> - memcpy(req->rc, &h, sizeof(h));
> - req->rc->offset = 0;
> + memcpy(&req->rc, &h, sizeof(h));
> + req->rc.offset = 0;
>
> masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
> /* Then, read the whole packet (including the header) */
> - xen_9pfs_read_packet(req->rc->sdata, ring->data.in, h.size,
> + xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
> masked_prod, &masked_cons,
> XEN_9PFS_RING_SIZE);
>
>
^ permalink raw reply [flat|nested] 53+ messages in thread