All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] wip-alloc-request
@ 2016-02-10 11:24 Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len Ilya Dryomov
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Ilya Dryomov @ 2016-02-10 11:24 UTC (permalink / raw)
  To: ceph-devel; +Cc: Zheng Yan

Hello,

This is to replace

40ca9b2c9bbb libceph: enlarge max number of operations in OSD request
d0c2fddbf154 libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op
7c6ad0dab79d libceph: allow reserving operations in OSD request

The current solution is clunky: for larger than usual requests, it does
two memory allocations instead of one and wastes 3 ops (~500 bytes) of
memory for each request.  This series fixes both of those.

Thanks,

                Ilya


Ilya Dryomov (3):
  libceph: rename ceph_osd_req_op::payload_len to indata_len
  libceph: osdc->req_mempool should be backed by a slab pool
  libceph: enable large, variable-sized OSD requests

Yan, Zheng (1):
  libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op

 drivers/block/rbd.c             |  4 +---
 include/linux/ceph/osd_client.h | 13 +++++++----
 net/ceph/osd_client.c           | 52 +++++++++++++++++++++++------------------
 3 files changed, 38 insertions(+), 31 deletions(-)

-- 
2.4.3


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len
  2016-02-10 11:24 [PATCH 0/4] wip-alloc-request Ilya Dryomov
@ 2016-02-10 11:24 ` Ilya Dryomov
  2016-02-10 12:12   ` Yan, Zheng
  2016-02-10 11:24 ` [PATCH 2/4] libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op Ilya Dryomov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Ilya Dryomov @ 2016-02-10 11:24 UTC (permalink / raw)
  To: ceph-devel; +Cc: Zheng Yan

Follow userspace nomenclature on this - the next commit adds
outdata_len.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h |  2 +-
 net/ceph/osd_client.c           | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 7506b485bb6d..35c8b006916f 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -77,7 +77,7 @@ struct ceph_osd_data {
 struct ceph_osd_req_op {
 	u16 op;           /* CEPH_OSD_OP_* */
 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
-	u32 payload_len;
+	u32 indata_len;   /* request */
 	union {
 		struct ceph_osd_data raw_data_in;
 		struct {
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a1babb77b813..38bdfafe9dd6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -498,7 +498,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
 	if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
 		payload_len += length;
 
-	op->payload_len = payload_len;
+	op->indata_len = payload_len;
 }
 EXPORT_SYMBOL(osd_req_op_extent_init);
 
@@ -517,7 +517,7 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
 	BUG_ON(length > previous);
 
 	op->extent.length = length;
-	op->payload_len -= previous - length;
+	op->indata_len -= previous - length;
 }
 EXPORT_SYMBOL(osd_req_op_extent_update);
 
@@ -554,7 +554,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
 
 	op->cls.argc = 0;	/* currently unused */
 
-	op->payload_len = payload_len;
+	op->indata_len = payload_len;
 }
 EXPORT_SYMBOL(osd_req_op_cls_init);
 
@@ -587,7 +587,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
 	op->xattr.cmp_mode = cmp_mode;
 
 	ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
-	op->payload_len = payload_len;
+	op->indata_len = payload_len;
 	return 0;
 }
 EXPORT_SYMBOL(osd_req_op_xattr_init);
@@ -707,7 +707,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 			BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
 			dst->cls.indata_len = cpu_to_le32(data_length);
 			ceph_osdc_msg_data_add(req->r_request, osd_data);
-			src->payload_len += data_length;
+			src->indata_len += data_length;
 			request_data_len += data_length;
 		}
 		osd_data = &src->cls.response_data;
@@ -750,7 +750,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 
 	dst->op = cpu_to_le16(src->op);
 	dst->flags = cpu_to_le32(src->flags);
-	dst->payload_len = cpu_to_le32(src->payload_len);
+	dst->payload_len = cpu_to_le32(src->indata_len);
 
 	return request_data_len;
 }
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op
  2016-02-10 11:24 [PATCH 0/4] wip-alloc-request Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len Ilya Dryomov
@ 2016-02-10 11:24 ` Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 3/4] libceph: osdc->req_mempool should be backed by a slab pool Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 4/4] libceph: enable large, variable-sized OSD requests Ilya Dryomov
  3 siblings, 0 replies; 9+ messages in thread
From: Ilya Dryomov @ 2016-02-10 11:24 UTC (permalink / raw)
  To: ceph-devel; +Cc: Zheng Yan

From: "Yan, Zheng" <zyan@redhat.com>

This avoids defining large array of r_reply_op_{len,result} in
in struct ceph_osd_request.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c             | 2 +-
 include/linux/ceph/osd_client.h | 5 +++--
 net/ceph/osd_client.c           | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4a876785b68c..94f31bde73e8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1854,7 +1854,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 	 * passed to the block layer, which just supports a 32-bit
 	 * length field.
 	 */
-	obj_request->xferred = osd_req->r_reply_op_len[0];
+	obj_request->xferred = osd_req->r_ops[0].outdata_len;
 	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
 
 	opcode = osd_req->r_ops[0].op;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 35c8b006916f..c6d1d603bacf 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -78,6 +78,9 @@ struct ceph_osd_req_op {
 	u16 op;           /* CEPH_OSD_OP_* */
 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
 	u32 indata_len;   /* request */
+	u32 outdata_len;  /* reply */
+	s32 rval;
+
 	union {
 		struct ceph_osd_data raw_data_in;
 		struct {
@@ -148,8 +151,6 @@ struct ceph_osd_request {
 	struct ceph_eversion *r_request_reassert_version;
 
 	int               r_result;
-	int               r_reply_op_len[CEPH_OSD_MAX_OP];
-	s32               r_reply_op_result[CEPH_OSD_MAX_OP];
 	int               r_got_reply;
 	int		  r_linger;
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 38bdfafe9dd6..d92fd4ad5a66 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1820,7 +1820,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 		int len;
 
 		len = le32_to_cpu(op->payload_len);
-		req->r_reply_op_len[i] = len;
+		req->r_ops[i].outdata_len = len;
 		dout(" op %d has %d bytes\n", i, len);
 		payload_len += len;
 		p += sizeof(*op);
@@ -1835,7 +1835,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 	ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
 	retry_attempt = ceph_decode_32(&p);
 	for (i = 0; i < numops; i++)
-		req->r_reply_op_result[i] = ceph_decode_32(&p);
+		req->r_ops[i].rval = ceph_decode_32(&p);
 
 	if (le16_to_cpu(msg->hdr.version) >= 6) {
 		p += 8 + 4; /* skip replay_version */
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] libceph: osdc->req_mempool should be backed by a slab pool
  2016-02-10 11:24 [PATCH 0/4] wip-alloc-request Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 2/4] libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op Ilya Dryomov
@ 2016-02-10 11:24 ` Ilya Dryomov
  2016-02-10 11:24 ` [PATCH 4/4] libceph: enable large, variable-sized OSD requests Ilya Dryomov
  3 siblings, 0 replies; 9+ messages in thread
From: Ilya Dryomov @ 2016-02-10 11:24 UTC (permalink / raw)
  To: ceph-devel; +Cc: Zheng Yan

ceph_osd_request_cache was introduced a long time ago.  Also, osd_req
is about to get a flexible array member, which ceph_osd_request_cache
is going to be aware of.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/osd_client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d92fd4ad5a66..8bf4f74064e5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2637,8 +2637,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	    round_jiffies_relative(osdc->client->options->osd_idle_ttl));
 
 	err = -ENOMEM;
-	osdc->req_mempool = mempool_create_kmalloc_pool(10,
-					sizeof(struct ceph_osd_request));
+	osdc->req_mempool = mempool_create_slab_pool(10,
+						     ceph_osd_request_cache);
 	if (!osdc->req_mempool)
 		goto out;
 
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] libceph: enable large, variable-sized OSD requests
  2016-02-10 11:24 [PATCH 0/4] wip-alloc-request Ilya Dryomov
                   ` (2 preceding siblings ...)
  2016-02-10 11:24 ` [PATCH 3/4] libceph: osdc->req_mempool should be backed by a slab pool Ilya Dryomov
@ 2016-02-10 11:24 ` Ilya Dryomov
  2016-02-10 11:56   ` Yan, Zheng
  3 siblings, 1 reply; 9+ messages in thread
From: Ilya Dryomov @ 2016-02-10 11:24 UTC (permalink / raw)
  To: ceph-devel; +Cc: Zheng Yan

Turn r_ops into a flexible array member to enable large, consisting of
up to 16 ops, OSD requests.  The use case is scattered writeback in
cephfs and, as far as the kernel client is concerned, 16 is just a made
up number.

r_ops had size 3 for copyup+hint+write, but copyup is really a special
case - it can only happen once.  ceph_osd_request_cache is therefore
stuffed with num_ops=2 requests, anything bigger than that is allocated
with kmalloc().  req_mempool is backed by ceph_osd_request_cache, which
means either num_ops=1 or num_ops=2 for use_mempool=true - all existing
users (ceph_writepages_start(), ceph_osdc_writepages()) are fine with
that.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c             |  2 --
 include/linux/ceph/osd_client.h |  6 ++++--
 net/ceph/osd_client.c           | 32 +++++++++++++++++++-------------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 94f31bde73e8..08018710f363 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1847,8 +1847,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 	if (osd_req->r_result < 0)
 		obj_request->result = osd_req->r_result;
 
-	rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
-
 	/*
 	 * We support a 64-bit length, but ultimately it has to be
 	 * passed to the block layer, which just supports a 32-bit
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index c6d1d603bacf..aada6a1383a4 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -43,7 +43,8 @@ struct ceph_osd {
 };
 
 
-#define CEPH_OSD_MAX_OP	3
+#define CEPH_OSD_SLAB_OPS	2
+#define CEPH_OSD_MAX_OPS	16
 
 enum ceph_osd_data_type {
 	CEPH_OSD_DATA_TYPE_NONE = 0,
@@ -139,7 +140,6 @@ struct ceph_osd_request {
 
 	/* request osd ops array  */
 	unsigned int		r_num_ops;
-	struct ceph_osd_req_op	r_ops[CEPH_OSD_MAX_OP];
 
 	/* these are updated on each send */
 	__le32           *r_request_osdmap_epoch;
@@ -175,6 +175,8 @@ struct ceph_osd_request {
 	unsigned long     r_stamp;            /* send OR check time */
 
 	struct ceph_snap_context *r_snapc;    /* snap context for writes */
+
+	struct ceph_osd_req_op r_ops[];
 };
 
 struct ceph_request_redirect {
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 8bf4f74064e5..37a0fc5273d1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref)
 	ceph_put_snap_context(req->r_snapc);
 	if (req->r_mempool)
 		mempool_free(req, req->r_osdc->req_mempool);
-	else
+	else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
 		kmem_cache_free(ceph_osd_request_cache, req);
-
+	else
+		kfree(req);
 }
 
 void ceph_osdc_get_request(struct ceph_osd_request *req)
@@ -369,9 +370,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	struct ceph_msg *msg;
 	size_t msg_size;
 
-	BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
-	BUG_ON(num_ops > CEPH_OSD_MAX_OP);
-
 	msg_size = 4 + 4 + 8 + 8 + 4+8;
 	msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
 	msg_size += 1 + 8 + 4 + 4;     /* pg_t */
@@ -383,14 +381,21 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 	msg_size += 4;
 
 	if (use_mempool) {
+		BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
 		req = mempool_alloc(osdc->req_mempool, gfp_flags);
-		memset(req, 0, sizeof(*req));
+	} else if (num_ops <= CEPH_OSD_SLAB_OPS) {
+		req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
 	} else {
-		req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
+		BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
+		req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
+			      gfp_flags);
 	}
-	if (req == NULL)
+	if (unlikely(!req))
 		return NULL;
 
+	/* req only, each op is zeroed in _osd_req_op_init() */
+	memset(req, 0, sizeof(*req));
+
 	req->r_osdc = osdc;
 	req->r_mempool = use_mempool;
 	req->r_num_ops = num_ops;
@@ -1809,7 +1814,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 
 	ceph_decode_need(&p, end, 4, bad_put);
 	numops = ceph_decode_32(&p);
-	if (numops > CEPH_OSD_MAX_OP)
+	if (numops > CEPH_OSD_MAX_OPS)
 		goto bad_put;
 	if (numops != req->r_num_ops)
 		goto bad_put;
@@ -2773,11 +2778,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages);
 
 int ceph_osdc_setup(void)
 {
+	size_t size = sizeof(struct ceph_osd_request) +
+	    CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
+
 	BUG_ON(ceph_osd_request_cache);
-	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
-					sizeof (struct ceph_osd_request),
-					__alignof__(struct ceph_osd_request),
-					0, NULL);
+	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
+						   0, 0, NULL);
 
 	return ceph_osd_request_cache ? 0 : -ENOMEM;
 }
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] libceph: enable large, variable-sized OSD requests
  2016-02-10 11:24 ` [PATCH 4/4] libceph: enable large, variable-sized OSD requests Ilya Dryomov
@ 2016-02-10 11:56   ` Yan, Zheng
  2016-02-11  9:57     ` Ilya Dryomov
  0 siblings, 1 reply; 9+ messages in thread
From: Yan, Zheng @ 2016-02-10 11:56 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel


> On Feb 10, 2016, at 19:24, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> Turn r_ops into a flexible array member to enable large, consisting of
> up to 16 ops, OSD requests.  The use case is scattered writeback in
> cephfs and, as far as the kernel client is concerned, 16 is just a made
> up number.
> 
> r_ops had size 3 for copyup+hint+write, but copyup is really a special
> case - it can only happen once.  ceph_osd_request_cache is therefore
> stuffed with num_ops=2 requests, anything bigger than that is allocated
> with kmalloc().  req_mempool is backed by ceph_osd_request_cache, which
> means either num_ops=1 or num_ops=2 for use_mempool=true - all existing
> users (ceph_writepages_start(), ceph_osdc_writepages()) are fine with
> that.
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
> drivers/block/rbd.c             |  2 --
> include/linux/ceph/osd_client.h |  6 ++++--
> net/ceph/osd_client.c           | 32 +++++++++++++++++++-------------
> 3 files changed, 23 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 94f31bde73e8..08018710f363 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -1847,8 +1847,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
> 	if (osd_req->r_result < 0)
> 		obj_request->result = osd_req->r_result;
> 
> -	rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
> -
> 	/*
> 	 * We support a 64-bit length, but ultimately it has to be
> 	 * passed to the block layer, which just supports a 32-bit
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index c6d1d603bacf..aada6a1383a4 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -43,7 +43,8 @@ struct ceph_osd {
> };
> 
> 
> -#define CEPH_OSD_MAX_OP	3
> +#define CEPH_OSD_SLAB_OPS	2
> +#define CEPH_OSD_MAX_OPS	16
> 
> enum ceph_osd_data_type {
> 	CEPH_OSD_DATA_TYPE_NONE = 0,
> @@ -139,7 +140,6 @@ struct ceph_osd_request {
> 
> 	/* request osd ops array  */
> 	unsigned int		r_num_ops;
> -	struct ceph_osd_req_op	r_ops[CEPH_OSD_MAX_OP];
> 
> 	/* these are updated on each send */
> 	__le32           *r_request_osdmap_epoch;
> @@ -175,6 +175,8 @@ struct ceph_osd_request {
> 	unsigned long     r_stamp;            /* send OR check time */
> 
> 	struct ceph_snap_context *r_snapc;    /* snap context for writes */
> +
> +	struct ceph_osd_req_op r_ops[];
> };
> 
> struct ceph_request_redirect {
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 8bf4f74064e5..37a0fc5273d1 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref)
> 	ceph_put_snap_context(req->r_snapc);
> 	if (req->r_mempool)
> 		mempool_free(req, req->r_osdc->req_mempool);
> -	else
> +	else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
> 		kmem_cache_free(ceph_osd_request_cache, req);
> -
> +	else
> +		kfree(req);
> }
> 
> void ceph_osdc_get_request(struct ceph_osd_request *req)
> @@ -369,9 +370,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
> 	struct ceph_msg *msg;
> 	size_t msg_size;
> 
> -	BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
> -	BUG_ON(num_ops > CEPH_OSD_MAX_OP);
> -
> 	msg_size = 4 + 4 + 8 + 8 + 4+8;
> 	msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
> 	msg_size += 1 + 8 + 4 + 4;     /* pg_t */
> @@ -383,14 +381,21 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
> 	msg_size += 4;
> 
> 	if (use_mempool) {
> +		BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
> 		req = mempool_alloc(osdc->req_mempool, gfp_flags);
> -		memset(req, 0, sizeof(*req));
> +	} else if (num_ops <= CEPH_OSD_SLAB_OPS) {
> +		req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
> 	} else {
> -		req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
> +		BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
> +		req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
> +			      gfp_flags);
> 	}
> -	if (req == NULL)
> +	if (unlikely(!req))
> 		return NULL;
> 
> +	/* req only, each op is zeroed in _osd_req_op_init() */
> +	memset(req, 0, sizeof(*req));
> +
> 	req->r_osdc = osdc;
> 	req->r_mempool = use_mempool;
> 	req->r_num_ops = num_ops;
> @@ -1809,7 +1814,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
> 
> 	ceph_decode_need(&p, end, 4, bad_put);
> 	numops = ceph_decode_32(&p);
> -	if (numops > CEPH_OSD_MAX_OP)
> +	if (numops > CEPH_OSD_MAX_OPS)
> 		goto bad_put;
> 	if (numops != req->r_num_ops)
> 		goto bad_put;
> @@ -2773,11 +2778,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages);
> 
> int ceph_osdc_setup(void)
> {
> +	size_t size = sizeof(struct ceph_osd_request) +
> +	    CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
> +
> 	BUG_ON(ceph_osd_request_cache);
> -	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
> -					sizeof (struct ceph_osd_request),
> -					__alignof__(struct ceph_osd_request),
> -					0, NULL);
> +	ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
> +						   0, 0, NULL);
> 
> 	return ceph_osd_request_cache ? 0 : -ENOMEM;
> }

This approach is better than mine. But the code that calculates r_request/r_reply messages size get lost in your patch

Regards
Yan, Zheng

> -- 
> 2.4.3
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len
  2016-02-10 11:24 ` [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len Ilya Dryomov
@ 2016-02-10 12:12   ` Yan, Zheng
  2016-02-10 12:23     ` Yan, Zheng
  0 siblings, 1 reply; 9+ messages in thread
From: Yan, Zheng @ 2016-02-10 12:12 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel


> On Feb 10, 2016, at 19:24, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> Follow userspace nomenclature on this - the next commit adds
> outdata_len.
> 
> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
> ---
> include/linux/ceph/osd_client.h |  2 +-
> net/ceph/osd_client.c           | 12 ++++++------
> 2 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 7506b485bb6d..35c8b006916f 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -77,7 +77,7 @@ struct ceph_osd_data {
> struct ceph_osd_req_op {
> 	u16 op;           /* CEPH_OSD_OP_* */
> 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
> -	u32 payload_len;
> +	u32 indata_len;   /* request */
> 	union {
> 		struct ceph_osd_data raw_data_in;
> 		struct {
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index a1babb77b813..38bdfafe9dd6 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -498,7 +498,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
> 	if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
> 		payload_len += length;
> 
> -	op->payload_len = payload_len;
> +	op->indata_len = payload_len;
> }
> EXPORT_SYMBOL(osd_req_op_extent_init);
> 
> @@ -517,7 +517,7 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
> 	BUG_ON(length > previous);
> 
> 	op->extent.length = length;
> -	op->payload_len -= previous - length;
> +	op->indata_len -= previous - length;
> }
> EXPORT_SYMBOL(os_req_op_extent_update);

this does not seem correct. os_req_op_extent_update() is used for both read and write.

 
> 
> @@ -554,7 +554,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
> 
> 	op->cls.argc = 0;	/* currently unused */
> 
> -	op->payload_len = payload_len;
> +	op->indata_len = payload_len;
> }
> EXPORT_SYMBOL(osd_req_op_cls_init);
> 
> @@ -587,7 +587,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
> 	op->xattr.cmp_mode = cmp_mode;
> 
> 	ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
> -	op->payload_len = payload_len;
> +	op->indata_len = payload_len;
> 	return 0;
> }
> EXPORT_SYMBOL(osd_req_op_xattr_init);
> @@ -707,7 +707,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
> 			BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
> 			dst->cls.indata_len = cpu_to_le32(data_length);
> 			ceph_osdc_msg_data_add(req->r_request, osd_data);
> -			src->payload_len += data_length;
> +			src->indata_len += data_length;
> 			request_data_len += data_length;
> 		}
> 		osd_data = &src->cls.response_data;
> @@ -750,7 +750,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
> 
> 	dst->op = cpu_to_le16(src->op);
> 	dst->flags = cpu_to_le32(src->flags);
> -	dst->payload_len = cpu_to_le32(src->payload_len);
> +	dst->payload_len = cpu_to_le32(src->indata_len);
> 
> 	return request_data_len;
> }
> -- 
> 2.4.3
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len
  2016-02-10 12:12   ` Yan, Zheng
@ 2016-02-10 12:23     ` Yan, Zheng
  0 siblings, 0 replies; 9+ messages in thread
From: Yan, Zheng @ 2016-02-10 12:23 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel


> On Feb 10, 2016, at 20:12, Yan, Zheng <zyan@redhat.com> wrote:
> 
> 
>> On Feb 10, 2016, at 19:24, Ilya Dryomov <idryomov@gmail.com> wrote:
>> 
>> Follow userspace nomenclature on this - the next commit adds
>> outdata_len.
>> 
>> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>> ---
>> include/linux/ceph/osd_client.h |  2 +-
>> net/ceph/osd_client.c           | 12 ++++++------
>> 2 files changed, 7 insertions(+), 7 deletions(-)
>> 
>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>> index 7506b485bb6d..35c8b006916f 100644
>> --- a/include/linux/ceph/osd_client.h
>> +++ b/include/linux/ceph/osd_client.h
>> @@ -77,7 +77,7 @@ struct ceph_osd_data {
>> struct ceph_osd_req_op {
>> 	u16 op;           /* CEPH_OSD_OP_* */
>> 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
>> -	u32 payload_len;
>> +	u32 indata_len;   /* request */
>> 	union {
>> 		struct ceph_osd_data raw_data_in;
>> 		struct {
>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>> index a1babb77b813..38bdfafe9dd6 100644
>> --- a/net/ceph/osd_client.c
>> +++ b/net/ceph/osd_client.c
>> @@ -498,7 +498,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
>> 	if (opcode == CEPH_OSD_OP_WRITE || opcode == CEPH_OSD_OP_WRITEFULL)
>> 		payload_len += length;
>> 
>> -	op->payload_len = payload_len;
>> +	op->indata_len = payload_len;
>> }
>> EXPORT_SYMBOL(osd_req_op_extent_init);
>> 
>> @@ -517,7 +517,7 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
>> 	BUG_ON(length > previous);
>> 
>> 	op->extent.length = length;
>> -	op->payload_len -= previous - length;
>> +	op->indata_len -= previous - length;
>> }
>> EXPORT_SYMBOL(os_req_op_extent_update);
> 
> this does not seem correct. os_req_op_extent_update() is used for both read and write.

please ignore this comment

> 
> 
>> 
>> @@ -554,7 +554,7 @@ void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
>> 
>> 	op->cls.argc = 0;	/* currently unused */
>> 
>> -	op->payload_len = payload_len;
>> +	op->indata_len = payload_len;
>> }
>> EXPORT_SYMBOL(osd_req_op_cls_init);
>> 
>> @@ -587,7 +587,7 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
>> 	op->xattr.cmp_mode = cmp_mode;
>> 
>> 	ceph_osd_data_pagelist_init(&op->xattr.osd_data, pagelist);
>> -	op->payload_len = payload_len;
>> +	op->indata_len = payload_len;
>> 	return 0;
>> }
>> EXPORT_SYMBOL(osd_req_op_xattr_init);
>> @@ -707,7 +707,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
>> 			BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
>> 			dst->cls.indata_len = cpu_to_le32(data_length);
>> 			ceph_osdc_msg_data_add(req->r_request, osd_data);
>> -			src->payload_len += data_length;
>> +			src->indata_len += data_length;
>> 			request_data_len += data_length;
>> 		}
>> 		osd_data = &src->cls.response_data;
>> @@ -750,7 +750,7 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
>> 
>> 	dst->op = cpu_to_le16(src->op);
>> 	dst->flags = cpu_to_le32(src->flags);
>> -	dst->payload_len = cpu_to_le32(src->payload_len);
>> +	dst->payload_len = cpu_to_le32(src->indata_len);
>> 
>> 	return request_data_len;
>> }
>> -- 
>> 2.4.3
>> 
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] libceph: enable large, variable-sized OSD requests
  2016-02-10 11:56   ` Yan, Zheng
@ 2016-02-11  9:57     ` Ilya Dryomov
  0 siblings, 0 replies; 9+ messages in thread
From: Ilya Dryomov @ 2016-02-11  9:57 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Ceph Development

On Wed, Feb 10, 2016 at 12:56 PM, Yan, Zheng <zyan@redhat.com> wrote:
>
>> On Feb 10, 2016, at 19:24, Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> Turn r_ops into a flexible array member to enable large, consisting of
>> up to 16 ops, OSD requests.  The use case is scattered writeback in
>> cephfs and, as far as the kernel client is concerned, 16 is just a made
>> up number.
>>
>> r_ops had size 3 for copyup+hint+write, but copyup is really a special
>> case - it can only happen once.  ceph_osd_request_cache is therefore
>> stuffed with num_ops=2 requests, anything bigger than that is allocated
>> with kmalloc().  req_mempool is backed by ceph_osd_request_cache, which
>> means either num_ops=1 or num_ops=2 for use_mempool=true - all existing
>> users (ceph_writepages_start(), ceph_osdc_writepages()) are fine with
>> that.
>>
>> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>> ---
>> drivers/block/rbd.c             |  2 --
>> include/linux/ceph/osd_client.h |  6 ++++--
>> net/ceph/osd_client.c           | 32 +++++++++++++++++++-------------
>> 3 files changed, 23 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 94f31bde73e8..08018710f363 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -1847,8 +1847,6 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
>>       if (osd_req->r_result < 0)
>>               obj_request->result = osd_req->r_result;
>>
>> -     rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
>> -
>>       /*
>>        * We support a 64-bit length, but ultimately it has to be
>>        * passed to the block layer, which just supports a 32-bit
>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>> index c6d1d603bacf..aada6a1383a4 100644
>> --- a/include/linux/ceph/osd_client.h
>> +++ b/include/linux/ceph/osd_client.h
>> @@ -43,7 +43,8 @@ struct ceph_osd {
>> };
>>
>>
>> -#define CEPH_OSD_MAX_OP      3
>> +#define CEPH_OSD_SLAB_OPS    2
>> +#define CEPH_OSD_MAX_OPS     16
>>
>> enum ceph_osd_data_type {
>>       CEPH_OSD_DATA_TYPE_NONE = 0,
>> @@ -139,7 +140,6 @@ struct ceph_osd_request {
>>
>>       /* request osd ops array  */
>>       unsigned int            r_num_ops;
>> -     struct ceph_osd_req_op  r_ops[CEPH_OSD_MAX_OP];
>>
>>       /* these are updated on each send */
>>       __le32           *r_request_osdmap_epoch;
>> @@ -175,6 +175,8 @@ struct ceph_osd_request {
>>       unsigned long     r_stamp;            /* send OR check time */
>>
>>       struct ceph_snap_context *r_snapc;    /* snap context for writes */
>> +
>> +     struct ceph_osd_req_op r_ops[];
>> };
>>
>> struct ceph_request_redirect {
>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>> index 8bf4f74064e5..37a0fc5273d1 100644
>> --- a/net/ceph/osd_client.c
>> +++ b/net/ceph/osd_client.c
>> @@ -338,9 +338,10 @@ static void ceph_osdc_release_request(struct kref *kref)
>>       ceph_put_snap_context(req->r_snapc);
>>       if (req->r_mempool)
>>               mempool_free(req, req->r_osdc->req_mempool);
>> -     else
>> +     else if (req->r_num_ops <= CEPH_OSD_SLAB_OPS)
>>               kmem_cache_free(ceph_osd_request_cache, req);
>> -
>> +     else
>> +             kfree(req);
>> }
>>
>> void ceph_osdc_get_request(struct ceph_osd_request *req)
>> @@ -369,9 +370,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
>>       struct ceph_msg *msg;
>>       size_t msg_size;
>>
>> -     BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
>> -     BUG_ON(num_ops > CEPH_OSD_MAX_OP);
>> -
>>       msg_size = 4 + 4 + 8 + 8 + 4+8;
>>       msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
>>       msg_size += 1 + 8 + 4 + 4;     /* pg_t */
>> @@ -383,14 +381,21 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
>>       msg_size += 4;
>>
>>       if (use_mempool) {
>> +             BUG_ON(num_ops > CEPH_OSD_SLAB_OPS);
>>               req = mempool_alloc(osdc->req_mempool, gfp_flags);
>> -             memset(req, 0, sizeof(*req));
>> +     } else if (num_ops <= CEPH_OSD_SLAB_OPS) {
>> +             req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
>>       } else {
>> -             req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
>> +             BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
>> +             req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
>> +                           gfp_flags);
>>       }
>> -     if (req == NULL)
>> +     if (unlikely(!req))
>>               return NULL;
>>
>> +     /* req only, each op is zeroed in _osd_req_op_init() */
>> +     memset(req, 0, sizeof(*req));
>> +
>>       req->r_osdc = osdc;
>>       req->r_mempool = use_mempool;
>>       req->r_num_ops = num_ops;
>> @@ -1809,7 +1814,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
>>
>>       ceph_decode_need(&p, end, 4, bad_put);
>>       numops = ceph_decode_32(&p);
>> -     if (numops > CEPH_OSD_MAX_OP)
>> +     if (numops > CEPH_OSD_MAX_OPS)
>>               goto bad_put;
>>       if (numops != req->r_num_ops)
>>               goto bad_put;
>> @@ -2773,11 +2778,12 @@ EXPORT_SYMBOL(ceph_osdc_writepages);
>>
>> int ceph_osdc_setup(void)
>> {
>> +     size_t size = sizeof(struct ceph_osd_request) +
>> +         CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
>> +
>>       BUG_ON(ceph_osd_request_cache);
>> -     ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
>> -                                     sizeof (struct ceph_osd_request),
>> -                                     __alignof__(struct ceph_osd_request),
>> -                                     0, NULL);
>> +     ceph_osd_request_cache = kmem_cache_create("ceph_osd_request", size,
>> +                                                0, 0, NULL);
>>
>>       return ceph_osd_request_cache ? 0 : -ENOMEM;
>> }
>
> This approach is better than mine. But the code that calculates r_request/r_reply messages size get lost in your patch

r_request calculation is there, r_reply was missing because I wanted to
take another look at both r_request and r_reply - it can be a separate
commit though.  I'll pull wip-alloc-request into testing later today.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-02-11  9:57 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-10 11:24 [PATCH 0/4] wip-alloc-request Ilya Dryomov
2016-02-10 11:24 ` [PATCH 1/4] libceph: rename ceph_osd_req_op::payload_len to indata_len Ilya Dryomov
2016-02-10 12:12   ` Yan, Zheng
2016-02-10 12:23     ` Yan, Zheng
2016-02-10 11:24 ` [PATCH 2/4] libceph: move r_reply_op_{len,result} into struct ceph_osd_req_op Ilya Dryomov
2016-02-10 11:24 ` [PATCH 3/4] libceph: osdc->req_mempool should be backed by a slab pool Ilya Dryomov
2016-02-10 11:24 ` [PATCH 4/4] libceph: enable large, variable-sized OSD requests Ilya Dryomov
2016-02-10 11:56   ` Yan, Zheng
2016-02-11  9:57     ` Ilya Dryomov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.