* [PATCHv4 for-next 08/19] block/rnbd-clt: Replace {NO_WAIT,WAIT} with RTRS_PERMIT_{WAIT,NOWAIT}
[not found] <20210414122402.203388-1-gi-oh.kim@ionos.com>
@ 2021-04-14 12:23 ` Gioh Kim
2021-04-14 12:23 ` [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization Gioh Kim
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Gioh Kim @ 2021-04-14 12:23 UTC (permalink / raw)
To: linux-block
Cc: axboe, hch, sagi, bvanassche, haris.iqbal, jinpu.wang, Gioh Kim,
Leon Romanovsky, linux-rdma, Guoqing Jiang, Gioh Kim,
Chaitanya Kulkarni, Jason Gunthorpe
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
They are defined with the same values and have similar meanings, so
keep only RTRS_PERMIT_{WAIT,NOWAIT} and remove {NO_WAIT,WAIT}.
Also change the type of 'wait' from 'int' to 'enum wait_type' to make
it clear.
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Guoqing Jiang <guoqing.jiang@ionos.com>
Reviewed-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/block/rnbd/rnbd-clt.c | 42 +++++++++++---------------
drivers/infiniband/ulp/rtrs/rtrs-clt.c | 4 +--
drivers/infiniband/ulp/rtrs/rtrs.h | 6 ++--
3 files changed, 22 insertions(+), 30 deletions(-)
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 4e687ec88721..652b41cc4492 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -312,13 +312,11 @@ static void rnbd_rerun_all_if_idle(struct rnbd_clt_session *sess)
static struct rtrs_permit *rnbd_get_permit(struct rnbd_clt_session *sess,
enum rtrs_clt_con_type con_type,
- int wait)
+ enum wait_type wait)
{
struct rtrs_permit *permit;
- permit = rtrs_clt_get_permit(sess->rtrs, con_type,
- wait ? RTRS_PERMIT_WAIT :
- RTRS_PERMIT_NOWAIT);
+ permit = rtrs_clt_get_permit(sess->rtrs, con_type, wait);
if (likely(permit))
/* We have a subtle rare case here, when all permits can be
* consumed before busy counter increased. This is safe,
@@ -344,7 +342,7 @@ static void rnbd_put_permit(struct rnbd_clt_session *sess,
static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
enum rtrs_clt_con_type con_type,
- int wait)
+ enum wait_type wait)
{
struct rnbd_iu *iu;
struct rtrs_permit *permit;
@@ -354,9 +352,7 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
return NULL;
}
- permit = rnbd_get_permit(sess, con_type,
- wait ? RTRS_PERMIT_WAIT :
- RTRS_PERMIT_NOWAIT);
+ permit = rnbd_get_permit(sess, con_type, wait);
if (unlikely(!permit)) {
kfree(iu);
return NULL;
@@ -435,16 +431,11 @@ static void msg_conf(void *priv, int errno)
schedule_work(&iu->work);
}
-enum wait_type {
- NO_WAIT = 0,
- WAIT = 1
-};
-
static int send_usr_msg(struct rtrs_clt *rtrs, int dir,
struct rnbd_iu *iu, struct kvec *vec,
size_t len, struct scatterlist *sg, unsigned int sg_len,
void (*conf)(struct work_struct *work),
- int *errno, enum wait_type wait)
+ int *errno, int wait)
{
int err;
struct rtrs_clt_req_ops req_ops;
@@ -476,7 +467,8 @@ static void msg_close_conf(struct work_struct *work)
rnbd_clt_put_dev(dev);
}
-static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait)
+static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id,
+ enum wait_type wait)
{
struct rnbd_clt_session *sess = dev->sess;
struct rnbd_msg_close msg;
@@ -530,7 +522,7 @@ static void msg_open_conf(struct work_struct *work)
* If server thinks its fine, but we fail to process
* then be nice and send a close to server.
*/
- (void)send_msg_close(dev, device_id, NO_WAIT);
+ send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
}
}
kfree(rsp);
@@ -554,7 +546,7 @@ static void msg_sess_info_conf(struct work_struct *work)
rnbd_clt_put_sess(sess);
}
-static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
+static int send_msg_open(struct rnbd_clt_dev *dev, enum wait_type wait)
{
struct rnbd_clt_session *sess = dev->sess;
struct rnbd_msg_open_rsp *rsp;
@@ -601,7 +593,7 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait)
return err;
}
-static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait)
+static int send_msg_sess_info(struct rnbd_clt_session *sess, enum wait_type wait)
{
struct rnbd_msg_sess_info_rsp *rsp;
struct rnbd_msg_sess_info msg;
@@ -687,7 +679,7 @@ static void remap_devs(struct rnbd_clt_session *sess)
* be asynchronous.
*/
- err = send_msg_sess_info(sess, NO_WAIT);
+ err = send_msg_sess_info(sess, RTRS_PERMIT_NOWAIT);
if (err) {
pr_err("send_msg_sess_info(\"%s\"): %d\n", sess->sessname, err);
return;
@@ -711,7 +703,7 @@ static void remap_devs(struct rnbd_clt_session *sess)
continue;
rnbd_clt_info(dev, "session reconnected, remapping device\n");
- err = send_msg_open(dev, NO_WAIT);
+ err = send_msg_open(dev, RTRS_PERMIT_NOWAIT);
if (err) {
rnbd_clt_err(dev, "send_msg_open(): %d\n", err);
break;
@@ -1242,7 +1234,7 @@ find_and_get_or_create_sess(const char *sessname,
if (err)
goto close_rtrs;
- err = send_msg_sess_info(sess, WAIT);
+ err = send_msg_sess_info(sess, RTRS_PERMIT_WAIT);
if (err)
goto close_rtrs;
@@ -1525,7 +1517,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
ret = -EEXIST;
goto put_dev;
}
- ret = send_msg_open(dev, WAIT);
+ ret = send_msg_open(dev, RTRS_PERMIT_WAIT);
if (ret) {
rnbd_clt_err(dev,
"map_device: failed, can't open remote device, err: %d\n",
@@ -1559,7 +1551,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
return dev;
send_close:
- send_msg_close(dev, dev->device_id, WAIT);
+ send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
del_dev:
delete_dev(dev);
put_dev:
@@ -1619,7 +1611,7 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
destroy_sysfs(dev, sysfs_self);
destroy_gen_disk(dev);
if (was_mapped && sess->rtrs)
- send_msg_close(dev, dev->device_id, WAIT);
+ send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
rnbd_clt_info(dev, "Device is unmapped\n");
@@ -1653,7 +1645,7 @@ int rnbd_clt_remap_device(struct rnbd_clt_dev *dev)
mutex_unlock(&dev->lock);
if (!err) {
rnbd_clt_info(dev, "Remapping device.\n");
- err = send_msg_open(dev, WAIT);
+ err = send_msg_open(dev, RTRS_PERMIT_WAIT);
if (err)
rnbd_clt_err(dev, "remap_device: %d\n", err);
}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 0a08b4b742a3..7efd49bdc78c 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -103,11 +103,11 @@ static inline void __rtrs_put_permit(struct rtrs_clt *clt,
* up earlier.
*
* Context:
- * Can sleep if @wait == RTRS_TAG_WAIT
+ * Can sleep if @wait == RTRS_PERMIT_WAIT
*/
struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *clt,
enum rtrs_clt_con_type con_type,
- int can_wait)
+ enum wait_type can_wait)
{
struct rtrs_permit *permit;
DEFINE_WAIT(wait);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index 8738e90e715a..2db1b5eb3ab0 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -63,9 +63,9 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
void rtrs_clt_close(struct rtrs_clt *sess);
-enum {
+enum wait_type {
RTRS_PERMIT_NOWAIT = 0,
- RTRS_PERMIT_WAIT = 1,
+ RTRS_PERMIT_WAIT = 1
};
/**
@@ -81,7 +81,7 @@ enum rtrs_clt_con_type {
struct rtrs_permit *rtrs_clt_get_permit(struct rtrs_clt *sess,
enum rtrs_clt_con_type con_type,
- int wait);
+ enum wait_type wait);
void rtrs_clt_put_permit(struct rtrs_clt *sess, struct rtrs_permit *permit);
--
2.25.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
[not found] <20210414122402.203388-1-gi-oh.kim@ionos.com>
2021-04-14 12:23 ` [PATCHv4 for-next 08/19] block/rnbd-clt: Replace {NO_WAIT,WAIT} with RTRS_PERMIT_{WAIT,NOWAIT} Gioh Kim
@ 2021-04-14 12:23 ` Gioh Kim
2021-04-18 8:36 ` Leon Romanovsky
2021-04-14 12:23 ` [PATCHv4 for-next 15/19] block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev Gioh Kim
2021-04-14 12:24 ` [PATCHv4 for-next 17/19] block/rnbd-clt: Remove max_segment_size Gioh Kim
3 siblings, 1 reply; 10+ messages in thread
From: Gioh Kim @ 2021-04-14 12:23 UTC (permalink / raw)
To: linux-block
Cc: axboe, hch, sagi, bvanassche, haris.iqbal, jinpu.wang, Gioh Kim,
Leon Romanovsky, linux-rdma, Gioh Kim, Jason Gunthorpe
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
RNBD can make double-queues for irq-mode and poll-mode.
For example, on a 4-CPU system 8 request-queues are created,
4 for irq-mode and 4 for poll-mode.
If an IO request has the HIPRI flag set, the block layer will call the
.poll function of RNBD, and the IO is sent to the poll-mode queue.
Add optional nr_poll_queues argument for map_devices interface.
To support polling of RNBD, the RTRS client creates connections
for both irq-mode and direct-poll-mode.
For example, on a 4-CPU system it previously created 5 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
After this patch, it can create 9 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
con[5:8] => DIRECT-POLL cq
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/block/rnbd/rnbd-clt-sysfs.c | 56 +++++++++++++----
drivers/block/rnbd/rnbd-clt.c | 85 +++++++++++++++++++++++---
drivers/block/rnbd/rnbd-clt.h | 5 +-
drivers/infiniband/ulp/rtrs/rtrs-clt.c | 62 +++++++++++++++----
drivers/infiniband/ulp/rtrs/rtrs-pri.h | 1 +
drivers/infiniband/ulp/rtrs/rtrs.h | 3 +-
6 files changed, 178 insertions(+), 34 deletions(-)
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 49015f428e67..bd111ebceb75 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -34,6 +34,7 @@ enum {
RNBD_OPT_DEV_PATH = 1 << 2,
RNBD_OPT_ACCESS_MODE = 1 << 3,
RNBD_OPT_SESSNAME = 1 << 6,
+ RNBD_OPT_NR_POLL_QUEUES = 1 << 7,
};
static const unsigned int rnbd_opt_mandatory[] = {
@@ -42,12 +43,13 @@ static const unsigned int rnbd_opt_mandatory[] = {
};
static const match_table_t rnbd_opt_tokens = {
- {RNBD_OPT_PATH, "path=%s" },
- {RNBD_OPT_DEV_PATH, "device_path=%s"},
- {RNBD_OPT_DEST_PORT, "dest_port=%d" },
- {RNBD_OPT_ACCESS_MODE, "access_mode=%s"},
- {RNBD_OPT_SESSNAME, "sessname=%s" },
- {RNBD_OPT_ERR, NULL },
+ {RNBD_OPT_PATH, "path=%s" },
+ {RNBD_OPT_DEV_PATH, "device_path=%s" },
+ {RNBD_OPT_DEST_PORT, "dest_port=%d" },
+ {RNBD_OPT_ACCESS_MODE, "access_mode=%s" },
+ {RNBD_OPT_SESSNAME, "sessname=%s" },
+ {RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
+ {RNBD_OPT_ERR, NULL },
};
struct rnbd_map_options {
@@ -57,6 +59,7 @@ struct rnbd_map_options {
char *pathname;
u16 *dest_port;
enum rnbd_access_mode *access_mode;
+ u32 *nr_poll_queues;
};
static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
@@ -68,7 +71,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
int opt_mask = 0;
int token;
int ret = -EINVAL;
- int i, dest_port;
+ int i, dest_port, nr_poll_queues;
int p_cnt = 0;
options = kstrdup(buf, GFP_KERNEL);
@@ -178,6 +181,19 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
kfree(p);
break;
+ case RNBD_OPT_NR_POLL_QUEUES:
+ if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 ||
+ nr_poll_queues > (int)nr_cpu_ids) {
+ pr_err("bad nr_poll_queues parameter '%d'\n",
+ nr_poll_queues);
+ ret = -EINVAL;
+ goto out;
+ }
+ if (nr_poll_queues == -1)
+ nr_poll_queues = nr_cpu_ids;
+ *opt->nr_poll_queues = nr_poll_queues;
+ break;
+
default:
pr_err("map_device: Unknown parameter or missing value '%s'\n",
p);
@@ -227,6 +243,20 @@ static ssize_t state_show(struct kobject *kobj,
static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state);
+static ssize_t nr_poll_queues_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ struct rnbd_clt_dev *dev;
+
+ dev = container_of(kobj, struct rnbd_clt_dev, kobj);
+
+ return snprintf(page, PAGE_SIZE, "%d\n",
+ dev->nr_poll_queues);
+}
+
+static struct kobj_attribute rnbd_clt_nr_poll_queues =
+ __ATTR_RO(nr_poll_queues);
+
static ssize_t mapping_path_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -421,6 +451,7 @@ static struct attribute *rnbd_dev_attrs[] = {
&rnbd_clt_state_attr.attr,
&rnbd_clt_session_attr.attr,
&rnbd_clt_access_mode.attr,
+ &rnbd_clt_nr_poll_queues.attr,
NULL,
};
@@ -469,7 +500,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
char *page)
{
return scnprintf(page, PAGE_SIZE,
- "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
+ "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
attr->attr.name);
}
@@ -541,6 +572,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
char sessname[NAME_MAX];
enum rnbd_access_mode access_mode = RNBD_ACCESS_RW;
u16 port_nr = RTRS_PORT;
+ u32 nr_poll_queues = 0;
struct sockaddr_storage *addrs;
struct rtrs_addr paths[6];
@@ -552,6 +584,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
opt.pathname = pathname;
opt.dest_port = &port_nr;
opt.access_mode = &access_mode;
+ opt.nr_poll_queues = &nr_poll_queues;
addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
@@ -565,12 +598,13 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
if (ret)
goto out;
- pr_info("Mapping device %s on session %s, (access_mode: %s)\n",
+ pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
pathname, sessname,
- rnbd_access_mode_str(access_mode));
+ rnbd_access_mode_str(access_mode),
+ nr_poll_queues);
dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
- access_mode);
+ access_mode, nr_poll_queues);
if (IS_ERR(dev)) {
ret = PTR_ERR(dev);
goto out;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 9b44aac680d5..63719ec04d58 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}
+static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
+{
+ struct rnbd_queue *q = hctx->driver_data;
+ struct rnbd_clt_dev *dev = q->dev;
+ int cnt;
+
+ cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
+ return cnt;
+}
+
+static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
+{
+ struct rnbd_clt_session *sess = set->driver_data;
+
+ /* shared read/write queues */
+ set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
+ set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+ set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
+ set->map[HCTX_TYPE_READ].queue_offset = 0;
+ blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+ if (sess->nr_poll_queues) {
+ /* dedicated queue for poll */
+ set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
+ set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset +
+ set->map[HCTX_TYPE_READ].nr_queues;
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
+ sess->sessname,
+ set->map[HCTX_TYPE_DEFAULT].nr_queues,
+ set->map[HCTX_TYPE_READ].nr_queues,
+ set->map[HCTX_TYPE_POLL].nr_queues);
+ } else {
+ pr_info("[session=%s] mapped %d/%d default/read queues.\n",
+ sess->sessname,
+ set->map[HCTX_TYPE_DEFAULT].nr_queues,
+ set->map[HCTX_TYPE_READ].nr_queues);
+ }
+
+ return 0;
+}
+
static struct blk_mq_ops rnbd_mq_ops = {
.queue_rq = rnbd_queue_rq,
.complete = rnbd_softirq_done_fn,
+ .map_queues = rnbd_rdma_map_queues,
+ .poll = rnbd_rdma_poll,
};
static int setup_mq_tags(struct rnbd_clt_session *sess)
@@ -1181,7 +1226,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
BLK_MQ_F_TAG_QUEUE_SHARED;
tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
- tag_set->nr_hw_queues = num_online_cpus();
+
+ /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
+ tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
+ /*
+ * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
+ * others are for HCTX_TYPE_POLL
+ */
+ tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
+ tag_set->driver_data = sess;
return blk_mq_alloc_tag_set(tag_set);
}
@@ -1189,7 +1242,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
static struct rnbd_clt_session *
find_and_get_or_create_sess(const char *sessname,
const struct rtrs_addr *paths,
- size_t path_cnt, u16 port_nr)
+ size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rtrs_attrs attrs;
@@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname,
struct rtrs_clt_ops rtrs_ops;
sess = find_or_create_sess(sessname, &first);
+ if (sess == ERR_PTR(-ENOMEM))
+ return ERR_PTR(-ENOMEM);
+ else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
+ /*
+ * A device MUST have its own session to use the polling-mode.
+ * It must fail to map new device with the same session.
+ */
+ err = -EINVAL;
+ goto put_sess;
+ }
+
if (!first)
return sess;
@@ -1219,7 +1283,7 @@ find_and_get_or_create_sess(const char *sessname,
0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
BLK_MAX_SEGMENT_SIZE,
- MAX_RECONNECTS);
+ MAX_RECONNECTS, nr_poll_queues);
if (IS_ERR(sess->rtrs)) {
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
@@ -1227,6 +1291,7 @@ find_and_get_or_create_sess(const char *sessname,
rtrs_clt_query(sess->rtrs, &attrs);
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
+ sess->nr_poll_queues = nr_poll_queues;
err = setup_mq_tags(sess);
if (err)
@@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
enum rnbd_access_mode access_mode,
- const char *pathname)
+ const char *pathname,
+ u32 nr_poll_queues)
{
struct rnbd_clt_dev *dev;
int ret;
@@ -1379,7 +1445,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
if (!dev)
return ERR_PTR(-ENOMEM);
- dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
+ dev->hw_queues = kcalloc(nr_cpu_ids /* softirq */ + nr_poll_queues /* poll */,
+ sizeof(*dev->hw_queues),
GFP_KERNEL);
if (!dev->hw_queues) {
ret = -ENOMEM;
@@ -1405,6 +1472,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
dev->clt_device_id = ret;
dev->sess = sess;
dev->access_mode = access_mode;
+ dev->nr_poll_queues = nr_poll_queues;
mutex_init(&dev->lock);
refcount_set(&dev->refcount, 1);
dev->dev_state = DEV_STATE_INIT;
@@ -1491,7 +1559,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode)
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues)
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
@@ -1500,11 +1569,11 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
if (unlikely(exists_devpath(pathname, sessname)))
return ERR_PTR(-EEXIST);
- sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
+ sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
if (IS_ERR(sess))
return ERR_CAST(sess);
- dev = init_dev(sess, access_mode, pathname);
+ dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
if (IS_ERR(dev)) {
pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
pathname, sess->sessname, PTR_ERR(dev));
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 714d426b449b..451e7383738f 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -90,6 +90,7 @@ struct rnbd_clt_session {
int queue_depth;
u32 max_io_size;
struct blk_mq_tag_set tag_set;
+ u32 nr_poll_queues;
struct mutex lock; /* protects state and devs_list */
struct list_head devs_list; /* list of struct rnbd_clt_dev */
refcount_t refcount;
@@ -118,6 +119,7 @@ struct rnbd_clt_dev {
enum rnbd_clt_dev_state dev_state;
char *pathname;
enum rnbd_access_mode access_mode;
+ u32 nr_poll_queues;
bool read_only;
bool rotational;
bool wc;
@@ -147,7 +149,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
struct rtrs_addr *paths,
size_t path_cnt, u16 port_nr,
const char *pathname,
- enum rnbd_access_mode access_mode);
+ enum rnbd_access_mode access_mode,
+ u32 nr_poll_queues);
int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
const struct attribute *sysfs_self);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 7efd49bdc78c..467d135a82cf 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -174,7 +174,7 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
int id = 0;
if (likely(permit->con_type == RTRS_IO_CON))
- id = (permit->cpu_id % (sess->s.con_num - 1)) + 1;
+ id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
return to_clt_con(sess->s.con[id]);
}
@@ -1400,23 +1400,29 @@ static void rtrs_clt_close_work(struct work_struct *work);
static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
const struct rtrs_addr *path,
size_t con_num, u16 max_segments,
- size_t max_segment_size)
+ size_t max_segment_size, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess;
int err = -ENOMEM;
int cpu;
+ size_t total_con;
sess = kzalloc(sizeof(*sess), GFP_KERNEL);
if (!sess)
goto err;
- /* Extra connection for user messages */
- con_num += 1;
-
- sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
+ /*
+ * irqmode and poll
+ * +1: Extra connection for user messages
+ */
+ total_con = con_num + nr_poll_queues + 1;
+ sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
if (!sess->s.con)
goto err_free_sess;
+ sess->s.con_num = total_con;
+ sess->s.irq_con_num = con_num + 1;
+
sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
if (!sess->stats)
goto err_free_con;
@@ -1435,7 +1441,6 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
memcpy(&sess->s.src_addr, path->src,
rdma_addr_size((struct sockaddr *)path->src));
strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
- sess->s.con_num = con_num;
sess->clt = clt;
sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
init_waitqueue_head(&sess->state_wq);
@@ -1576,9 +1581,14 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
}
cq_size = max_send_wr + max_recv_wr;
cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
- err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
- cq_vector, cq_size, max_send_wr,
- max_recv_wr, IB_POLL_SOFTIRQ);
+ if (con->c.cid >= sess->s.irq_con_num)
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_DIRECT);
+ else
+ err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
+ cq_vector, cq_size, max_send_wr,
+ max_recv_wr, IB_POLL_SOFTIRQ);
/*
* In case of error we do not bother to clean previous allocations,
* since destroy_con_cq_qp() must be called.
@@ -2631,6 +2641,7 @@ static void free_clt(struct rtrs_clt *clt)
* @max_segment_size: Max. size of one segment
* @max_reconnect_attempts: Number of times to reconnect on error before giving
* up, 0 for * disabled, -1 for forever
+ * @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag
*
* Starts session establishment with the rtrs_server. The function can block
* up to ~2000ms before it returns.
@@ -2644,7 +2655,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
size_t max_segment_size,
- s16 max_reconnect_attempts)
+ s16 max_reconnect_attempts, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess, *tmp;
struct rtrs_clt *clt;
@@ -2662,7 +2673,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
struct rtrs_clt_sess *sess;
sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
- max_segments, max_segment_size);
+ max_segments, max_segment_size, nr_poll_queues);
if (IS_ERR(sess)) {
err = PTR_ERR(sess);
goto close_all_sess;
@@ -2887,6 +2898,31 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
}
EXPORT_SYMBOL(rtrs_clt_request);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
+{
+ int cnt;
+ struct rtrs_con *con;
+ struct rtrs_clt_sess *sess;
+ struct path_it it;
+
+ rcu_read_lock();
+ for (path_it_init(&it, clt);
+ (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
+ if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
+ continue;
+
+ con = sess->s.con[index + 1];
+ cnt = ib_process_cq_direct(con->cq, -1);
+ if (likely(cnt))
+ break;
+ }
+ path_it_deinit(&it);
+ rcu_read_unlock();
+
+ return cnt;
+}
+EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
+
/**
* rtrs_clt_query() - queries RTRS session attributes
*@clt: session pointer
@@ -2916,7 +2952,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
int err;
sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
- clt->max_segment_size);
+ clt->max_segment_size, 0);
if (IS_ERR(sess))
return PTR_ERR(sess);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index 8caad0a2322b..00eb45053339 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -101,6 +101,7 @@ struct rtrs_sess {
uuid_t uuid;
struct rtrs_con **con;
unsigned int con_num;
+ unsigned int irq_con_num;
unsigned int recon_cnt;
struct rtrs_ib_dev *dev;
int dev_ref;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index 2db1b5eb3ab0..f891fbe7abe6 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -59,7 +59,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
size_t max_segment_size,
- s16 max_reconnect_attempts);
+ s16 max_reconnect_attempts, u32 nr_poll_queues);
void rtrs_clt_close(struct rtrs_clt *sess);
@@ -103,6 +103,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
struct rtrs_clt *sess, struct rtrs_permit *permit,
const struct kvec *vec, size_t nr, size_t len,
struct scatterlist *sg, unsigned int sg_cnt);
+int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
/**
* rtrs_attrs - RTRS session attributes
--
2.25.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCHv4 for-next 15/19] block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev
[not found] <20210414122402.203388-1-gi-oh.kim@ionos.com>
2021-04-14 12:23 ` [PATCHv4 for-next 08/19] block/rnbd-clt: Replace {NO_WAIT,WAIT} with RTRS_PERMIT_{WAIT,NOWAIT} Gioh Kim
2021-04-14 12:23 ` [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization Gioh Kim
@ 2021-04-14 12:23 ` Gioh Kim
2021-04-14 12:24 ` [PATCHv4 for-next 17/19] block/rnbd-clt: Remove max_segment_size Gioh Kim
3 siblings, 0 replies; 10+ messages in thread
From: Gioh Kim @ 2021-04-14 12:23 UTC (permalink / raw)
To: linux-block
Cc: axboe, hch, sagi, bvanassche, haris.iqbal, jinpu.wang, Gioh Kim,
Leon Romanovsky, linux-rdma, Aleksei Marov, Gioh Kim,
Chaitanya Kulkarni, Jason Gunthorpe
From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
struct rtrs_srv is not used when handling rnbd_srv_rdma_ev messages, so
it is cleaned up.
The rdma_ev function pointer in rtrs_srv_ops is changed accordingly.
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Aleksei Marov <aleksei.marov@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/block/rnbd/rnbd-srv.c | 39 ++++++++++----------------
drivers/infiniband/ulp/rtrs/rtrs-srv.c | 4 +--
drivers/infiniband/ulp/rtrs/rtrs.h | 3 +-
3 files changed, 18 insertions(+), 28 deletions(-)
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index a9bb414f7442..abacd9ef10d6 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -114,8 +114,7 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
return sess_dev;
}
-static int process_rdma(struct rtrs_srv *sess,
- struct rnbd_srv_session *srv_sess,
+static int process_rdma(struct rnbd_srv_session *srv_sess,
struct rtrs_srv_op *id, void *data, u32 datalen,
const void *usr, size_t usrlen)
{
@@ -344,8 +343,7 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
mutex_unlock(&sess->lock);
}
-static int process_msg_close(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_close(struct rnbd_srv_session *srv_sess,
void *data, size_t datalen, const void *usr,
size_t usrlen)
{
@@ -364,20 +362,18 @@ static int process_msg_close(struct rtrs_srv *rtrs,
return 0;
}
-static int process_msg_open(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_open(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
-static int process_msg_sess_info(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
-static int rnbd_srv_rdma_ev(struct rtrs_srv *rtrs, void *priv,
- struct rtrs_srv_op *id, int dir,
- void *data, size_t datalen, const void *usr,
- size_t usrlen)
+static int rnbd_srv_rdma_ev(void *priv,
+ struct rtrs_srv_op *id, int dir,
+ void *data, size_t datalen, const void *usr,
+ size_t usrlen)
{
struct rnbd_srv_session *srv_sess = priv;
const struct rnbd_msg_hdr *hdr = usr;
@@ -391,19 +387,16 @@ static int rnbd_srv_rdma_ev(struct rtrs_srv *rtrs, void *priv,
switch (type) {
case RNBD_MSG_IO:
- return process_rdma(rtrs, srv_sess, id, data, datalen, usr,
- usrlen);
+ return process_rdma(srv_sess, id, data, datalen, usr, usrlen);
case RNBD_MSG_CLOSE:
- ret = process_msg_close(rtrs, srv_sess, data, datalen,
- usr, usrlen);
+ ret = process_msg_close(srv_sess, data, datalen, usr, usrlen);
break;
case RNBD_MSG_OPEN:
- ret = process_msg_open(rtrs, srv_sess, usr, usrlen,
- data, datalen);
+ ret = process_msg_open(srv_sess, usr, usrlen, data, datalen);
break;
case RNBD_MSG_SESS_INFO:
- ret = process_msg_sess_info(rtrs, srv_sess, usr, usrlen,
- data, datalen);
+ ret = process_msg_sess_info(srv_sess, usr, usrlen, data,
+ datalen);
break;
default:
pr_warn("Received unexpected message type %d with dir %d from session %s\n",
@@ -656,8 +649,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
return full_path;
}
-static int process_msg_sess_info(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen)
{
@@ -698,8 +690,7 @@ find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name)
return NULL;
}
-static int process_msg_open(struct rtrs_srv *rtrs,
- struct rnbd_srv_session *srv_sess,
+static int process_msg_open(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen)
{
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index d071809e3ed2..f7aa2a7e7442 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -998,7 +998,7 @@ static void process_read(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(msg->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv, srv->priv, id, READ, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len,
data + data_len, usr_len);
if (unlikely(ret)) {
@@ -1051,7 +1051,7 @@ static void process_write(struct rtrs_srv_con *con,
usr_len = le16_to_cpu(req->usr_len);
data_len = off - usr_len;
data = page_address(srv->chunks[buf_id]);
- ret = ctx->ops.rdma_ev(srv, srv->priv, id, WRITE, data, data_len,
+ ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len,
data + data_len, usr_len);
if (unlikely(ret)) {
rtrs_err_rl(s,
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index f891fbe7abe6..b0f56ffeff88 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -139,7 +139,6 @@ struct rtrs_srv_ops {
* message for the data transfer will be sent to
* the client.
- * @sess: Session
* @priv: Private data set by rtrs_srv_set_sess_priv()
* @id: internal RTRS operation id
* @dir: READ/WRITE
@@ -153,7 +152,7 @@ struct rtrs_srv_ops {
* @usr: The extra user message sent by the client (%vec)
* @usrlen: Size of the user message
*/
- int (*rdma_ev)(struct rtrs_srv *sess, void *priv,
+ int (*rdma_ev)(void *priv,
struct rtrs_srv_op *id, int dir,
void *data, size_t datalen, const void *usr,
size_t usrlen);
--
2.25.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCHv4 for-next 17/19] block/rnbd-clt: Remove max_segment_size
[not found] <20210414122402.203388-1-gi-oh.kim@ionos.com>
` (2 preceding siblings ...)
2021-04-14 12:23 ` [PATCHv4 for-next 15/19] block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev Gioh Kim
@ 2021-04-14 12:24 ` Gioh Kim
3 siblings, 0 replies; 10+ messages in thread
From: Gioh Kim @ 2021-04-14 12:24 UTC (permalink / raw)
To: linux-block
Cc: axboe, hch, sagi, bvanassche, haris.iqbal, jinpu.wang, Jack Wang,
Leon Romanovsky, linux-rdma, Gioh Kim, Jason Gunthorpe
From: Jack Wang <jinpu.wang@cloud.ionos.com>
We always map with SZ_4K, so we do not need max_segment_size.
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
---
drivers/block/rnbd/rnbd-clt.c | 1 -
drivers/infiniband/ulp/rtrs/rtrs-clt.c | 15 +++++----------
drivers/infiniband/ulp/rtrs/rtrs-clt.h | 1 -
drivers/infiniband/ulp/rtrs/rtrs.h | 1 -
4 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 1fe010ed6f69..7446660eb7f2 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1289,7 +1289,6 @@ find_and_get_or_create_sess(const char *sessname,
paths, path_cnt, port_nr,
0, /* Do not use pdu of rtrs */
RECONNECT_DELAY, BMAX_SEGMENTS,
- BLK_MAX_SEGMENT_SIZE,
MAX_RECONNECTS, nr_poll_queues);
if (IS_ERR(sess->rtrs)) {
err = PTR_ERR(sess->rtrs);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 467d135a82cf..1603e0c399e8 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1400,7 +1400,7 @@ static void rtrs_clt_close_work(struct work_struct *work);
static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
const struct rtrs_addr *path,
size_t con_num, u16 max_segments,
- size_t max_segment_size, u32 nr_poll_queues)
+ u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess;
int err = -ENOMEM;
@@ -1442,7 +1442,7 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
rdma_addr_size((struct sockaddr *)path->src));
strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
sess->clt = clt;
- sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
+ sess->max_pages_per_mr = max_segments;
init_waitqueue_head(&sess->state_wq);
sess->state = RTRS_CLT_CONNECTING;
atomic_set(&sess->connected_cnt, 0);
@@ -2538,7 +2538,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
void (*link_ev)(void *priv,
enum rtrs_clt_link_ev ev),
unsigned int max_segments,
- size_t max_segment_size,
unsigned int reconnect_delay_sec,
unsigned int max_reconnect_attempts)
{
@@ -2568,7 +2567,6 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num,
clt->port = port;
clt->pdu_sz = pdu_sz;
clt->max_segments = max_segments;
- clt->max_segment_size = max_segment_size;
clt->reconnect_delay_sec = reconnect_delay_sec;
clt->max_reconnect_attempts = max_reconnect_attempts;
clt->priv = priv;
@@ -2638,7 +2636,6 @@ static void free_clt(struct rtrs_clt *clt)
* @pdu_sz: Size of extra payload which can be accessed after permit allocation.
* @reconnect_delay_sec: time between reconnect tries
* @max_segments: Max. number of segments per IO request
- * @max_segment_size: Max. size of one segment
* @max_reconnect_attempts: Number of times to reconnect on error before giving
* up, 0 for * disabled, -1 for forever
* @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag
@@ -2654,7 +2651,6 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t paths_num, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
- size_t max_segment_size,
s16 max_reconnect_attempts, u32 nr_poll_queues)
{
struct rtrs_clt_sess *sess, *tmp;
@@ -2663,7 +2659,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv,
ops->link_ev,
- max_segments, max_segment_size, reconnect_delay_sec,
+ max_segments, reconnect_delay_sec,
max_reconnect_attempts);
if (IS_ERR(clt)) {
err = PTR_ERR(clt);
@@ -2673,7 +2669,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
struct rtrs_clt_sess *sess;
sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
- max_segments, max_segment_size, nr_poll_queues);
+ max_segments, nr_poll_queues);
if (IS_ERR(sess)) {
err = PTR_ERR(sess);
goto close_all_sess;
@@ -2951,8 +2947,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
struct rtrs_clt_sess *sess;
int err;
- sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
- clt->max_segment_size, 0);
+ sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments, 0);
if (IS_ERR(sess))
return PTR_ERR(sess);
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 692bc83e1f09..98ba5d0a48b8 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -166,7 +166,6 @@ struct rtrs_clt {
unsigned int max_reconnect_attempts;
unsigned int reconnect_delay_sec;
unsigned int max_segments;
- size_t max_segment_size;
void *permits;
unsigned long *permits_map;
size_t queue_depth;
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
index b0f56ffeff88..bebaa94c4728 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs.h
@@ -58,7 +58,6 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
size_t path_cnt, u16 port,
size_t pdu_sz, u8 reconnect_delay_sec,
u16 max_segments,
- size_t max_segment_size,
s16 max_reconnect_attempts, u32 nr_poll_queues);
void rtrs_clt_close(struct rtrs_clt *sess);
--
2.25.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
2021-04-14 12:23 ` [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization Gioh Kim
@ 2021-04-18 8:36 ` Leon Romanovsky
2021-04-19 5:12 ` Gioh Kim
0 siblings, 1 reply; 10+ messages in thread
From: Leon Romanovsky @ 2021-04-18 8:36 UTC (permalink / raw)
To: Gioh Kim
Cc: linux-block, axboe, hch, sagi, bvanassche, haris.iqbal,
jinpu.wang, Gioh Kim, linux-rdma, Jason Gunthorpe
On Wed, Apr 14, 2021 at 02:23:56PM +0200, Gioh Kim wrote:
> From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
>
> RNBD can make double-queues for irq-mode and poll-mode.
> For example, on 4-CPU system 8 request-queues are created,
> 4 for irq-mode and 4 for poll-mode.
> If the IO has HIPRI flag, the block-layer will call .poll function
> of RNBD. Then IO is sent to the poll-mode queue.
> Add optional nr_poll_queues argument for map_devices interface.
>
> To support polling of RNBD, RTRS client creates connections
> for both of irq-mode and direct-poll-mode.
>
> For example, on a 4-CPU system it could've created 5 connections:
> con[0] => user message (softirq cq)
> con[1:4] => softirq cq
>
> After this patch, it can create 9 connections:
> con[0] => user message (softirq cq)
> con[1:4] => softirq cq
> con[5:8] => DIRECT-POLL cq
>
> Cc: Leon Romanovsky <leonro@nvidia.com>
> Cc: linux-rdma@vger.kernel.org
> Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
> Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> Acked-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
> drivers/block/rnbd/rnbd-clt-sysfs.c | 56 +++++++++++++----
> drivers/block/rnbd/rnbd-clt.c | 85 +++++++++++++++++++++++---
> drivers/block/rnbd/rnbd-clt.h | 5 +-
> drivers/infiniband/ulp/rtrs/rtrs-clt.c | 62 +++++++++++++++----
> drivers/infiniband/ulp/rtrs/rtrs-pri.h | 1 +
> drivers/infiniband/ulp/rtrs/rtrs.h | 3 +-
> 6 files changed, 178 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
> index 49015f428e67..bd111ebceb75 100644
> --- a/drivers/block/rnbd/rnbd-clt-sysfs.c
> +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
> @@ -34,6 +34,7 @@ enum {
> RNBD_OPT_DEV_PATH = 1 << 2,
> RNBD_OPT_ACCESS_MODE = 1 << 3,
> RNBD_OPT_SESSNAME = 1 << 6,
> + RNBD_OPT_NR_POLL_QUEUES = 1 << 7,
> };
>
> static const unsigned int rnbd_opt_mandatory[] = {
> @@ -42,12 +43,13 @@ static const unsigned int rnbd_opt_mandatory[] = {
> };
>
> static const match_table_t rnbd_opt_tokens = {
> - {RNBD_OPT_PATH, "path=%s" },
> - {RNBD_OPT_DEV_PATH, "device_path=%s"},
> - {RNBD_OPT_DEST_PORT, "dest_port=%d" },
> - {RNBD_OPT_ACCESS_MODE, "access_mode=%s"},
> - {RNBD_OPT_SESSNAME, "sessname=%s" },
> - {RNBD_OPT_ERR, NULL },
> + {RNBD_OPT_PATH, "path=%s" },
> + {RNBD_OPT_DEV_PATH, "device_path=%s" },
> + {RNBD_OPT_DEST_PORT, "dest_port=%d" },
> + {RNBD_OPT_ACCESS_MODE, "access_mode=%s" },
> + {RNBD_OPT_SESSNAME, "sessname=%s" },
> + {RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
> + {RNBD_OPT_ERR, NULL },
> };
>
> struct rnbd_map_options {
> @@ -57,6 +59,7 @@ struct rnbd_map_options {
> char *pathname;
> u16 *dest_port;
> enum rnbd_access_mode *access_mode;
> + u32 *nr_poll_queues;
> };
>
> static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
> @@ -68,7 +71,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
> int opt_mask = 0;
> int token;
> int ret = -EINVAL;
> - int i, dest_port;
> + int i, dest_port, nr_poll_queues;
> int p_cnt = 0;
>
> options = kstrdup(buf, GFP_KERNEL);
> @@ -178,6 +181,19 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
> kfree(p);
> break;
>
> + case RNBD_OPT_NR_POLL_QUEUES:
> + if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 ||
> + nr_poll_queues > (int)nr_cpu_ids) {
> + pr_err("bad nr_poll_queues parameter '%d'\n",
> + nr_poll_queues);
> + ret = -EINVAL;
> + goto out;
> + }
> + if (nr_poll_queues == -1)
> + nr_poll_queues = nr_cpu_ids;
> + *opt->nr_poll_queues = nr_poll_queues;
> + break;
> +
> default:
> pr_err("map_device: Unknown parameter or missing value '%s'\n",
> p);
> @@ -227,6 +243,20 @@ static ssize_t state_show(struct kobject *kobj,
>
> static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state);
>
> +static ssize_t nr_poll_queues_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *page)
> +{
> + struct rnbd_clt_dev *dev;
> +
> + dev = container_of(kobj, struct rnbd_clt_dev, kobj);
> +
> + return snprintf(page, PAGE_SIZE, "%d\n",
> + dev->nr_poll_queues);
> +}
Didn't Greg ask you to use sysfs_emit() here?
> +
> +static struct kobj_attribute rnbd_clt_nr_poll_queues =
> + __ATTR_RO(nr_poll_queues);
> +
> static ssize_t mapping_path_show(struct kobject *kobj,
> struct kobj_attribute *attr, char *page)
> {
> @@ -421,6 +451,7 @@ static struct attribute *rnbd_dev_attrs[] = {
> &rnbd_clt_state_attr.attr,
> &rnbd_clt_session_attr.attr,
> &rnbd_clt_access_mode.attr,
> + &rnbd_clt_nr_poll_queues.attr,
> NULL,
> };
>
> @@ -469,7 +500,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
> char *page)
> {
> return scnprintf(page, PAGE_SIZE,
> - "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
> + "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
> attr->attr.name);
> }
>
> @@ -541,6 +572,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
> char sessname[NAME_MAX];
> enum rnbd_access_mode access_mode = RNBD_ACCESS_RW;
> u16 port_nr = RTRS_PORT;
> + u32 nr_poll_queues = 0;
>
> struct sockaddr_storage *addrs;
> struct rtrs_addr paths[6];
> @@ -552,6 +584,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
> opt.pathname = pathname;
> opt.dest_port = &port_nr;
> opt.access_mode = &access_mode;
> + opt.nr_poll_queues = &nr_poll_queues;
> addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL);
> if (!addrs)
> return -ENOMEM;
> @@ -565,12 +598,13 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
> if (ret)
> goto out;
>
> - pr_info("Mapping device %s on session %s, (access_mode: %s)\n",
> + pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
> pathname, sessname,
> - rnbd_access_mode_str(access_mode));
> + rnbd_access_mode_str(access_mode),
> + nr_poll_queues);
>
> dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
> - access_mode);
> + access_mode, nr_poll_queues);
> if (IS_ERR(dev)) {
> ret = PTR_ERR(dev);
> goto out;
> diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
> index 9b44aac680d5..63719ec04d58 100644
> --- a/drivers/block/rnbd/rnbd-clt.c
> +++ b/drivers/block/rnbd/rnbd-clt.c
> @@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
> return ret;
> }
>
> +static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
> +{
> + struct rnbd_queue *q = hctx->driver_data;
> + struct rnbd_clt_dev *dev = q->dev;
> + int cnt;
> +
> + cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
> + return cnt;
> +}
> +
> +static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
> +{
> + struct rnbd_clt_session *sess = set->driver_data;
> +
> + /* shared read/write queues */
> + set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
> + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> + set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
> + set->map[HCTX_TYPE_READ].queue_offset = 0;
> + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> +
> + if (sess->nr_poll_queues) {
> + /* dedicated queue for poll */
> + set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
> + set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset +
> + set->map[HCTX_TYPE_READ].nr_queues;
> + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
> + pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
> + sess->sessname,
> + set->map[HCTX_TYPE_DEFAULT].nr_queues,
> + set->map[HCTX_TYPE_READ].nr_queues,
> + set->map[HCTX_TYPE_POLL].nr_queues);
> + } else {
> + pr_info("[session=%s] mapped %d/%d default/read queues.\n",
> + sess->sessname,
> + set->map[HCTX_TYPE_DEFAULT].nr_queues,
> + set->map[HCTX_TYPE_READ].nr_queues);
> + }
> +
> + return 0;
> +}
> +
> static struct blk_mq_ops rnbd_mq_ops = {
> .queue_rq = rnbd_queue_rq,
> .complete = rnbd_softirq_done_fn,
> + .map_queues = rnbd_rdma_map_queues,
> + .poll = rnbd_rdma_poll,
> };
>
> static int setup_mq_tags(struct rnbd_clt_session *sess)
> @@ -1181,7 +1226,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
> tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
> BLK_MQ_F_TAG_QUEUE_SHARED;
> tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
> - tag_set->nr_hw_queues = num_online_cpus();
> +
> + /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
> + tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
> + /*
> + * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
> + * others are for HCTX_TYPE_POLL
> + */
> + tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
> + tag_set->driver_data = sess;
>
> return blk_mq_alloc_tag_set(tag_set);
> }
> @@ -1189,7 +1242,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
> static struct rnbd_clt_session *
> find_and_get_or_create_sess(const char *sessname,
> const struct rtrs_addr *paths,
> - size_t path_cnt, u16 port_nr)
> + size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
> {
> struct rnbd_clt_session *sess;
> struct rtrs_attrs attrs;
> @@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname,
> struct rtrs_clt_ops rtrs_ops;
>
> sess = find_or_create_sess(sessname, &first);
> + if (sess == ERR_PTR(-ENOMEM))
> + return ERR_PTR(-ENOMEM);
> + else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
> + /*
> + * A device MUST have its own session to use the polling-mode.
> + * It must fail to map new device with the same session.
> + */
> + err = -EINVAL;
> + goto put_sess;
> + }
> +
> if (!first)
> return sess;
>
> @@ -1219,7 +1283,7 @@ find_and_get_or_create_sess(const char *sessname,
> 0, /* Do not use pdu of rtrs */
> RECONNECT_DELAY, BMAX_SEGMENTS,
> BLK_MAX_SEGMENT_SIZE,
> - MAX_RECONNECTS);
> + MAX_RECONNECTS, nr_poll_queues);
> if (IS_ERR(sess->rtrs)) {
> err = PTR_ERR(sess->rtrs);
> goto wake_up_and_put;
> @@ -1227,6 +1291,7 @@ find_and_get_or_create_sess(const char *sessname,
> rtrs_clt_query(sess->rtrs, &attrs);
> sess->max_io_size = attrs.max_io_size;
> sess->queue_depth = attrs.queue_depth;
> + sess->nr_poll_queues = nr_poll_queues;
>
> err = setup_mq_tags(sess);
> if (err)
> @@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
>
> static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
> enum rnbd_access_mode access_mode,
> - const char *pathname)
> + const char *pathname,
> + u32 nr_poll_queues)
> {
> struct rnbd_clt_dev *dev;
> int ret;
> @@ -1379,7 +1445,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
> if (!dev)
> return ERR_PTR(-ENOMEM);
>
> - dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
> + dev->hw_queues = kcalloc(nr_cpu_ids /* softirq */ + nr_poll_queues /* poll */,
Please don't add comments in the middle of a function call.
> + sizeof(*dev->hw_queues),
> GFP_KERNEL);
> if (!dev->hw_queues) {
> ret = -ENOMEM;
> @@ -1405,6 +1472,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
> dev->clt_device_id = ret;
> dev->sess = sess;
> dev->access_mode = access_mode;
> + dev->nr_poll_queues = nr_poll_queues;
> mutex_init(&dev->lock);
> refcount_set(&dev->refcount, 1);
> dev->dev_state = DEV_STATE_INIT;
> @@ -1491,7 +1559,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
> struct rtrs_addr *paths,
> size_t path_cnt, u16 port_nr,
> const char *pathname,
> - enum rnbd_access_mode access_mode)
> + enum rnbd_access_mode access_mode,
> + u32 nr_poll_queues)
> {
> struct rnbd_clt_session *sess;
> struct rnbd_clt_dev *dev;
> @@ -1500,11 +1569,11 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
> if (unlikely(exists_devpath(pathname, sessname)))
> return ERR_PTR(-EEXIST);
>
> - sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
> + sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
> if (IS_ERR(sess))
> return ERR_CAST(sess);
>
> - dev = init_dev(sess, access_mode, pathname);
> + dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
> if (IS_ERR(dev)) {
> pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
> pathname, sess->sessname, PTR_ERR(dev));
> diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
> index 714d426b449b..451e7383738f 100644
> --- a/drivers/block/rnbd/rnbd-clt.h
> +++ b/drivers/block/rnbd/rnbd-clt.h
> @@ -90,6 +90,7 @@ struct rnbd_clt_session {
> int queue_depth;
> u32 max_io_size;
> struct blk_mq_tag_set tag_set;
> + u32 nr_poll_queues;
> struct mutex lock; /* protects state and devs_list */
> struct list_head devs_list; /* list of struct rnbd_clt_dev */
> refcount_t refcount;
> @@ -118,6 +119,7 @@ struct rnbd_clt_dev {
> enum rnbd_clt_dev_state dev_state;
> char *pathname;
> enum rnbd_access_mode access_mode;
> + u32 nr_poll_queues;
> bool read_only;
> bool rotational;
> bool wc;
> @@ -147,7 +149,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
> struct rtrs_addr *paths,
> size_t path_cnt, u16 port_nr,
> const char *pathname,
> - enum rnbd_access_mode access_mode);
> + enum rnbd_access_mode access_mode,
> + u32 nr_poll_queues);
> int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
> const struct attribute *sysfs_self);
>
> diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
> index 7efd49bdc78c..467d135a82cf 100644
> --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
> +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
> @@ -174,7 +174,7 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
> int id = 0;
>
> if (likely(permit->con_type == RTRS_IO_CON))
> - id = (permit->cpu_id % (sess->s.con_num - 1)) + 1;
> + id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
>
> return to_clt_con(sess->s.con[id]);
> }
> @@ -1400,23 +1400,29 @@ static void rtrs_clt_close_work(struct work_struct *work);
> static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
> const struct rtrs_addr *path,
> size_t con_num, u16 max_segments,
> - size_t max_segment_size)
> + size_t max_segment_size, u32 nr_poll_queues)
> {
> struct rtrs_clt_sess *sess;
> int err = -ENOMEM;
> int cpu;
> + size_t total_con;
>
> sess = kzalloc(sizeof(*sess), GFP_KERNEL);
> if (!sess)
> goto err;
>
> - /* Extra connection for user messages */
> - con_num += 1;
> -
> - sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
> + /*
> + * irqmode and poll
> + * +1: Extra connection for user messages
> + */
> + total_con = con_num + nr_poll_queues + 1;
> + sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
> if (!sess->s.con)
> goto err_free_sess;
>
> + sess->s.con_num = total_con;
> + sess->s.irq_con_num = con_num + 1;
> +
> sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
> if (!sess->stats)
> goto err_free_con;
> @@ -1435,7 +1441,6 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
> memcpy(&sess->s.src_addr, path->src,
> rdma_addr_size((struct sockaddr *)path->src));
> strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
> - sess->s.con_num = con_num;
> sess->clt = clt;
> sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
> init_waitqueue_head(&sess->state_wq);
> @@ -1576,9 +1581,14 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
> }
> cq_size = max_send_wr + max_recv_wr;
> cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
> - err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
> - cq_vector, cq_size, max_send_wr,
> - max_recv_wr, IB_POLL_SOFTIRQ);
> + if (con->c.cid >= sess->s.irq_con_num)
> + err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
> + cq_vector, cq_size, max_send_wr,
> + max_recv_wr, IB_POLL_DIRECT);
> + else
> + err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
> + cq_vector, cq_size, max_send_wr,
> + max_recv_wr, IB_POLL_SOFTIRQ);
> /*
> * In case of error we do not bother to clean previous allocations,
> * since destroy_con_cq_qp() must be called.
> @@ -2631,6 +2641,7 @@ static void free_clt(struct rtrs_clt *clt)
> * @max_segment_size: Max. size of one segment
> * @max_reconnect_attempts: Number of times to reconnect on error before giving
> * up, 0 for * disabled, -1 for forever
> + * @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag
> *
> * Starts session establishment with the rtrs_server. The function can block
> * up to ~2000ms before it returns.
> @@ -2644,7 +2655,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
> size_t pdu_sz, u8 reconnect_delay_sec,
> u16 max_segments,
> size_t max_segment_size,
> - s16 max_reconnect_attempts)
> + s16 max_reconnect_attempts, u32 nr_poll_queues)
> {
> struct rtrs_clt_sess *sess, *tmp;
> struct rtrs_clt *clt;
> @@ -2662,7 +2673,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
> struct rtrs_clt_sess *sess;
>
> sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
> - max_segments, max_segment_size);
> + max_segments, max_segment_size, nr_poll_queues);
> if (IS_ERR(sess)) {
> err = PTR_ERR(sess);
> goto close_all_sess;
> @@ -2887,6 +2898,31 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
> }
> EXPORT_SYMBOL(rtrs_clt_request);
>
> +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
> +{
> + int cnt;
> + struct rtrs_con *con;
> + struct rtrs_clt_sess *sess;
> + struct path_it it;
> +
> + rcu_read_lock();
> + for (path_it_init(&it, clt);
> + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
> + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
We talked about useless likely/unlikely in your workloads.
> + continue;
> +
> + con = sess->s.con[index + 1];
> + cnt = ib_process_cq_direct(con->cq, -1);
> + if (likely(cnt))
> + break;
> + }
> + path_it_deinit(&it);
> + rcu_read_unlock();
> +
> + return cnt;
> +}
> +EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
> +
> /**
> * rtrs_clt_query() - queries RTRS session attributes
> *@clt: session pointer
> @@ -2916,7 +2952,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
> int err;
>
> sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
> - clt->max_segment_size);
> + clt->max_segment_size, 0);
> if (IS_ERR(sess))
> return PTR_ERR(sess);
>
> diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
> index 8caad0a2322b..00eb45053339 100644
> --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
> +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
> @@ -101,6 +101,7 @@ struct rtrs_sess {
> uuid_t uuid;
> struct rtrs_con **con;
> unsigned int con_num;
> + unsigned int irq_con_num;
> unsigned int recon_cnt;
> struct rtrs_ib_dev *dev;
> int dev_ref;
> diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
> index 2db1b5eb3ab0..f891fbe7abe6 100644
> --- a/drivers/infiniband/ulp/rtrs/rtrs.h
> +++ b/drivers/infiniband/ulp/rtrs/rtrs.h
> @@ -59,7 +59,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
> size_t pdu_sz, u8 reconnect_delay_sec,
> u16 max_segments,
> size_t max_segment_size,
> - s16 max_reconnect_attempts);
> + s16 max_reconnect_attempts, u32 nr_poll_queues);
>
> void rtrs_clt_close(struct rtrs_clt *sess);
>
> @@ -103,6 +103,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
> struct rtrs_clt *sess, struct rtrs_permit *permit,
> const struct kvec *vec, size_t nr, size_t len,
> struct scatterlist *sg, unsigned int sg_cnt);
> +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
>
> /**
> * rtrs_attrs - RTRS session attributes
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
2021-04-18 8:36 ` Leon Romanovsky
@ 2021-04-19 5:12 ` Gioh Kim
2021-04-19 5:20 ` Leon Romanovsky
0 siblings, 1 reply; 10+ messages in thread
From: Gioh Kim @ 2021-04-19 5:12 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-block, Jens Axboe, hch, sagi, Bart Van Assche, Haris Iqbal,
Jinpu Wang, Gioh Kim, linux-rdma, Jason Gunthorpe
On Sun, Apr 18, 2021 at 10:36 AM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Wed, Apr 14, 2021 at 02:23:56PM +0200, Gioh Kim wrote:
> > From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
> >
> > RNBD can make double-queues for irq-mode and poll-mode.
> > For example, on 4-CPU system 8 request-queues are created,
> > 4 for irq-mode and 4 for poll-mode.
> > If the IO has HIPRI flag, the block-layer will call .poll function
> > of RNBD. Then IO is sent to the poll-mode queue.
> > Add optional nr_poll_queues argument for map_devices interface.
> >
> > To support polling of RNBD, RTRS client creates connections
> > for both of irq-mode and direct-poll-mode.
> >
> > For example, on 4-CPU system it could've create 5 connections:
> > con[0] => user message (softirq cq)
> > con[1:4] => softirq cq
> >
> > After this patch, it can create 9 connections:
> > con[0] => user message (softirq cq)
> > con[1:4] => softirq cq
> > con[5:8] => DIRECT-POLL cq
> >
> > Cc: Leon Romanovsky <leonro@nvidia.com>
> > Cc: linux-rdma@vger.kernel.org
> > Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
> > Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
> > Acked-by: Jason Gunthorpe <jgg@nvidia.com>
> > ---
> > drivers/block/rnbd/rnbd-clt-sysfs.c | 56 +++++++++++++----
> > drivers/block/rnbd/rnbd-clt.c | 85 +++++++++++++++++++++++---
> > drivers/block/rnbd/rnbd-clt.h | 5 +-
> > drivers/infiniband/ulp/rtrs/rtrs-clt.c | 62 +++++++++++++++----
> > drivers/infiniband/ulp/rtrs/rtrs-pri.h | 1 +
> > drivers/infiniband/ulp/rtrs/rtrs.h | 3 +-
> > 6 files changed, 178 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
> > index 49015f428e67..bd111ebceb75 100644
> > --- a/drivers/block/rnbd/rnbd-clt-sysfs.c
> > +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
> > @@ -34,6 +34,7 @@ enum {
> > RNBD_OPT_DEV_PATH = 1 << 2,
> > RNBD_OPT_ACCESS_MODE = 1 << 3,
> > RNBD_OPT_SESSNAME = 1 << 6,
> > + RNBD_OPT_NR_POLL_QUEUES = 1 << 7,
> > };
> >
> > static const unsigned int rnbd_opt_mandatory[] = {
> > @@ -42,12 +43,13 @@ static const unsigned int rnbd_opt_mandatory[] = {
> > };
> >
> > static const match_table_t rnbd_opt_tokens = {
> > - {RNBD_OPT_PATH, "path=%s" },
> > - {RNBD_OPT_DEV_PATH, "device_path=%s"},
> > - {RNBD_OPT_DEST_PORT, "dest_port=%d" },
> > - {RNBD_OPT_ACCESS_MODE, "access_mode=%s"},
> > - {RNBD_OPT_SESSNAME, "sessname=%s" },
> > - {RNBD_OPT_ERR, NULL },
> > + {RNBD_OPT_PATH, "path=%s" },
> > + {RNBD_OPT_DEV_PATH, "device_path=%s" },
> > + {RNBD_OPT_DEST_PORT, "dest_port=%d" },
> > + {RNBD_OPT_ACCESS_MODE, "access_mode=%s" },
> > + {RNBD_OPT_SESSNAME, "sessname=%s" },
> > + {RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
> > + {RNBD_OPT_ERR, NULL },
> > };
> >
> > struct rnbd_map_options {
> > @@ -57,6 +59,7 @@ struct rnbd_map_options {
> > char *pathname;
> > u16 *dest_port;
> > enum rnbd_access_mode *access_mode;
> > + u32 *nr_poll_queues;
> > };
> >
> > static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
> > @@ -68,7 +71,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
> > int opt_mask = 0;
> > int token;
> > int ret = -EINVAL;
> > - int i, dest_port;
> > + int i, dest_port, nr_poll_queues;
> > int p_cnt = 0;
> >
> > options = kstrdup(buf, GFP_KERNEL);
> > @@ -178,6 +181,19 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt,
> > kfree(p);
> > break;
> >
> > + case RNBD_OPT_NR_POLL_QUEUES:
> > + if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 ||
> > + nr_poll_queues > (int)nr_cpu_ids) {
> > + pr_err("bad nr_poll_queues parameter '%d'\n",
> > + nr_poll_queues);
> > + ret = -EINVAL;
> > + goto out;
> > + }
> > + if (nr_poll_queues == -1)
> > + nr_poll_queues = nr_cpu_ids;
> > + *opt->nr_poll_queues = nr_poll_queues;
> > + break;
> > +
> > default:
> > pr_err("map_device: Unknown parameter or missing value '%s'\n",
> > p);
> > @@ -227,6 +243,20 @@ static ssize_t state_show(struct kobject *kobj,
> >
> > static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state);
> >
> > +static ssize_t nr_poll_queues_show(struct kobject *kobj,
> > + struct kobj_attribute *attr, char *page)
> > +{
> > + struct rnbd_clt_dev *dev;
> > +
> > + dev = container_of(kobj, struct rnbd_clt_dev, kobj);
> > +
> > + return snprintf(page, PAGE_SIZE, "%d\n",
> > + dev->nr_poll_queues);
> > +}
>
> Didn't Greg ask you to use sysfs_emit() here?
Right, I missed it.
I will fix it for next round.
>
> > +
> > +static struct kobj_attribute rnbd_clt_nr_poll_queues =
> > + __ATTR_RO(nr_poll_queues);
> > +
> > static ssize_t mapping_path_show(struct kobject *kobj,
> > struct kobj_attribute *attr, char *page)
> > {
> > @@ -421,6 +451,7 @@ static struct attribute *rnbd_dev_attrs[] = {
> > &rnbd_clt_state_attr.attr,
> > &rnbd_clt_session_attr.attr,
> > &rnbd_clt_access_mode.attr,
> > + &rnbd_clt_nr_poll_queues.attr,
> > NULL,
> > };
> >
> > @@ -469,7 +500,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj,
> > char *page)
> > {
> > return scnprintf(page, PAGE_SIZE,
> > - "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
> > + "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n",
> > attr->attr.name);
> > }
> >
> > @@ -541,6 +572,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
> > char sessname[NAME_MAX];
> > enum rnbd_access_mode access_mode = RNBD_ACCESS_RW;
> > u16 port_nr = RTRS_PORT;
> > + u32 nr_poll_queues = 0;
> >
> > struct sockaddr_storage *addrs;
> > struct rtrs_addr paths[6];
> > @@ -552,6 +584,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
> > opt.pathname = pathname;
> > opt.dest_port = &port_nr;
> > opt.access_mode = &access_mode;
> > + opt.nr_poll_queues = &nr_poll_queues;
> > addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL);
> > if (!addrs)
> > return -ENOMEM;
> > @@ -565,12 +598,13 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
> > if (ret)
> > goto out;
> >
> > - pr_info("Mapping device %s on session %s, (access_mode: %s)\n",
> > + pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
> > pathname, sessname,
> > - rnbd_access_mode_str(access_mode));
> > + rnbd_access_mode_str(access_mode),
> > + nr_poll_queues);
> >
> > dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
> > - access_mode);
> > + access_mode, nr_poll_queues);
> > if (IS_ERR(dev)) {
> > ret = PTR_ERR(dev);
> > goto out;
> > diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
> > index 9b44aac680d5..63719ec04d58 100644
> > --- a/drivers/block/rnbd/rnbd-clt.c
> > +++ b/drivers/block/rnbd/rnbd-clt.c
> > @@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
> > return ret;
> > }
> >
> > +static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
> > +{
> > + struct rnbd_queue *q = hctx->driver_data;
> > + struct rnbd_clt_dev *dev = q->dev;
> > + int cnt;
> > +
> > + cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
> > + return cnt;
> > +}
> > +
> > +static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
> > +{
> > + struct rnbd_clt_session *sess = set->driver_data;
> > +
> > + /* shared read/write queues */
> > + set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
> > + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
> > + set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
> > + set->map[HCTX_TYPE_READ].queue_offset = 0;
> > + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
> > + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
> > +
> > + if (sess->nr_poll_queues) {
> > + /* dedicated queue for poll */
> > + set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
> > + set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset +
> > + set->map[HCTX_TYPE_READ].nr_queues;
> > + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
> > + pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
> > + sess->sessname,
> > + set->map[HCTX_TYPE_DEFAULT].nr_queues,
> > + set->map[HCTX_TYPE_READ].nr_queues,
> > + set->map[HCTX_TYPE_POLL].nr_queues);
> > + } else {
> > + pr_info("[session=%s] mapped %d/%d default/read queues.\n",
> > + sess->sessname,
> > + set->map[HCTX_TYPE_DEFAULT].nr_queues,
> > + set->map[HCTX_TYPE_READ].nr_queues);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > static struct blk_mq_ops rnbd_mq_ops = {
> > .queue_rq = rnbd_queue_rq,
> > .complete = rnbd_softirq_done_fn,
> > + .map_queues = rnbd_rdma_map_queues,
> > + .poll = rnbd_rdma_poll,
> > };
> >
> > static int setup_mq_tags(struct rnbd_clt_session *sess)
> > @@ -1181,7 +1226,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
> > tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
> > BLK_MQ_F_TAG_QUEUE_SHARED;
> > tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;
> > - tag_set->nr_hw_queues = num_online_cpus();
> > +
> > + /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
> > + tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
> > + /*
> > + * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
> > + * others are for HCTX_TYPE_POLL
> > + */
> > + tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
> > + tag_set->driver_data = sess;
> >
> > return blk_mq_alloc_tag_set(tag_set);
> > }
> > @@ -1189,7 +1242,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess)
> > static struct rnbd_clt_session *
> > find_and_get_or_create_sess(const char *sessname,
> > const struct rtrs_addr *paths,
> > - size_t path_cnt, u16 port_nr)
> > + size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
> > {
> > struct rnbd_clt_session *sess;
> > struct rtrs_attrs attrs;
> > @@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname,
> > struct rtrs_clt_ops rtrs_ops;
> >
> > sess = find_or_create_sess(sessname, &first);
> > + if (sess == ERR_PTR(-ENOMEM))
> > + return ERR_PTR(-ENOMEM);
> > + else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
> > + /*
> > + * A device MUST have its own session to use the polling-mode.
> > + * It must fail to map new device with the same session.
> > + */
> > + err = -EINVAL;
> > + goto put_sess;
> > + }
> > +
> > if (!first)
> > return sess;
> >
> > @@ -1219,7 +1283,7 @@ find_and_get_or_create_sess(const char *sessname,
> > 0, /* Do not use pdu of rtrs */
> > RECONNECT_DELAY, BMAX_SEGMENTS,
> > BLK_MAX_SEGMENT_SIZE,
> > - MAX_RECONNECTS);
> > + MAX_RECONNECTS, nr_poll_queues);
> > if (IS_ERR(sess->rtrs)) {
> > err = PTR_ERR(sess->rtrs);
> > goto wake_up_and_put;
> > @@ -1227,6 +1291,7 @@ find_and_get_or_create_sess(const char *sessname,
> > rtrs_clt_query(sess->rtrs, &attrs);
> > sess->max_io_size = attrs.max_io_size;
> > sess->queue_depth = attrs.queue_depth;
> > + sess->nr_poll_queues = nr_poll_queues;
> >
> > err = setup_mq_tags(sess);
> > if (err)
> > @@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
> >
> > static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
> > enum rnbd_access_mode access_mode,
> > - const char *pathname)
> > + const char *pathname,
> > + u32 nr_poll_queues)
> > {
> > struct rnbd_clt_dev *dev;
> > int ret;
> > @@ -1379,7 +1445,8 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
> > if (!dev)
> > return ERR_PTR(-ENOMEM);
> >
> > - dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues),
> > + dev->hw_queues = kcalloc(nr_cpu_ids /* softirq */ + nr_poll_queues /* poll */,
>
> Please don't add comments in the middle of function call.
Ok, I will fix it for next round.
>
> > + sizeof(*dev->hw_queues),
> > GFP_KERNEL);
> > if (!dev->hw_queues) {
> > ret = -ENOMEM;
> > @@ -1405,6 +1472,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
> > dev->clt_device_id = ret;
> > dev->sess = sess;
> > dev->access_mode = access_mode;
> > + dev->nr_poll_queues = nr_poll_queues;
> > mutex_init(&dev->lock);
> > refcount_set(&dev->refcount, 1);
> > dev->dev_state = DEV_STATE_INIT;
> > @@ -1491,7 +1559,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
> > struct rtrs_addr *paths,
> > size_t path_cnt, u16 port_nr,
> > const char *pathname,
> > - enum rnbd_access_mode access_mode)
> > + enum rnbd_access_mode access_mode,
> > + u32 nr_poll_queues)
> > {
> > struct rnbd_clt_session *sess;
> > struct rnbd_clt_dev *dev;
> > @@ -1500,11 +1569,11 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
> > if (unlikely(exists_devpath(pathname, sessname)))
> > return ERR_PTR(-EEXIST);
> >
> > - sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr);
> > + sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
> > if (IS_ERR(sess))
> > return ERR_CAST(sess);
> >
> > - dev = init_dev(sess, access_mode, pathname);
> > + dev = init_dev(sess, access_mode, pathname, nr_poll_queues);
> > if (IS_ERR(dev)) {
> > pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n",
> > pathname, sess->sessname, PTR_ERR(dev));
> > diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
> > index 714d426b449b..451e7383738f 100644
> > --- a/drivers/block/rnbd/rnbd-clt.h
> > +++ b/drivers/block/rnbd/rnbd-clt.h
> > @@ -90,6 +90,7 @@ struct rnbd_clt_session {
> > int queue_depth;
> > u32 max_io_size;
> > struct blk_mq_tag_set tag_set;
> > + u32 nr_poll_queues;
> > struct mutex lock; /* protects state and devs_list */
> > struct list_head devs_list; /* list of struct rnbd_clt_dev */
> > refcount_t refcount;
> > @@ -118,6 +119,7 @@ struct rnbd_clt_dev {
> > enum rnbd_clt_dev_state dev_state;
> > char *pathname;
> > enum rnbd_access_mode access_mode;
> > + u32 nr_poll_queues;
> > bool read_only;
> > bool rotational;
> > bool wc;
> > @@ -147,7 +149,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
> > struct rtrs_addr *paths,
> > size_t path_cnt, u16 port_nr,
> > const char *pathname,
> > - enum rnbd_access_mode access_mode);
> > + enum rnbd_access_mode access_mode,
> > + u32 nr_poll_queues);
> > int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
> > const struct attribute *sysfs_self);
> >
> > diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
> > index 7efd49bdc78c..467d135a82cf 100644
> > --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
> > +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
> > @@ -174,7 +174,7 @@ struct rtrs_clt_con *rtrs_permit_to_clt_con(struct rtrs_clt_sess *sess,
> > int id = 0;
> >
> > if (likely(permit->con_type == RTRS_IO_CON))
> > - id = (permit->cpu_id % (sess->s.con_num - 1)) + 1;
> > + id = (permit->cpu_id % (sess->s.irq_con_num - 1)) + 1;
> >
> > return to_clt_con(sess->s.con[id]);
> > }
> > @@ -1400,23 +1400,29 @@ static void rtrs_clt_close_work(struct work_struct *work);
> > static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
> > const struct rtrs_addr *path,
> > size_t con_num, u16 max_segments,
> > - size_t max_segment_size)
> > + size_t max_segment_size, u32 nr_poll_queues)
> > {
> > struct rtrs_clt_sess *sess;
> > int err = -ENOMEM;
> > int cpu;
> > + size_t total_con;
> >
> > sess = kzalloc(sizeof(*sess), GFP_KERNEL);
> > if (!sess)
> > goto err;
> >
> > - /* Extra connection for user messages */
> > - con_num += 1;
> > -
> > - sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL);
> > + /*
> > + * irqmode and poll
> > + * +1: Extra connection for user messages
> > + */
> > + total_con = con_num + nr_poll_queues + 1;
> > + sess->s.con = kcalloc(total_con, sizeof(*sess->s.con), GFP_KERNEL);
> > if (!sess->s.con)
> > goto err_free_sess;
> >
> > + sess->s.con_num = total_con;
> > + sess->s.irq_con_num = con_num + 1;
> > +
> > sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL);
> > if (!sess->stats)
> > goto err_free_con;
> > @@ -1435,7 +1441,6 @@ static struct rtrs_clt_sess *alloc_sess(struct rtrs_clt *clt,
> > memcpy(&sess->s.src_addr, path->src,
> > rdma_addr_size((struct sockaddr *)path->src));
> > strlcpy(sess->s.sessname, clt->sessname, sizeof(sess->s.sessname));
> > - sess->s.con_num = con_num;
> > sess->clt = clt;
> > sess->max_pages_per_mr = max_segments * max_segment_size >> 12;
> > init_waitqueue_head(&sess->state_wq);
> > @@ -1576,9 +1581,14 @@ static int create_con_cq_qp(struct rtrs_clt_con *con)
> > }
> > cq_size = max_send_wr + max_recv_wr;
> > cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors;
> > - err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
> > - cq_vector, cq_size, max_send_wr,
> > - max_recv_wr, IB_POLL_SOFTIRQ);
> > + if (con->c.cid >= sess->s.irq_con_num)
> > + err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
> > + cq_vector, cq_size, max_send_wr,
> > + max_recv_wr, IB_POLL_DIRECT);
> > + else
> > + err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge,
> > + cq_vector, cq_size, max_send_wr,
> > + max_recv_wr, IB_POLL_SOFTIRQ);
> > /*
> > * In case of error we do not bother to clean previous allocations,
> > * since destroy_con_cq_qp() must be called.
> > @@ -2631,6 +2641,7 @@ static void free_clt(struct rtrs_clt *clt)
> > * @max_segment_size: Max. size of one segment
> > * @max_reconnect_attempts: Number of times to reconnect on error before giving
> > * up, 0 for * disabled, -1 for forever
> > + * @nr_poll_queues: number of polling mode connection using IB_POLL_DIRECT flag
> > *
> > * Starts session establishment with the rtrs_server. The function can block
> > * up to ~2000ms before it returns.
> > @@ -2644,7 +2655,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
> > size_t pdu_sz, u8 reconnect_delay_sec,
> > u16 max_segments,
> > size_t max_segment_size,
> > - s16 max_reconnect_attempts)
> > + s16 max_reconnect_attempts, u32 nr_poll_queues)
> > {
> > struct rtrs_clt_sess *sess, *tmp;
> > struct rtrs_clt *clt;
> > @@ -2662,7 +2673,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
> > struct rtrs_clt_sess *sess;
> >
> > sess = alloc_sess(clt, &paths[i], nr_cpu_ids,
> > - max_segments, max_segment_size);
> > + max_segments, max_segment_size, nr_poll_queues);
> > if (IS_ERR(sess)) {
> > err = PTR_ERR(sess);
> > goto close_all_sess;
> > @@ -2887,6 +2898,31 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
> > }
> > EXPORT_SYMBOL(rtrs_clt_request);
> >
> > +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
> > +{
> > + int cnt;
> > + struct rtrs_con *con;
> > + struct rtrs_clt_sess *sess;
> > + struct path_it it;
> > +
> > + rcu_read_lock();
> > + for (path_it_init(&it, clt);
> > + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
> > + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
>
> We talked about useless likely/unlikely in your workloads.
Right, I've made a patch to remove all likely/unlikely
and will send it with the next patch set.
I thought it could be better for review to keep the patches
in the patch set. So if this set is applied, I will send a small patch set
to remove likely/unlikely and do some cleanup.
>
> > + continue;
> > +
> > + con = sess->s.con[index + 1];
> > + cnt = ib_process_cq_direct(con->cq, -1);
> > + if (likely(cnt))
> > + break;
> > + }
> > + path_it_deinit(&it);
> > + rcu_read_unlock();
> > +
> > + return cnt;
> > +}
> > +EXPORT_SYMBOL(rtrs_clt_rdma_cq_direct);
> > +
> > /**
> > * rtrs_clt_query() - queries RTRS session attributes
> > *@clt: session pointer
> > @@ -2916,7 +2952,7 @@ int rtrs_clt_create_path_from_sysfs(struct rtrs_clt *clt,
> > int err;
> >
> > sess = alloc_sess(clt, addr, nr_cpu_ids, clt->max_segments,
> > - clt->max_segment_size);
> > + clt->max_segment_size, 0);
> > if (IS_ERR(sess))
> > return PTR_ERR(sess);
> >
> > diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
> > index 8caad0a2322b..00eb45053339 100644
> > --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
> > +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
> > @@ -101,6 +101,7 @@ struct rtrs_sess {
> > uuid_t uuid;
> > struct rtrs_con **con;
> > unsigned int con_num;
> > + unsigned int irq_con_num;
> > unsigned int recon_cnt;
> > struct rtrs_ib_dev *dev;
> > int dev_ref;
> > diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h
> > index 2db1b5eb3ab0..f891fbe7abe6 100644
> > --- a/drivers/infiniband/ulp/rtrs/rtrs.h
> > +++ b/drivers/infiniband/ulp/rtrs/rtrs.h
> > @@ -59,7 +59,7 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops,
> > size_t pdu_sz, u8 reconnect_delay_sec,
> > u16 max_segments,
> > size_t max_segment_size,
> > - s16 max_reconnect_attempts);
> > + s16 max_reconnect_attempts, u32 nr_poll_queues);
> >
> > void rtrs_clt_close(struct rtrs_clt *sess);
> >
> > @@ -103,6 +103,7 @@ int rtrs_clt_request(int dir, struct rtrs_clt_req_ops *ops,
> > struct rtrs_clt *sess, struct rtrs_permit *permit,
> > const struct kvec *vec, size_t nr, size_t len,
> > struct scatterlist *sg, unsigned int sg_cnt);
> > +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index);
> >
> > /**
> > * rtrs_attrs - RTRS session attributes
> > --
> > 2.25.1
> >
Thank you for the review.
I will send V5 soon.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
2021-04-19 5:12 ` Gioh Kim
@ 2021-04-19 5:20 ` Leon Romanovsky
2021-04-19 5:51 ` Gioh Kim
0 siblings, 1 reply; 10+ messages in thread
From: Leon Romanovsky @ 2021-04-19 5:20 UTC (permalink / raw)
To: Gioh Kim
Cc: linux-block, Jens Axboe, hch, sagi, Bart Van Assche, Haris Iqbal,
Jinpu Wang, Gioh Kim, linux-rdma, Jason Gunthorpe
On Mon, Apr 19, 2021 at 07:12:09AM +0200, Gioh Kim wrote:
> On Sun, Apr 18, 2021 at 10:36 AM Leon Romanovsky <leon@kernel.org> wrote:
> >
> > On Wed, Apr 14, 2021 at 02:23:56PM +0200, Gioh Kim wrote:
> > > From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
> > >
> > > RNBD can make double-queues for irq-mode and poll-mode.
> > > For example, on 4-CPU system 8 request-queues are created,
> > > 4 for irq-mode and 4 for poll-mode.
> > > If the IO has HIPRI flag, the block-layer will call .poll function
> > > of RNBD. Then IO is sent to the poll-mode queue.
> > > Add optional nr_poll_queues argument for map_devices interface.
> > >
> > > To support polling of RNBD, RTRS client creates connections
> > > for both of irq-mode and direct-poll-mode.
> > >
> > > For example, on 4-CPU system it could've create 5 connections:
> > > con[0] => user message (softirq cq)
> > > con[1:4] => softirq cq
> > >
> > > After this patch, it can create 9 connections:
> > > con[0] => user message (softirq cq)
> > > con[1:4] => softirq cq
> > > con[5:8] => DIRECT-POLL cq
<...>
> > > +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
> > > +{
> > > + int cnt;
> > > + struct rtrs_con *con;
> > > + struct rtrs_clt_sess *sess;
> > > + struct path_it it;
> > > +
> > > + rcu_read_lock();
> > > + for (path_it_init(&it, clt);
> > > + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
> > > + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
> >
> > We talked about useless likely/unlikely in your workloads.
>
> Right, I've made a patch to remove all likely/unlikely
> and will send with the next patch set.
This specific line is "brand new". We don't add code that will be
removed in the next patch.
Thanks
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
2021-04-19 5:20 ` Leon Romanovsky
@ 2021-04-19 5:51 ` Gioh Kim
2021-04-19 6:09 ` Leon Romanovsky
0 siblings, 1 reply; 10+ messages in thread
From: Gioh Kim @ 2021-04-19 5:51 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-block, Jens Axboe, hch, sagi, Bart Van Assche, Haris Iqbal,
Jinpu Wang, Gioh Kim, linux-rdma, Jason Gunthorpe
On Mon, Apr 19, 2021 at 7:46 AM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Mon, Apr 19, 2021 at 07:12:09AM +0200, Gioh Kim wrote:
> > On Sun, Apr 18, 2021 at 10:36 AM Leon Romanovsky <leon@kernel.org> wrote:
> > >
> > > On Wed, Apr 14, 2021 at 02:23:56PM +0200, Gioh Kim wrote:
> > > > From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
> > > >
> > > > RNBD can make double-queues for irq-mode and poll-mode.
> > > > For example, on 4-CPU system 8 request-queues are created,
> > > > 4 for irq-mode and 4 for poll-mode.
> > > > If the IO has HIPRI flag, the block-layer will call .poll function
> > > > of RNBD. Then IO is sent to the poll-mode queue.
> > > > Add optional nr_poll_queues argument for map_devices interface.
> > > >
> > > > To support polling of RNBD, RTRS client creates connections
> > > > for both of irq-mode and direct-poll-mode.
> > > >
> > > > For example, on 4-CPU system it could've create 5 connections:
> > > > con[0] => user message (softirq cq)
> > > > con[1:4] => softirq cq
> > > >
> > > > After this patch, it can create 9 connections:
> > > > con[0] => user message (softirq cq)
> > > > con[1:4] => softirq cq
> > > > con[5:8] => DIRECT-POLL cq
>
> <...>
I am sorry that I don't understand exactly.
Do I need to change them to "con<5..8>"?
>
> > > > +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
> > > > +{
> > > > + int cnt;
> > > > + struct rtrs_con *con;
> > > > + struct rtrs_clt_sess *sess;
> > > > + struct path_it it;
> > > > +
> > > > + rcu_read_lock();
> > > > + for (path_it_init(&it, clt);
> > > > + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
> > > > + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
> > >
> > > We talked about useless likely/unlikely in your workloads.
> >
> > Right, I've made a patch to remove all likely/unlikely
> > and will send with the next patch set.
>
> This specific line is "brand new". We don't add code that will be
> removed in next patch.
Ah, ok. So you mean,
1. remove unlikely from that line
2. send a patch to remove all likely/unlikely for next round
Am I right?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
2021-04-19 5:51 ` Gioh Kim
@ 2021-04-19 6:09 ` Leon Romanovsky
2021-04-19 6:15 ` Gioh Kim
0 siblings, 1 reply; 10+ messages in thread
From: Leon Romanovsky @ 2021-04-19 6:09 UTC (permalink / raw)
To: Gioh Kim
Cc: linux-block, Jens Axboe, hch, sagi, Bart Van Assche, Haris Iqbal,
Jinpu Wang, Gioh Kim, linux-rdma, Jason Gunthorpe
On Mon, Apr 19, 2021 at 07:51:34AM +0200, Gioh Kim wrote:
> On Mon, Apr 19, 2021 at 7:46 AM Leon Romanovsky <leon@kernel.org> wrote:
> >
> > On Mon, Apr 19, 2021 at 07:12:09AM +0200, Gioh Kim wrote:
> > > On Sun, Apr 18, 2021 at 10:36 AM Leon Romanovsky <leon@kernel.org> wrote:
> > > >
> > > > On Wed, Apr 14, 2021 at 02:23:56PM +0200, Gioh Kim wrote:
> > > > > From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
> > > > >
> > > > > RNBD can make double-queues for irq-mode and poll-mode.
> > > > > For example, on 4-CPU system 8 request-queues are created,
> > > > > 4 for irq-mode and 4 for poll-mode.
> > > > > If the IO has HIPRI flag, the block-layer will call .poll function
> > > > > of RNBD. Then IO is sent to the poll-mode queue.
> > > > > Add optional nr_poll_queues argument for map_devices interface.
> > > > >
> > > > > To support polling of RNBD, RTRS client creates connections
> > > > > for both of irq-mode and direct-poll-mode.
> > > > >
> > > > > For example, on 4-CPU system it could've create 5 connections:
> > > > > con[0] => user message (softirq cq)
> > > > > con[1:4] => softirq cq
> > > > >
> > > > > After this patch, it can create 9 connections:
> > > > > con[0] => user message (softirq cq)
> > > > > con[1:4] => softirq cq
> > > > > con[5:8] => DIRECT-POLL cq
> >
> > <...>
>
> I am sorry that I don't understand exactly.
> Do I need to change them to "con<5..8>"?
No, I just removed not relevant text and replaced it with <...> in
automatic way :).
>
>
> >
> > > > > +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
> > > > > +{
> > > > > + int cnt;
> > > > > + struct rtrs_con *con;
> > > > > + struct rtrs_clt_sess *sess;
> > > > > + struct path_it it;
> > > > > +
> > > > > + rcu_read_lock();
> > > > > + for (path_it_init(&it, clt);
> > > > > + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
> > > > > + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
> > > >
> > > > We talked about useless likely/unlikely in your workloads.
> > >
> > > Right, I've made a patch to remove all likely/unlikely
> > > and will send with the next patch set.
> >
> > This specific line is "brand new". We don't add code that will be
> > removed in next patch.
>
> Ah, ok. So you mean,
> 1. remove unlikely from that line
> 2. send a patch to remove all likely/unlikely for next round
>
> Am I right?
Right
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization
2021-04-19 6:09 ` Leon Romanovsky
@ 2021-04-19 6:15 ` Gioh Kim
0 siblings, 0 replies; 10+ messages in thread
From: Gioh Kim @ 2021-04-19 6:15 UTC (permalink / raw)
To: Leon Romanovsky
Cc: linux-block, Jens Axboe, hch, sagi, Bart Van Assche, Haris Iqbal,
Jinpu Wang, Gioh Kim, linux-rdma, Jason Gunthorpe
On Mon, Apr 19, 2021 at 8:09 AM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Mon, Apr 19, 2021 at 07:51:34AM +0200, Gioh Kim wrote:
> > On Mon, Apr 19, 2021 at 7:46 AM Leon Romanovsky <leon@kernel.org> wrote:
> > >
> > > On Mon, Apr 19, 2021 at 07:12:09AM +0200, Gioh Kim wrote:
> > > > On Sun, Apr 18, 2021 at 10:36 AM Leon Romanovsky <leon@kernel.org> wrote:
> > > > >
> > > > > On Wed, Apr 14, 2021 at 02:23:56PM +0200, Gioh Kim wrote:
> > > > > > From: Gioh Kim <gi-oh.kim@cloud.ionos.com>
> > > > > >
> > > > > > RNBD can make double-queues for irq-mode and poll-mode.
> > > > > > For example, on a 4-CPU system 8 request-queues are created,
> > > > > > 4 for irq-mode and 4 for poll-mode.
> > > > > > If the IO has HIPRI flag, the block-layer will call .poll function
> > > > > > of RNBD. Then IO is sent to the poll-mode queue.
> > > > > > Add optional nr_poll_queues argument for map_devices interface.
> > > > > >
> > > > > > To support polling of RNBD, RTRS client creates connections
> > > > > > for both of irq-mode and direct-poll-mode.
> > > > > >
> > > > > > For example, on a 4-CPU system it could create 5 connections:
> > > > > > con[0] => user message (softirq cq)
> > > > > > con[1:4] => softirq cq
> > > > > >
> > > > > > After this patch, it can create 9 connections:
> > > > > > con[0] => user message (softirq cq)
> > > > > > con[1:4] => softirq cq
> > > > > > con[5:8] => DIRECT-POLL cq
> > >
> > > <...>
> >
> > I am sorry that I don't understand exactly.
> > Do I need to change them to "con<5..8>"?
>
> No, I just removed the irrelevant text and replaced it with <...> in
> an automatic way :).
Oh ;-)
>
> >
> >
> > >
> > > > > > +int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
> > > > > > +{
> > > > > > + int cnt;
> > > > > > + struct rtrs_con *con;
> > > > > > + struct rtrs_clt_sess *sess;
> > > > > > + struct path_it it;
> > > > > > +
> > > > > > + rcu_read_lock();
> > > > > > + for (path_it_init(&it, clt);
> > > > > > + (sess = it.next_path(&it)) && it.i < it.clt->paths_num; it.i++) {
> > > > > > + if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
> > > > >
> > > > > We talked about useless likely/unlikely in your workloads.
> > > >
> > > > Right, I've made a patch to remove all likely/unlikely
> > > > and will send with the next patch set.
> > >
> > > This specific line is "brand new". We don't add code that will be
> > > removed in next patch.
> >
> > Ah, ok. So you mean,
> > 1. remove unlikely from that line
> > 2. send a patch to remove all likely/unlikely for next round
> >
> > Am I right?
>
> Right
Thank you very much.
I will send V5 soon.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2021-04-19 6:16 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20210414122402.203388-1-gi-oh.kim@ionos.com>
2021-04-14 12:23 ` [PATCHv4 for-next 08/19] block/rnbd-clt: Replace {NO_WAIT,WAIT} with RTRS_PERMIT_{WAIT,NOWAIT} Gioh Kim
2021-04-14 12:23 ` [PATCHv4 for-next 13/19] block/rnbd-clt: Support polling mode for IO latency optimization Gioh Kim
2021-04-18 8:36 ` Leon Romanovsky
2021-04-19 5:12 ` Gioh Kim
2021-04-19 5:20 ` Leon Romanovsky
2021-04-19 5:51 ` Gioh Kim
2021-04-19 6:09 ` Leon Romanovsky
2021-04-19 6:15 ` Gioh Kim
2021-04-14 12:23 ` [PATCHv4 for-next 15/19] block/rnbd-srv: Remove unused arguments of rnbd_srv_rdma_ev Gioh Kim
2021-04-14 12:24 ` [PATCHv4 for-next 17/19] block/rnbd-clt: Remove max_segment_size Gioh Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).