* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
@ 2019-05-29  5:49 Sagi Grimberg
  2019-05-29  5:49 ` [PATCH v2 2/2] nvme-tcp: " Sagi Grimberg
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-29  5:49 UTC (permalink / raw)


When the controller supports fewer queues than requested, we
should make sure that queue mapping does the right thing and
not assume that all of the requested queues are available. This
fixes a crash in that case.

The rules are:
1. if no write/poll queues are requested, we assign the available queues
   to the default queue map. The default and read queue maps share the
   existing queues.
2. if write queues are requested:
  - first make sure that the read queue map gets the requested
    nr_io_queues count
  - then grant the default queue map the minimum of the requested
    nr_write_queues and the remaining queues. If there are no available
    queues to dedicate to the default queue map, fall back to (1) and
    share all the queues in the existing queue map.
3. if poll queues are requested:
  - map the remaining queues to the poll queue map.

Also, provide a log indication of how we constructed the different
queue maps.
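
For illustration only, here is a minimal stand-alone C sketch of the
allocation policy above (the function and parameter names are
hypothetical; the real logic lives in nvme_rdma_alloc_io_queues() in
the diff below):

#define MIN(a, b)	((a) < (b) ? (a) : (b))

enum { EX_DEFAULT, EX_READ, EX_POLL, EX_NR };

/* "granted" is what the controller actually gave us; the want_* values
 * are the (already clamped) requested sizes of each queue set. */
void example_assign_queue_sets(unsigned int granted,
			       unsigned int want_read,
			       unsigned int want_default,
			       unsigned int want_poll,
			       unsigned int io_queues[EX_NR])
{
	io_queues[EX_DEFAULT] = io_queues[EX_READ] = io_queues[EX_POLL] = 0;

	if (want_default && want_read < granted) {
		/* rule (2): dedicated default queues, read queues come first */
		io_queues[EX_READ] = want_read;
		granted -= io_queues[EX_READ];
		io_queues[EX_DEFAULT] = MIN(want_default, granted);
		granted -= io_queues[EX_DEFAULT];
	} else {
		/* rule (1): default and read maps share the same queues */
		io_queues[EX_DEFAULT] = MIN(want_read, granted);
		granted -= io_queues[EX_DEFAULT];
	}

	if (want_poll && granted)
		/* rule (3): whatever is left becomes poll queues */
		io_queues[EX_POLL] = MIN(want_poll, granted);
}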

Reported-by: Harris, James R <james.r.harris at intel.com>
Cc: <stable at vger.kernel.org> # v5.0+
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
Changes from v1:
- keep the num_comp_vectors upper limit on number of io queues
- keep the queue sets sizes in local variables for readability

 drivers/nvme/host/rdma.c | 99 +++++++++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 38 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f383146e7d0f..26709a2ab593 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -641,34 +641,16 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	struct ib_device *ibdev = ctrl->device->dev;
-	unsigned int nr_io_queues;
+	unsigned int nr_io_queues, nr_default_queues;
+	unsigned int nr_read_queues, nr_poll_queues;
 	int i, ret;
 
-	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
-
-	/*
-	 * we map queues according to the device irq vectors for
-	 * optimal locality so we don't need more queues than
-	 * completion vectors.
-	 */
-	nr_io_queues = min_t(unsigned int, nr_io_queues,
-				ibdev->num_comp_vectors);
-
-	if (opts->nr_write_queues) {
-		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
-				min(opts->nr_write_queues, nr_io_queues);
-		nr_io_queues += ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	} else {
-		ctrl->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
-	}
-
-	ctrl->io_queues[HCTX_TYPE_READ] = nr_io_queues;
-
-	if (opts->nr_poll_queues) {
-		ctrl->io_queues[HCTX_TYPE_POLL] =
-			min(opts->nr_poll_queues, num_online_cpus());
-		nr_io_queues += ctrl->io_queues[HCTX_TYPE_POLL];
-	}
+	nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
+				min(opts->nr_io_queues, num_online_cpus()));
+	nr_default_queues =  min_t(unsigned int, ibdev->num_comp_vectors,
+				min(opts->nr_write_queues, num_online_cpus()));
+	nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
+	nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
 
 	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret)
@@ -681,6 +663,34 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
+	if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
+		/*
+		 * separate read/write queues
+		 * hand out dedicated default queues only after we have
+		 * sufficient read queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(nr_default_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	} else {
+		/*
+		 * shared read/write queues
+		 * either no write queues were requested, or we don't have
+		 * sufficient queue count to have dedicated default queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(nr_read_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	}
+
+	if (opts->nr_poll_queues && nr_io_queues) {
+		/* map dedicated poll queues only if we have queues left */
+		ctrl->io_queues[HCTX_TYPE_POLL] =
+			min(nr_poll_queues, nr_io_queues);
+	}
+
 	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_rdma_alloc_queue(ctrl, i,
 				ctrl->ctrl.sqsize + 1);
@@ -1763,17 +1773,24 @@ static void nvme_rdma_complete_rq(struct request *rq)
 static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_rdma_ctrl *ctrl = set->driver_data;
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 
-	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-	set->map[HCTX_TYPE_DEFAULT].nr_queues =
-			ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	set->map[HCTX_TYPE_READ].nr_queues = ctrl->io_queues[HCTX_TYPE_READ];
-	if (ctrl->ctrl.opts->nr_write_queues) {
+	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
 		/* separate read/write queues */
+		set->map[HCTX_TYPE_DEFAULT].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_READ];
 		set->map[HCTX_TYPE_READ].queue_offset =
-				ctrl->io_queues[HCTX_TYPE_DEFAULT];
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 	} else {
-		/* mixed read/write queues */
+		/* shared read/write queues */
+		set->map[HCTX_TYPE_DEFAULT].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 		set->map[HCTX_TYPE_READ].queue_offset = 0;
 	}
 	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
@@ -1781,16 +1798,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
 			ctrl->device->dev, 0);
 
-	if (ctrl->ctrl.opts->nr_poll_queues) {
+	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
+		/* map dedicated poll queues only if we have queues left */
 		set->map[HCTX_TYPE_POLL].nr_queues =
 				ctrl->io_queues[HCTX_TYPE_POLL];
 		set->map[HCTX_TYPE_POLL].queue_offset =
-				ctrl->io_queues[HCTX_TYPE_DEFAULT];
-		if (ctrl->ctrl.opts->nr_write_queues)
-			set->map[HCTX_TYPE_POLL].queue_offset +=
-				ctrl->io_queues[HCTX_TYPE_READ];
+			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ];
 		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
 	}
+
+	dev_info(ctrl->ctrl.device,
+		"mapped %d/%d/%d default/read/poll queues.\n",
+		ctrl->io_queues[HCTX_TYPE_DEFAULT],
+		ctrl->io_queues[HCTX_TYPE_READ],
+		ctrl->io_queues[HCTX_TYPE_POLL]);
+
 	return 0;
 }
 
-- 
2.17.1


* [PATCH v2 2/2] nvme-tcp: fix queue mapping when queue count is limited
  2019-05-29  5:49 [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited Sagi Grimberg
@ 2019-05-29  5:49 ` Sagi Grimberg
  2019-05-29 15:55   ` Harris, James R
  2019-06-01  8:58   ` Christoph Hellwig
  2019-05-29 15:50 ` [PATCH v2 1/2] nvme-rdma: " Max Gurtovoy
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-29  5:49 UTC (permalink / raw)


When the controller supports fewer queues than requested, we
should make sure that queue mapping does the right thing and
not assume that all of the requested queues are available. This
fixes a crash in that case.

The rules are:
1. if no write queues are requested, we assign the available queues
   to the default queue map. The default and read queue maps share the
   existing queues.
2. if write queues are requested:
  - first make sure that the read queue map gets the requested
    nr_io_queues count
  - then grant the default queue map the minimum of the requested
    nr_write_queues and the remaining queues. If there are no available
    queues to dedicate to the default queue map, fall back to (1) and
    share all the queues in the existing queue map.

Also, provide a log indication of how we constructed the different
queue maps.
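
For example (purely illustrative numbers, assuming at least four online
CPUs): with nr_io_queues=4 and nr_write_queues=4 we ask the controller
for 8 queues. If it only grants 6, the read queue map gets 4 and the
default map gets the remaining 2. If it grants only 4, there is nothing
left to dedicate, so the default and read maps share all 4 queues.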

Reported-by: Harris, James R <james.r.harris at intel.com>
Cc: <stable at vger.kernel.org> # v5.0+
Suggested-by: Roy Shterman <roys at lightbitslabs.com>
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
 drivers/nvme/host/tcp.c | 57 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2b107a1d152b..08a2501b9357 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -111,6 +111,7 @@ struct nvme_tcp_ctrl {
 	struct work_struct	err_work;
 	struct delayed_work	connect_work;
 	struct nvme_tcp_request async_req;
+	u32			io_queues[HCTX_MAX_TYPES];
 };
 
 static LIST_HEAD(nvme_tcp_ctrl_list);
@@ -1564,6 +1565,35 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
 	return nr_io_queues;
 }
 
+static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
+		unsigned int nr_io_queues)
+{
+	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+	struct nvmf_ctrl_options *opts = nctrl->opts;
+
+	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
+		/*
+		 * separate read/write queues
+		 * hand out dedicated default queues only after we have
+		 * sufficient read queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(opts->nr_write_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	} else {
+		/*
+		 * shared read/write queues
+		 * either no write queues were requested, or we don't have
+		 * sufficient queue count to have dedicated default queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(opts->nr_io_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	}
+}
+
 static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	unsigned int nr_io_queues;
@@ -1581,6 +1611,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
+	nvme_tcp_set_io_queues(ctrl, nr_io_queues);
+
 	return __nvme_tcp_alloc_io_queues(ctrl);
 }
 
@@ -2089,23 +2121,34 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_tcp_ctrl *ctrl = set->driver_data;
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 
-	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-	set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
-	if (ctrl->ctrl.opts->nr_write_queues) {
+	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
 		/* separate read/write queues */
 		set->map[HCTX_TYPE_DEFAULT].nr_queues =
-				ctrl->ctrl.opts->nr_write_queues;
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_READ];
 		set->map[HCTX_TYPE_READ].queue_offset =
-				ctrl->ctrl.opts->nr_write_queues;
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 	} else {
-		/* mixed read/write queues */
+		/* shared read/write queues */
 		set->map[HCTX_TYPE_DEFAULT].nr_queues =
-				ctrl->ctrl.opts->nr_io_queues;
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 		set->map[HCTX_TYPE_READ].queue_offset = 0;
 	}
 	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
 	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+	dev_info(ctrl->ctrl.device,
+		"mapped %d/%d default/read queues.\n",
+		ctrl->io_queues[HCTX_TYPE_DEFAULT],
+		ctrl->io_queues[HCTX_TYPE_READ]);
+
 	return 0;
 }
 
-- 
2.17.1


* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29  5:49 [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited Sagi Grimberg
  2019-05-29  5:49 ` [PATCH v2 2/2] nvme-tcp: " Sagi Grimberg
@ 2019-05-29 15:50 ` Max Gurtovoy
  2019-05-29 17:58   ` Sagi Grimberg
  2019-05-29 15:55 ` Harris, James R
  2019-06-01  8:59 ` Christoph Hellwig
  3 siblings, 1 reply; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-29 15:50 UTC (permalink / raw)



On 5/29/2019 8:49 AM, Sagi Grimberg wrote:
> @@ -1781,16 +1798,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
>   	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
>   			ctrl->device->dev, 0);
>   
> -	if (ctrl->ctrl.opts->nr_poll_queues) {
> +	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
> +		/* map dedicated poll queues only if we have queues left */
>   		set->map[HCTX_TYPE_POLL].nr_queues =
>   				ctrl->io_queues[HCTX_TYPE_POLL];
>   		set->map[HCTX_TYPE_POLL].queue_offset =
> -				ctrl->io_queues[HCTX_TYPE_DEFAULT];
> -		if (ctrl->ctrl.opts->nr_write_queues)
> -			set->map[HCTX_TYPE_POLL].queue_offset +=
> -				ctrl->io_queues[HCTX_TYPE_READ];
> +			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
> +			ctrl->io_queues[HCTX_TYPE_READ];

in case of shared queues? ctrl->io_queues[HCTX_TYPE_READ] != 
set->map[HCTX_TYPE_READ].nr_queues.

Shouldn't we jump over (the queue_offset) the set->map values ?

>   		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
>   	}
> +
> +	dev_info(ctrl->ctrl.device,
> +		"mapped %d/%d/%d default/read/poll queues.\n",
> +		ctrl->io_queues[HCTX_TYPE_DEFAULT],
> +		ctrl->io_queues[HCTX_TYPE_READ],
> +		ctrl->io_queues[HCTX_TYPE_POLL]);
> +
>   	return 0;
>   }
>   


* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29  5:49 [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited Sagi Grimberg
  2019-05-29  5:49 ` [PATCH v2 2/2] nvme-tcp: " Sagi Grimberg
  2019-05-29 15:50 ` [PATCH v2 1/2] nvme-rdma: " Max Gurtovoy
@ 2019-05-29 15:55 ` Harris, James R
  2019-06-01  8:59 ` Christoph Hellwig
  3 siblings, 0 replies; 11+ messages in thread
From: Harris, James R @ 2019-05-29 15:55 UTC (permalink / raw)




On 5/28/19, 10:49 PM, "Sagi Grimberg" <sagi@grimberg.me> wrote:


v2 looks good.

Tested-by: Jim Harris <james.r.harris at intel.com>


* [PATCH v2 2/2] nvme-tcp: fix queue mapping when queue count is limited
  2019-05-29  5:49 ` [PATCH v2 2/2] nvme-tcp: " Sagi Grimberg
@ 2019-05-29 15:55   ` Harris, James R
  2019-06-01  8:58   ` Christoph Hellwig
  1 sibling, 0 replies; 11+ messages in thread
From: Harris, James R @ 2019-05-29 15:55 UTC (permalink / raw)




On 5/28/19, 10:49 PM, "Sagi Grimberg" <sagi@grimberg.me> wrote:


v2 looks good.

Tested-by: Jim Harris <james.r.harris at intel.com>

* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29 15:50 ` [PATCH v2 1/2] nvme-rdma: " Max Gurtovoy
@ 2019-05-29 17:58   ` Sagi Grimberg
  2019-05-29 20:27     ` Sagi Grimberg
  2019-05-29 22:22     ` Max Gurtovoy
  0 siblings, 2 replies; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-29 17:58 UTC (permalink / raw)



>> @@ -1781,16 +1798,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
>>  	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
>>  			ctrl->device->dev, 0);
>> -	if (ctrl->ctrl.opts->nr_poll_queues) {
>> +	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
>> +		/* map dedicated poll queues only if we have queues left */
>>  		set->map[HCTX_TYPE_POLL].nr_queues =
>>  				ctrl->io_queues[HCTX_TYPE_POLL];
>>  		set->map[HCTX_TYPE_POLL].queue_offset =
>> -				ctrl->io_queues[HCTX_TYPE_DEFAULT];
>> -		if (ctrl->ctrl.opts->nr_write_queues)
>> -			set->map[HCTX_TYPE_POLL].queue_offset +=
>> -				ctrl->io_queues[HCTX_TYPE_READ];
>> +			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
>> +			ctrl->io_queues[HCTX_TYPE_READ];
> 
> in case of shared queues? ctrl->io_queues[HCTX_TYPE_READ] != 
> set->map[HCTX_TYPE_READ].nr_queues.
> 
> Shouldn't we jump over (the queue_offset) the set->map values ?

This is exactly why we shouldn't. In the shared case, we only have
HCTX_TYPE_DEFAULT and HCTX_TYPE_READ is zero so this case jumps
exactly how much it needs to.
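
To make that concrete, here is a small stand-alone sketch (hypothetical
queue counts, plain user-space C) of the offset arithmetic in both cases:

#include <stdio.h>

int main(void)
{
	/* separate read/write queues: 2 default, 2 read, 1 poll */
	unsigned int sep_default = 2, sep_read = 2;
	/* shared read/write queues: 3 default, io_queues[READ] is 0, 1 poll */
	unsigned int shr_default = 3, shr_read = 0;

	/* the poll map's queue_offset is DEFAULT + READ in both cases */
	printf("separate: poll queue_offset = %u\n", sep_default + sep_read); /* 4 */
	printf("shared:   poll queue_offset = %u\n", shr_default + shr_read); /* 3 */
	return 0;
}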


* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29 17:58   ` Sagi Grimberg
@ 2019-05-29 20:27     ` Sagi Grimberg
  2019-05-29 22:22     ` Max Gurtovoy
  1 sibling, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-29 20:27 UTC (permalink / raw)



>>> @@ -1781,16 +1798,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
>>>  	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
>>>  			ctrl->device->dev, 0);
>>> -	if (ctrl->ctrl.opts->nr_poll_queues) {
>>> +	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
>>> +		/* map dedicated poll queues only if we have queues left */
>>>  		set->map[HCTX_TYPE_POLL].nr_queues =
>>>  				ctrl->io_queues[HCTX_TYPE_POLL];
>>>  		set->map[HCTX_TYPE_POLL].queue_offset =
>>> -				ctrl->io_queues[HCTX_TYPE_DEFAULT];
>>> -		if (ctrl->ctrl.opts->nr_write_queues)
>>> -			set->map[HCTX_TYPE_POLL].queue_offset +=
>>> -				ctrl->io_queues[HCTX_TYPE_READ];
>>> +			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
>>> +			ctrl->io_queues[HCTX_TYPE_READ];
>>
>> in case of shared queues? ctrl->io_queues[HCTX_TYPE_READ] != 
>> set->map[HCTX_TYPE_READ].nr_queues.
>>
>> Shouldn't we jump over (the queue_offset) the set->map values ?
> 
> This is exactly why we shouldn't. In the shared case, we only have
> HCTX_TYPE_DEFAULT and HCTX_TYPE_READ is zero so this case jumps
> exactly how much it needs to.

BTW, I want a review tag from you, Max, before taking this in.


* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29 17:58   ` Sagi Grimberg
  2019-05-29 20:27     ` Sagi Grimberg
@ 2019-05-29 22:22     ` Max Gurtovoy
  2019-05-29 22:41       ` Sagi Grimberg
  1 sibling, 1 reply; 11+ messages in thread
From: Max Gurtovoy @ 2019-05-29 22:22 UTC (permalink / raw)



On 5/29/2019 8:58 PM, Sagi Grimberg wrote:
>
>>> @@ -1781,16 +1798,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
>>>  	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
>>>  			ctrl->device->dev, 0);
>>> -	if (ctrl->ctrl.opts->nr_poll_queues) {
>>> +	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
>>> +		/* map dedicated poll queues only if we have queues left */
>>>  		set->map[HCTX_TYPE_POLL].nr_queues =
>>>  				ctrl->io_queues[HCTX_TYPE_POLL];
>>>  		set->map[HCTX_TYPE_POLL].queue_offset =
>>> -				ctrl->io_queues[HCTX_TYPE_DEFAULT];
>>> -		if (ctrl->ctrl.opts->nr_write_queues)
>>> -			set->map[HCTX_TYPE_POLL].queue_offset +=
>>> -				ctrl->io_queues[HCTX_TYPE_READ];
>>> +			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
>>> +			ctrl->io_queues[HCTX_TYPE_READ];
>>
>> in case of shared queues? ctrl->io_queues[HCTX_TYPE_READ] != 
>> set->map[HCTX_TYPE_READ].nr_queues.
>>
>> Shouldn't we jump over (the queue_offset) the set->map values ?
>
> This is exactly why we shouldn't. In the shared case, we only have
> HCTX_TYPE_DEFAULT and HCTX_TYPE_READ is zero so this case jumps
> exactly how much it needs to.

I see. It will work (in case I understand the idea).

I guess that set->map[HCTX_TYPE_READ].queue_offset is always == 
set->map[HCTX_TYPE_DEFAULT].nr_queues in case of separate queues.

I really don't understand the answer I got in v1, that we need to set
the READ map always (also in the shared case). The documentation says
that DEFAULT refers to queues that are not set explicitly. Maybe it's
because of the nr_maps that we ask for during tagset creation.

BTW,

Does this feature really improve performance? What is the scenario?

I think that a user might configure the queues in a wrong way that will
cause a performance degradation.


This patch fixes the reported bug,

Reviewed-by: Max Gurtovoy <maxg at mellanox.com>


* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29 22:22     ` Max Gurtovoy
@ 2019-05-29 22:41       ` Sagi Grimberg
  0 siblings, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2019-05-29 22:41 UTC (permalink / raw)



> I see. It will work (in case I understand the idea).
> 
> I guess that set->map[HCTX_TYPE_READ].queue_offset is always == 
> set->map[HCTX_TYPE_DEFAULT].nr_queues in case of separate queues.

Yes.

> I really don't understand the answer I got in V1, that we need to set 
> the READ map always (also in the shared case). In the documentation it 
> says that DEFAULT refers to queues that are not set explicitly. maybe 
> it's because the nr_maps that we ask during tagset creation.

Exactly.

> BTW,
> 
> Does this feature really improve performance? What is the scenario?

It could. It pretty much gives dedicated queues for reads that are
usually more latency sensitive than writes.

Also, I have a patch that I plan to propose that ties priority queue
arbitration to this. This way we can hint to the controller the priority
of poll queues over read queues over default queues.

> I think that a user might configure the queues in a wrong way that will
> cause a performance degradation.

The default case remains intact. The user can always theoretically get
it wrong, but this is why it's not the default.

> This patch fixes the reported bug,
> 
> Reviewed-by: Max Gurtovoy <maxg at mellanox.com>

Thanks Max. We'll queue it up.


* [PATCH v2 2/2] nvme-tcp: fix queue mapping when queue count is limited
  2019-05-29  5:49 ` [PATCH v2 2/2] nvme-tcp: " Sagi Grimberg
  2019-05-29 15:55   ` Harris, James R
@ 2019-06-01  8:58   ` Christoph Hellwig
  1 sibling, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2019-06-01  8:58 UTC (permalink / raw)


Looks good,

Reviewed-by: Christoph Hellwig <hch at lst.de>


* [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited
  2019-05-29  5:49 [PATCH v2 1/2] nvme-rdma: fix queue mapping when queue count is limited Sagi Grimberg
                   ` (2 preceding siblings ...)
  2019-05-29 15:55 ` Harris, James R
@ 2019-06-01  8:59 ` Christoph Hellwig
  3 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2019-06-01  8:59 UTC (permalink / raw)


Looks good,

Reviewed-by: Christoph Hellwig <hch at lst.de>

