All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2a 0/6]: Fixup mtip32xx for scheduling
@ 2017-04-28 14:31 Jens Axboe
  2017-04-28 14:31 ` [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() Jens Axboe
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch

Since I fat-fingered the previous posting of v2, here's a v2a that
is what v2 should have been. No changes since v2, just for easier
review. Sorry about that.

To recap, changes since v1:

- Mark internal commands as REQ_OP_DRV_IN. Doesn't really matter
  what the data direction is, the important bit is that we need
  to ensure the request is seen as a passthrough.

- Remove redundant active = 1 setting in mtip_commands_active().

- Utilize blk-mq timeout infrastructure, to avoid racing with
  cleanup.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command()
  2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
@ 2017-04-28 14:31 ` Jens Axboe
  2017-04-28 14:38   ` Christoph Hellwig
  2017-04-28 14:31 ` [PATCH 2/6] mtip32xx: kill atomic argument to mtip_quiesce_io() Jens Axboe
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

All callers can safely block. Kill the atomic/block argument, and
remove the argument from all callers.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 148 +++++++++++---------------------------
 1 file changed, 43 insertions(+), 105 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 02804cc79d82..d81d797ee65d 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -609,11 +609,6 @@ static void mtip_completion(struct mtip_port *port,
 	complete(waiting);
 }
 
-static void mtip_null_completion(struct mtip_port *port,
-			    int tag, struct mtip_cmd *command, int status)
-{
-}
-
 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 				dma_addr_t buffer_dma, unsigned int sectors);
 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
@@ -1117,7 +1112,6 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 					dma_addr_t buffer,
 					int buf_len,
 					u32 opts,
-					gfp_t atomic,
 					unsigned long timeout)
 {
 	struct mtip_cmd_sg *command_sg;
@@ -1146,30 +1140,22 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 
 	clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
 
-	if (atomic == GFP_KERNEL) {
-		if (fis->command != ATA_CMD_STANDBYNOW1) {
-			/* wait for io to complete if non atomic */
-			if (mtip_quiesce_io(port,
-				MTIP_QUIESCE_IO_TIMEOUT_MS, atomic) < 0) {
-				dev_warn(&dd->pdev->dev,
-					"Failed to quiesce IO\n");
-				mtip_put_int_command(dd, int_cmd);
-				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
-				wake_up_interruptible(&port->svc_wait);
-				return -EBUSY;
-			}
+	if (fis->command != ATA_CMD_STANDBYNOW1) {
+		/* wait for io to complete if non atomic */
+		if (mtip_quiesce_io(port,
+			MTIP_QUIESCE_IO_TIMEOUT_MS, GFP_KERNEL) < 0) {
+			dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n");
+			mtip_put_int_command(dd, int_cmd);
+			clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
+			wake_up_interruptible(&port->svc_wait);
+			return -EBUSY;
 		}
-
-		/* Set the completion function and data for the command. */
-		int_cmd->comp_data = &wait;
-		int_cmd->comp_func = mtip_completion;
-
-	} else {
-		/* Clear completion - we're going to poll */
-		int_cmd->comp_data = NULL;
-		int_cmd->comp_func = mtip_null_completion;
 	}
 
+	/* Set the completion function and data for the command. */
+	int_cmd->comp_data = &wait;
+	int_cmd->comp_func = mtip_completion;
+
 	/* Copy the command to the command table */
 	memcpy(int_cmd->command, fis, fis_len*4);
 
@@ -1198,81 +1184,41 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	/* Issue the command to the hardware */
 	mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
 
-	if (atomic == GFP_KERNEL) {
-		/* Wait for the command to complete or timeout. */
-		if ((rv = wait_for_completion_interruptible_timeout(
-				&wait,
-				msecs_to_jiffies(timeout))) <= 0) {
-
-			if (rv == -ERESTARTSYS) { /* interrupted */
-				dev_err(&dd->pdev->dev,
-					"Internal command [%02X] was interrupted after %u ms\n",
-					fis->command,
-					jiffies_to_msecs(jiffies - start));
-				rv = -EINTR;
-				goto exec_ic_exit;
-			} else if (rv == 0) /* timeout */
-				dev_err(&dd->pdev->dev,
-					"Internal command did not complete [%02X] within timeout of  %lu ms\n",
-					fis->command, timeout);
-			else
-				dev_err(&dd->pdev->dev,
-					"Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
-					fis->command, rv, timeout);
-
-			if (mtip_check_surprise_removal(dd->pdev) ||
-				test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
-						&dd->dd_flag)) {
-				dev_err(&dd->pdev->dev,
-					"Internal command [%02X] wait returned due to SR\n",
-					fis->command);
-				rv = -ENXIO;
-				goto exec_ic_exit;
-			}
-			mtip_device_reset(dd); /* recover from timeout issue */
-			rv = -EAGAIN;
+	/* Wait for the command to complete or timeout. */
+	rv = wait_for_completion_interruptible_timeout(&wait,
+				msecs_to_jiffies(timeout));
+	if (rv <= 0) {
+		if (rv == -ERESTARTSYS) { /* interrupted */
+			dev_err(&dd->pdev->dev,
+				"Internal command [%02X] was interrupted after %u ms\n",
+				fis->command,
+				jiffies_to_msecs(jiffies - start));
+			rv = -EINTR;
 			goto exec_ic_exit;
-		}
-	} else {
-		u32 hba_stat, port_stat;
-
-		/* Spin for <timeout> checking if command still outstanding */
-		timeout = jiffies + msecs_to_jiffies(timeout);
-		while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
-				& (1 << MTIP_TAG_INTERNAL))
-				&& time_before(jiffies, timeout)) {
-			if (mtip_check_surprise_removal(dd->pdev)) {
-				rv = -ENXIO;
-				goto exec_ic_exit;
-			}
-			if ((fis->command != ATA_CMD_STANDBYNOW1) &&
-				test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
-						&dd->dd_flag)) {
-				rv = -ENXIO;
-				goto exec_ic_exit;
-			}
-			port_stat = readl(port->mmio + PORT_IRQ_STAT);
-			if (!port_stat)
-				continue;
+		} else if (rv == 0) /* timeout */
+			dev_err(&dd->pdev->dev,
+				"Internal command did not complete [%02X] within timeout of  %lu ms\n",
+				fis->command, timeout);
+		else
+			dev_err(&dd->pdev->dev,
+				"Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
+				fis->command, rv, timeout);
 
-			if (port_stat & PORT_IRQ_ERR) {
-				dev_err(&dd->pdev->dev,
-					"Internal command [%02X] failed\n",
-					fis->command);
-				mtip_device_reset(dd);
-				rv = -EIO;
-				goto exec_ic_exit;
-			} else {
-				writel(port_stat, port->mmio + PORT_IRQ_STAT);
-				hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
-				if (hba_stat)
-					writel(hba_stat,
-						dd->mmio + HOST_IRQ_STAT);
-			}
-			break;
+		if (mtip_check_surprise_removal(dd->pdev) ||
+			test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
+					&dd->dd_flag)) {
+			dev_err(&dd->pdev->dev,
+				"Internal command [%02X] wait returned due to SR\n",
+				fis->command);
+			rv = -ENXIO;
+			goto exec_ic_exit;
 		}
+		mtip_device_reset(dd); /* recover from timeout issue */
+		rv = -EAGAIN;
+		goto exec_ic_exit;
 	}
 
+	rv = 0;
 	if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
 			& (1 << MTIP_TAG_INTERNAL)) {
 		rv = -ENXIO;
@@ -1391,7 +1337,6 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
 				port->identify_dma,
 				sizeof(u16) * ATA_ID_WORDS,
 				0,
-				GFP_KERNEL,
 				MTIP_INT_CMD_TIMEOUT_MS)
 				< 0) {
 		rv = -1;
@@ -1477,7 +1422,6 @@ static int mtip_standby_immediate(struct mtip_port *port)
 					0,
 					0,
 					0,
-					GFP_ATOMIC,
 					timeout);
 	dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
 			jiffies_to_msecs(jiffies - start));
@@ -1523,7 +1467,6 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
 					buffer_dma,
 					sectors * ATA_SECT_SIZE,
 					0,
-					GFP_ATOMIC,
 					MTIP_INT_CMD_TIMEOUT_MS);
 }
 
@@ -1558,7 +1501,6 @@ static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
 					buffer_dma,
 					ATA_SECT_SIZE,
 					0,
-					GFP_ATOMIC,
 					15000);
 }
 
@@ -1686,7 +1628,6 @@ static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
 					dma_addr,
 					ATA_SECT_SIZE,
 					0,
-					GFP_KERNEL,
 					MTIP_TRIM_TIMEOUT_MS) < 0)
 		rv = -EIO;
 
@@ -1850,7 +1791,6 @@ static int exec_drive_task(struct mtip_port *port, u8 *command)
 				 0,
 				 0,
 				 0,
-				 GFP_KERNEL,
 				 to) < 0) {
 		return -1;
 	}
@@ -1946,7 +1886,6 @@ static int exec_drive_command(struct mtip_port *port, u8 *command,
 				 (xfer_sz ? dma_addr : 0),
 				 (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
 				 0,
-				 GFP_KERNEL,
 				 to)
 				 < 0) {
 		rv = -EFAULT;
@@ -2189,7 +2128,6 @@ static int exec_drive_taskfile(struct driver_data *dd,
 				 dma_buffer,
 				 transfer_size,
 				 0,
-				 GFP_KERNEL,
 				 timeout) < 0) {
 		err = -EIO;
 		goto abort;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/6] mtip32xx: kill atomic argument to mtip_quiesce_io()
  2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
  2017-04-28 14:31 ` [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() Jens Axboe
@ 2017-04-28 14:31 ` Jens Axboe
  2017-04-28 14:38   ` Christoph Hellwig
  2017-04-28 14:31 ` [PATCH 3/6] mtip32xx: abstract out "are any commands active" helper Jens Axboe
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

All callers now pass in GFP_KERNEL, get rid of the argument.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index d81d797ee65d..36f3d34f2156 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1035,14 +1035,12 @@ static bool mtip_pause_ncq(struct mtip_port *port,
  *
  * @port    Pointer to port data structure
  * @timeout Max duration to wait (ms)
- * @atomic  gfp_t flag to indicate blockable context or not
  *
  * return value
  *	0	Success
  *	-EBUSY  Commands still active
  */
-static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout,
-								gfp_t atomic)
+static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 {
 	unsigned long to;
 	unsigned int n;
@@ -1053,18 +1051,12 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout,
 	to = jiffies + msecs_to_jiffies(timeout);
 	do {
 		if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
-			test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags) &&
-			atomic == GFP_KERNEL) {
+			test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
 			msleep(20);
 			continue; /* svc thd is actively issuing commands */
 		}
 
-		if (atomic == GFP_KERNEL)
-			msleep(100);
-		else {
-			cpu_relax();
-			udelay(100);
-		}
+		msleep(100);
 
 		if (mtip_check_surprise_removal(port->dd->pdev))
 			goto err_fault;
@@ -1142,8 +1134,7 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 
 	if (fis->command != ATA_CMD_STANDBYNOW1) {
 		/* wait for io to complete if non atomic */
-		if (mtip_quiesce_io(port,
-			MTIP_QUIESCE_IO_TIMEOUT_MS, GFP_KERNEL) < 0) {
+		if (mtip_quiesce_io(port, MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) {
 			dev_warn(&dd->pdev->dev, "Failed to quiesce IO\n");
 			mtip_put_int_command(dd, int_cmd);
 			clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
@@ -4106,8 +4097,7 @@ static int mtip_block_remove(struct driver_data *dd)
 		 * Explicitly wait here for IOs to quiesce,
 		 * as mtip_standby_drive usually won't wait for IOs.
 		 */
-		if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS,
-								GFP_KERNEL))
+		if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS))
 			mtip_standby_drive(dd);
 	}
 	else
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/6] mtip32xx: abstract out "are any commands active" helper
  2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
  2017-04-28 14:31 ` [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() Jens Axboe
  2017-04-28 14:31 ` [PATCH 2/6] mtip32xx: kill atomic argument to mtip_quiesce_io() Jens Axboe
@ 2017-04-28 14:31 ` Jens Axboe
  2017-04-28 14:39   ` Christoph Hellwig
  2017-04-28 14:31 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

This is a prep patch for backoff in ->queue_rq() for non-ncq commands.

Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 36f3d34f2156..aee94f260725 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1030,6 +1030,22 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 	return false;
 }
 
+static bool mtip_commands_active(struct mtip_port *port)
+{
+	unsigned int active;
+	unsigned int n;
+
+	/*
+	 * Ignore s_active bit 0 of array element 0.
+	 * This bit will always be set
+	 */
+	active = readl(port->s_active[0]) & 0xFFFFFFFE;
+	for (n = 1; n < port->dd->slot_groups; n++)
+		active |= readl(port->s_active[n]);
+
+	return active != 0;
+}
+
 /*
  * Wait for port to quiesce
  *
@@ -1043,8 +1059,7 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 {
 	unsigned long to;
-	unsigned int n;
-	unsigned int active = 1;
+	bool active = true;
 
 	blk_mq_stop_hw_queues(port->dd->queue);
 
@@ -1061,14 +1076,7 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 		if (mtip_check_surprise_removal(port->dd->pdev))
 			goto err_fault;
 
-		/*
-		 * Ignore s_active bit 0 of array element 0.
-		 * This bit will always be set
-		 */
-		active = readl(port->s_active[0]) & 0xFFFFFFFE;
-		for (n = 1; n < port->dd->slot_groups; n++)
-			active |= readl(port->s_active[n]);
-
+		active = mtip_commands_active(port);
 		if (!active)
 			break;
 	} while (time_before(jiffies, to));
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
                   ` (2 preceding siblings ...)
  2017-04-28 14:31 ` [PATCH 3/6] mtip32xx: abstract out "are any commands active" helper Jens Axboe
@ 2017-04-28 14:31 ` Jens Axboe
  2017-04-28 14:41   ` Christoph Hellwig
  2017-04-28 14:31 ` [PATCH 5/6] mtip32xx: convert internal command issue to block IO path Jens Axboe
  2017-04-28 14:31 ` [PATCH 6/6] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED" Jens Axboe
  5 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

Instead of bypassing the scheduler for insertion of reserved requests,
we ensure that the request is marked as RQF_RESERVED so the driver
knows where it came from.

Usually we just use the tag to know if it's reserved or not,
but that only works when the request has a driver tag assigned.
Using RQF_RESERVED can be done independently of whether or not
scheduling is used.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c   | 8 +++-----
 block/blk-mq.c         | 3 +++
 include/linux/blkdev.h | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 8b361e192e8a..27c67465f856 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	if (likely(!data->hctx))
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
-	/*
-	 * For a reserved tag, allocate a normal request since we might
-	 * have driver dependencies on the value of the internal tag.
-	 */
-	if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
+	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
 
 		/*
@@ -104,6 +100,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	}
 
 	if (rq) {
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			rq->rq_flags |= RQF_RESERVED;
 		if (!op_is_flush(op)) {
 			rq->elv.icq = NULL;
 			if (e && e->type->icq_cache)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b75ef2392db7..0168b27469cb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -268,6 +268,9 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 			data->hctx->tags->rqs[rq->tag] = rq;
 		}
 
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			rq->rq_flags |= RQF_RESERVED;
+
 		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
 		return rq;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba3884f26288..c246de5861dc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,6 +120,8 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* Request came from the reserved tags/pool */
+#define RQF_RESERVED		((__force req_flags_t)(1 << 19))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 5/6] mtip32xx: convert internal command issue to block IO path
  2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
                   ` (3 preceding siblings ...)
  2017-04-28 14:31 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
@ 2017-04-28 14:31 ` Jens Axboe
  2017-04-28 14:49   ` Christoph Hellwig
  2017-04-28 14:31 ` [PATCH 6/6] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED" Jens Axboe
  5 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

The driver special cases certain things for command issue, depending
on whether it's an internal command or not. Make the internal commands
use the regular infrastructure for issuing IO.

Since this is an 8-group souped up AHCI variant, we have to deal
with NCQ vs non-queueable commands. Do this from the queue_rq
handler, by backing off unless the drive is idle.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 103 +++++++++++++++++++++++++++-----------
 1 file changed, 73 insertions(+), 30 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index aee94f260725..90f14f799527 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -195,7 +195,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
 	if (mtip_check_surprise_removal(dd->pdev))
 		return NULL;
 
-	rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED);
+	rq = blk_mq_alloc_request(dd->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(rq))
 		return NULL;
 
@@ -1088,6 +1088,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
 	return -EFAULT;
 }
 
+struct mtip_int_cmd {
+	int fis_len;
+	dma_addr_t buffer;
+	int buf_len;
+	u32 opts;
+};
+
 /*
  * Execute an internal command and wait for the completion.
  *
@@ -1114,10 +1121,16 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 					u32 opts,
 					unsigned long timeout)
 {
-	struct mtip_cmd_sg *command_sg;
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct mtip_cmd *int_cmd;
 	struct driver_data *dd = port->dd;
+	struct request *rq;
+	struct mtip_int_cmd icmd = {
+		.fis_len = fis_len,
+		.buffer = buffer,
+		.buf_len = buf_len,
+		.opts = opts
+	};
 	int rv = 0;
 	unsigned long start;
 
@@ -1132,6 +1145,8 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 		dbg_printk(MTIP_DRV_NAME "Unable to allocate tag for PIO cmd\n");
 		return -EFAULT;
 	}
+	rq = blk_mq_rq_from_pdu(int_cmd);
+	rq->end_io_data = &icmd;
 
 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 
@@ -1158,35 +1173,16 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	/* Copy the command to the command table */
 	memcpy(int_cmd->command, fis, fis_len*4);
 
-	/* Populate the SG list */
-	int_cmd->command_header->opts =
-		 __force_bit2int cpu_to_le32(opts | fis_len);
-	if (buf_len) {
-		command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
-
-		command_sg->info =
-			__force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
-		command_sg->dba	=
-			__force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
-		command_sg->dba_upper =
-			__force_bit2int cpu_to_le32((buffer >> 16) >> 16);
-
-		int_cmd->command_header->opts |=
-			__force_bit2int cpu_to_le32((1 << 16));
-	}
-
-	/* Populate the command header */
-	int_cmd->command_header->byte_count = 0;
-
 	start = jiffies;
+	rq->timeout = timeout;
 
-	/* Issue the command to the hardware */
-	mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
+	/* insert request and run queue */
+	blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL);
+
+	wait_for_completion(&wait);
+	rv = int_cmd->status;
 
-	/* Wait for the command to complete or timeout. */
-	rv = wait_for_completion_interruptible_timeout(&wait,
-				msecs_to_jiffies(timeout));
-	if (rv <= 0) {
+	if (rv < 0) {
 		if (rv == -ERESTARTSYS) { /* interrupted */
 			dev_err(&dd->pdev->dev,
 				"Internal command [%02X] was interrupted after %u ms\n",
@@ -1217,7 +1213,6 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 		goto exec_ic_exit;
 	}
 
-	rv = 0;
 	if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
 			& (1 << MTIP_TAG_INTERNAL)) {
 		rv = -ENXIO;
@@ -3762,6 +3757,44 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
+static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
+				   struct request *rq)
+{
+	struct driver_data *dd = hctx->queue->queuedata;
+	struct mtip_int_cmd *icmd = rq->end_io_data;
+	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct mtip_cmd_sg *command_sg;
+
+	if (mtip_commands_active(dd->port))
+		return BLK_MQ_RQ_QUEUE_BUSY;
+
+	rq->end_io_data = NULL;
+
+	/* Populate the SG list */
+	cmd->command_header->opts =
+		 __force_bit2int cpu_to_le32(icmd->opts | icmd->fis_len);
+	if (icmd->buf_len) {
+		command_sg = cmd->command + AHCI_CMD_TBL_HDR_SZ;
+
+		command_sg->info =
+			__force_bit2int cpu_to_le32((icmd->buf_len-1) & 0x3FFFFF);
+		command_sg->dba	=
+			__force_bit2int cpu_to_le32(icmd->buffer & 0xFFFFFFFF);
+		command_sg->dba_upper =
+			__force_bit2int cpu_to_le32((icmd->buffer >> 16) >> 16);
+
+		cmd->command_header->opts |=
+			__force_bit2int cpu_to_le32((1 << 16));
+	}
+
+	/* Populate the command header */
+	cmd->command_header->byte_count = 0;
+
+	blk_mq_start_request(rq);
+	mtip_issue_non_ncq_command(dd->port, rq->tag);
+	return BLK_MQ_RQ_QUEUE_OK;
+}
+
 static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
@@ -3770,6 +3803,9 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	mtip_init_cmd_header(rq);
 
+	if (rq->rq_flags & RQF_RESERVED)
+		return mtip_issue_reserved_cmd(hctx, rq);
+
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
@@ -3825,8 +3861,14 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
 {
 	struct driver_data *dd = req->q->queuedata;
 
-	if (reserved)
+	if (reserved) {
+		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
+
+		cmd->status = -ETIME;
+		if (cmd->comp_func)
+			cmd->comp_func(dd->port, MTIP_TAG_INTERNAL, cmd, -ETIME);
 		goto exit_handler;
+	}
 
 	if (test_bit(req->tag, dd->port->cmds_to_issue))
 		goto exit_handler;
@@ -4063,6 +4105,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
 	} else if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &dd->port->flags)) {
 
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
+		cmd->status = -ENODEV;
 		if (cmd->comp_func)
 			cmd->comp_func(dd->port, MTIP_TAG_INTERNAL,
 					cmd, -ENODEV);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 6/6] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED"
  2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
                   ` (4 preceding siblings ...)
  2017-04-28 14:31 ` [PATCH 5/6] mtip32xx: convert internal command issue to block IO path Jens Axboe
@ 2017-04-28 14:31 ` Jens Axboe
  2017-04-28 14:49   ` Christoph Hellwig
  5 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:31 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

This reverts commit 4981d04dd8f1ab19e2cce008da556d7f099b6e68.

The driver has been converted to using the proper infrastructure
for issuing internal commands. This means it's now safe to use with
the scheduling infrastructure, so we can now revert the change
that turned off scheduling for mtip32xx.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 90f14f799527..15b9fcc2b89c 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3961,7 +3961,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 	dd->tags.reserved_tags = 1;
 	dd->tags.cmd_size = sizeof(struct mtip_cmd);
 	dd->tags.numa_node = dd->numa_node;
-	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_NO_SCHED;
+	dd->tags.flags = BLK_MQ_F_SHOULD_MERGE;
 	dd->tags.driver_data = dd;
 	dd->tags.timeout = MTIP_NCQ_CMD_TIMEOUT_MS;
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command()
  2017-04-28 14:31 ` [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() Jens Axboe
@ 2017-04-28 14:38   ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2017-04-28 14:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, ming.lei, hch

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/6] mtip32xx: kill atomic argument to mtip_quiesce_io()
  2017-04-28 14:31 ` [PATCH 2/6] mtip32xx: kill atomic argument to mtip_quiesce_io() Jens Axboe
@ 2017-04-28 14:38   ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2017-04-28 14:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, ming.lei, hch

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 3/6] mtip32xx: abstract out "are any commands active" helper
  2017-04-28 14:31 ` [PATCH 3/6] mtip32xx: abstract out "are any commands active" helper Jens Axboe
@ 2017-04-28 14:39   ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2017-04-28 14:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, ming.lei, hch

Looks good,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-28 14:31 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
@ 2017-04-28 14:41   ` Christoph Hellwig
  2017-04-28 14:45     ` Jens Axboe
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2017-04-28 14:41 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, ming.lei, hch

On Fri, Apr 28, 2017 at 08:31:49AM -0600, Jens Axboe wrote:
> Instead of bypassing the scheduler for insertion of reserved requests,
> we ensure that the request is marked as RQF_RESERVED so they driver
> knows where it came from.
> 
> Usually we just use the tag to know if it's reserved or not,
> but that only works when the request has a driver tag assigned.
> Using RQF_RESERVED can be done independently of whether or not
> scheduling is used.

Do we really need a flag for this?  If anyone cares if a request
is internal it can just check ->tag / ->internal_tag is smaller
than ->nr_reserved_tags using a little helper.

I'd really like to keep the number of flags down as much as possible.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-28 14:41   ` Christoph Hellwig
@ 2017-04-28 14:45     ` Jens Axboe
  0 siblings, 0 replies; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:45 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-block, ming.lei

On 04/28/2017 08:41 AM, Christoph Hellwig wrote:
> On Fri, Apr 28, 2017 at 08:31:49AM -0600, Jens Axboe wrote:
>> Instead of bypassing the scheduler for insertion of reserved requests,
>> we ensure that the request is marked as RQF_RESERVED so they driver
>> knows where it came from.
>>
>> Usually we just use the tag to know if it's reserved or not,
>> but that only works when the request has a driver tag assigned.
>> Using RQF_RESERVED can be done independently of whether or not
>> scheduling is used.
> 
> Do we really need a flag for this?  If anyone cares if a request
> is internal it can just check ->tag / ->internal_tag is smaller
> than ->nr_reserved_tags using a little helper.
> 
> I'd really like to keep the number of flags down as much as possible.

Ming had the same comment. Once we enter ->queue_rq() and later we don't
need the flag, since ->tag has been assigned and the regular helper
works fine. We can kill it. I forgot about this in the previous reply,
but it was meant to pass the fact that this was a reserved request to
get_driver_tag(), but it doesn't need that, since we already use the
regular helper against ->internal_tag.

I'll drop this patch.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 5/6] mtip32xx: convert internal command issue to block IO path
  2017-04-28 14:31 ` [PATCH 5/6] mtip32xx: convert internal command issue to block IO path Jens Axboe
@ 2017-04-28 14:49   ` Christoph Hellwig
  2017-04-28 16:43     ` Jens Axboe
  0 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2017-04-28 14:49 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, ming.lei, hch

> @@ -1088,6 +1088,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
>  	return -EFAULT;
>  }
>  
> +struct mtip_int_cmd {
> +	int fis_len;
> +	dma_addr_t buffer;
> +	int buf_len;
> +	u32 opts;
> +};

I know passing the dma_addr is probably the easier conversion for now,
but using blk_rq_map_kern would be the cleaner way going forward.

> +	/* insert request and run queue */
> +	blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL);
> +
> +	wait_for_completion(&wait);

Why not blk_execute_rq?

> @@ -3770,6 +3803,9 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	mtip_init_cmd_header(rq);
>  
> +	if (rq->rq_flags & RQF_RESERVED)

And in fact I don't think we'd even need the helper I suggested before,
we can just check for REQ_OP_DRV_IN here.

But while we're at it - one oddity in mtip32xx is that it converts
discards to an internal command from ->queue_rq, so we end up using
two requests for it.  Just handling discards here would be a nice
improvement.  It would also easily allow the driver to support ranged
trims..

But I guess I'm simply too picky and we should just fix up the worst issues
first..

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 6/6] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED"
  2017-04-28 14:31 ` [PATCH 6/6] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED" Jens Axboe
@ 2017-04-28 14:49   ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2017-04-28 14:49 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, ming.lei, hch

Looks fine,

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 5/6] mtip32xx: convert internal command issue to block IO path
  2017-04-28 14:49   ` Christoph Hellwig
@ 2017-04-28 16:43     ` Jens Axboe
  0 siblings, 0 replies; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 16:43 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-block, ming.lei

On 04/28/2017 08:49 AM, Christoph Hellwig wrote:
>> @@ -1088,6 +1088,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
>>  	return -EFAULT;
>>  }
>>  
>> +struct mtip_int_cmd {
>> +	int fis_len;
>> +	dma_addr_t buffer;
>> +	int buf_len;
>> +	u32 opts;
>> +};
> 
> I know passing the dma_addr is probably the easier conversion for now,
> but using blk_rq_map_kern would be the cleaner way going forward.
> 
>> +	/* insert request and run queue */
>> +	blk_execute_rq_nowait(rq->q, NULL, rq, true, NULL);
>> +
>> +	wait_for_completion(&wait);
> 
> Why not blk_execute_rq?

The internal requests don't go through the normal end_request part.
We can do that too of course, but I think we should keep it simple
to start to iron the issues out. Goes for using blk_rq_map_kern()
as well.

>> @@ -3770,6 +3803,9 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
>>  
>>  	mtip_init_cmd_header(rq);
>>  
>> +	if (rq->rq_flags & RQF_RESERVED)
> 
> And in fact I don't think we'd even need the helper I suggested before,
> we can just check for REQ_OP_DRV_IN here.

True, we can just use blk_rq_is_passthrough here, I'll do that.

> But while we're at it - one oddity in mtip32xx is that it converts
> discards to an internal command from ->queue_rq, so we end up using
> two requests for it.  Just handling discards here would be a nice
> improvement.  It would also easily allow the driver to support ranged
> trims..
> 
> But I guess I'm simply too picky and we should just fix up the worst issues
> first..

All of these are good suggestions, but yes, I think we should do the
worst issues first, so we can kill the NO_SCHED flag.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-28 14:01 [PATCH v2 0/6] Fixup mtip32xx for scheduling Jens Axboe
@ 2017-04-28 14:01 ` Jens Axboe
  0 siblings, 0 replies; 19+ messages in thread
From: Jens Axboe @ 2017-04-28 14:01 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

Instead of bypassing the scheduler for insertion of reserved requests,
we ensure that the request is marked as RQF_RESERVED so the driver
knows where it came from.

Usually we just use the tag to know if it's reserved or not,
but that only works when the request has a driver tag assigned.
Using RQF_RESERVED can be done independently of whether or not
scheduling is used.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c   | 8 +++-----
 block/blk-mq.c         | 3 +++
 include/linux/blkdev.h | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 8b361e192e8a..27c67465f856 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	if (likely(!data->hctx))
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
-	/*
-	 * For a reserved tag, allocate a normal request since we might
-	 * have driver dependencies on the value of the internal tag.
-	 */
-	if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
+	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
 
 		/*
@@ -104,6 +100,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	}
 
 	if (rq) {
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			rq->rq_flags |= RQF_RESERVED;
 		if (!op_is_flush(op)) {
 			rq->elv.icq = NULL;
 			if (e && e->type->icq_cache)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b75ef2392db7..0168b27469cb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -268,6 +268,9 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 			data->hctx->tags->rqs[rq->tag] = rq;
 		}
 
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			rq->rq_flags |= RQF_RESERVED;
+
 		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
 		return rq;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba3884f26288..c246de5861dc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,6 +120,8 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* Request came from the reserved tags/pool */
+#define RQF_RESERVED		((__force req_flags_t)(1 << 19))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-28  4:04   ` Ming Lei
@ 2017-04-28  4:13     ` Jens Axboe
  0 siblings, 0 replies; 19+ messages in thread
From: Jens Axboe @ 2017-04-28  4:13 UTC (permalink / raw)
  To: Ming Lei; +Cc: linux-block, hch

On 04/27/2017 10:04 PM, Ming Lei wrote:
> On Thu, Apr 27, 2017 at 04:51:32PM -0600, Jens Axboe wrote:
>> Instead of bypassing the scheduler for insertion of reserved requests,
>> we ensure that the request is marked as RQF_RESERVED so the driver
>> knows where it came from.
>>
>> Usually we just use the tag to know if it's reserved or not,
>> but that only works when the request has a driver tag assigned.
>> Using RQF_RESERVED can be done independently of whether or not
>> scheduling is used.
>>
>> Signed-off-by: Jens Axboe <axboe@fb.com>
>> ---
>>  block/blk-mq-sched.c   | 8 +++-----
>>  block/blk-mq.c         | 3 +++
>>  include/linux/blkdev.h | 2 ++
>>  3 files changed, 8 insertions(+), 5 deletions(-)
>>
>> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
>> index 8b361e192e8a..27c67465f856 100644
>> --- a/block/blk-mq-sched.c
>> +++ b/block/blk-mq-sched.c
>> @@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>>  	if (likely(!data->hctx))
>>  		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
>>  
>> -	/*
>> -	 * For a reserved tag, allocate a normal request since we might
>> -	 * have driver dependencies on the value of the internal tag.
>> -	 */
>> -	if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
>> +	if (e) {
>>  		data->flags |= BLK_MQ_REQ_INTERNAL;
>>  
>>  		/*
>> @@ -104,6 +100,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>>  	}
>>  
>>  	if (rq) {
>> +		if (data->flags & BLK_MQ_REQ_RESERVED)
>> +			rq->rq_flags |= RQF_RESERVED;
> 
> I think this flag may not be needed, because the driver can
> decide if one rq is from the reserved pool just by the tag; for example,
> in mtip32xx it can be done easily by checking if rq->tag is zero.
> 
> So I suggest to not introduce this flag until it is necessary.

But that only works after get_driver_tag() has been run, which is why
I added the flag. That may or may not be a big deal, depending on
what path is called before the request is sent off to be executed.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-27 22:51 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
@ 2017-04-28  4:04   ` Ming Lei
  2017-04-28  4:13     ` Jens Axboe
  0 siblings, 1 reply; 19+ messages in thread
From: Ming Lei @ 2017-04-28  4:04 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, hch

On Thu, Apr 27, 2017 at 04:51:32PM -0600, Jens Axboe wrote:
> Instead of bypassing the scheduler for insertion of reserved requests,
> we ensure that the request is marked as RQF_RESERVED so the driver
> knows where it came from.
> 
> Usually we just use the tag to know if it's reserved or not,
> but that only works when the request has a driver tag assigned.
> Using RQF_RESERVED can be done independently of whether or not
> scheduling is used.
> 
> Signed-off-by: Jens Axboe <axboe@fb.com>
> ---
>  block/blk-mq-sched.c   | 8 +++-----
>  block/blk-mq.c         | 3 +++
>  include/linux/blkdev.h | 2 ++
>  3 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 8b361e192e8a..27c67465f856 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>  	if (likely(!data->hctx))
>  		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
>  
> -	/*
> -	 * For a reserved tag, allocate a normal request since we might
> -	 * have driver dependencies on the value of the internal tag.
> -	 */
> -	if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
> +	if (e) {
>  		data->flags |= BLK_MQ_REQ_INTERNAL;
>  
>  		/*
> @@ -104,6 +100,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>  	}
>  
>  	if (rq) {
> +		if (data->flags & BLK_MQ_REQ_RESERVED)
> +			rq->rq_flags |= RQF_RESERVED;

I think this flag may not be needed, because the driver can
decide if one rq is from the reserved pool just by the tag; for example,
in mtip32xx it can be done easily by checking if rq->tag is zero.

So I suggest to not introduce this flag until it is necessary.

Thanks,
Ming

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests
  2017-04-27 22:51 [PATCH 0/6] Fixup mtip32xx for scheduling Jens Axboe
@ 2017-04-27 22:51 ` Jens Axboe
  2017-04-28  4:04   ` Ming Lei
  0 siblings, 1 reply; 19+ messages in thread
From: Jens Axboe @ 2017-04-27 22:51 UTC (permalink / raw)
  To: linux-block; +Cc: ming.lei, hch, Jens Axboe

Instead of bypassing the scheduler for insertion of reserved requests,
we ensure that the request is marked as RQF_RESERVED so the driver
knows where it came from.

Usually we just use the tag to know if it's reserved or not,
but that only works when the request has a driver tag assigned.
Using RQF_RESERVED can be done independently of whether or not
scheduling is used.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c   | 8 +++-----
 block/blk-mq.c         | 3 +++
 include/linux/blkdev.h | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 8b361e192e8a..27c67465f856 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -82,11 +82,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	if (likely(!data->hctx))
 		data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
 
-	/*
-	 * For a reserved tag, allocate a normal request since we might
-	 * have driver dependencies on the value of the internal tag.
-	 */
-	if (e && !(data->flags & BLK_MQ_REQ_RESERVED)) {
+	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
 
 		/*
@@ -104,6 +100,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	}
 
 	if (rq) {
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			rq->rq_flags |= RQF_RESERVED;
 		if (!op_is_flush(op)) {
 			rq->elv.icq = NULL;
 			if (e && e->type->icq_cache)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b75ef2392db7..0168b27469cb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -268,6 +268,9 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
 			data->hctx->tags->rqs[rq->tag] = rq;
 		}
 
+		if (data->flags & BLK_MQ_REQ_RESERVED)
+			rq->rq_flags |= RQF_RESERVED;
+
 		blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
 		return rq;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba3884f26288..c246de5861dc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,6 +120,8 @@ typedef __u32 __bitwise req_flags_t;
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* Request came from the reserved tags/pool */
+#define RQF_RESERVED		((__force req_flags_t)(1 << 19))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2017-04-28 16:43 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-28 14:31 [PATCH v2a 0/6]: Fixup mtip32xx for scheduling Jens Axboe
2017-04-28 14:31 ` [PATCH 1/6] mtip32xx: get rid of 'atomic' argument to mtip_exec_internal_command() Jens Axboe
2017-04-28 14:38   ` Christoph Hellwig
2017-04-28 14:31 ` [PATCH 2/6] mtip32xx: kill atomic argument to mtip_quiesce_io() Jens Axboe
2017-04-28 14:38   ` Christoph Hellwig
2017-04-28 14:31 ` [PATCH 3/6] mtip32xx: abstract out "are any commands active" helper Jens Axboe
2017-04-28 14:39   ` Christoph Hellwig
2017-04-28 14:31 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
2017-04-28 14:41   ` Christoph Hellwig
2017-04-28 14:45     ` Jens Axboe
2017-04-28 14:31 ` [PATCH 5/6] mtip32xx: convert internal command issue to block IO path Jens Axboe
2017-04-28 14:49   ` Christoph Hellwig
2017-04-28 16:43     ` Jens Axboe
2017-04-28 14:31 ` [PATCH 6/6] Revert "mtip32xx: pass BLK_MQ_F_NO_SCHED" Jens Axboe
2017-04-28 14:49   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2017-04-28 14:01 [PATCH v2 0/6] Fixup mtip32xx for scheduling Jens Axboe
2017-04-28 14:01 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
2017-04-27 22:51 [PATCH 0/6] Fixup mtip32xx for scheduling Jens Axboe
2017-04-27 22:51 ` [PATCH 4/6] blk-mq: don't bypass scheduler for reserved requests Jens Axboe
2017-04-28  4:04   ` Ming Lei
2017-04-28  4:13     ` Jens Axboe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.