All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] scsi:tcmu: switch tcmu completion path to work queue context
@ 2022-05-11  2:03 Guixin Liu
  0 siblings, 0 replies; only message in thread
From: Guixin Liu @ 2022-05-11  2:03 UTC (permalink / raw)
  To: bostroesser, martin.petersen; +Cc: linux-scsi, target-devel

Currently, tcmu do completion in uio_write context, the userspace
will wait tcmu complete synchronously, switch completion path to
work queue context for better performance.

Use tcmu + file to evaluate performance,
fio job: fio -filename=/dev/sdb  -direct=1 -size=2G -name=1 -thread
-runtime=60 -time_based  -rw=randwrite -numjobs=16 -iodepth=16 -bs=128k

Without this patch:
	READ: bw=2775MiB/s (2910MB/s), 173MiB/s-174MiB/s
(181MB/s-183MB/s), io=163GiB (175GB), run=60001-60001msec

With this patch:
	READ: bw=3333MiB/s (3495MB/s), 208MiB/s-209MiB/s
(218MB/s-219MB/s), io=195GiB (210GB), run=60001-60001msec

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
---
 drivers/target/target_core_user.c | 155 +++++++++++++++++++++-----------------
 1 file changed, 87 insertions(+), 68 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index fd7267b..a0766ee 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -168,6 +168,8 @@ struct tcmu_dev {
 	char dev_config[TCMU_CONFIG_LEN];
 
 	int nl_reply_supported;
+
+	struct work_struct complete_work;
 };
 
 #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
@@ -223,6 +225,7 @@ struct tcmu_tmr {
 
 static atomic_t global_page_count = ATOMIC_INIT(0);
 static struct delayed_work tcmu_unmap_work;
+struct workqueue_struct *tcmu_comp_wq;
 static int tcmu_global_max_pages = TCMU_GLOBAL_MAX_PAGES_DEF;
 
 static int tcmu_set_global_max_data_area(const char *str,
@@ -1596,6 +1599,76 @@ static void tcmu_detach_hba(struct se_hba *hba)
 	hba->hba_ptr = NULL;
 }
 
+static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
+{
+	struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
+	LIST_HEAD(cmds);
+	sense_reason_t scsi_ret;
+	int ret;
+
+	if (list_empty(&udev->qfull_queue))
+		return;
+
+	pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail);
+
+	list_splice_init(&udev->qfull_queue, &cmds);
+
+	list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, queue_entry) {
+		list_del_init(&tcmu_cmd->queue_entry);
+
+		pr_debug("removing cmd %p on dev %s from queue\n",
+			 tcmu_cmd, udev->name);
+
+		if (fail) {
+			/*
+			 * We were not able to even start the command, so
+			 * fail with busy to allow a retry in case runner
+			 * was only temporarily down. If the device is being
+			 * removed then LIO core will do the right thing and
+			 * fail the retry.
+			 */
+			tcmu_cmd->se_cmd->priv = NULL;
+			target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY);
+			tcmu_free_cmd(tcmu_cmd);
+			continue;
+		}
+
+		ret = queue_cmd_ring(tcmu_cmd, &scsi_ret);
+		if (ret < 0) {
+			pr_debug("cmd %p on dev %s failed with %u\n",
+				 tcmu_cmd, udev->name, scsi_ret);
+			/*
+			 * Ignore scsi_ret for now. target_complete_cmd
+			 * drops it.
+			 */
+			tcmu_cmd->se_cmd->priv = NULL;
+			target_complete_cmd(tcmu_cmd->se_cmd,
+					    SAM_STAT_CHECK_CONDITION);
+			tcmu_free_cmd(tcmu_cmd);
+		} else if (ret > 0) {
+			pr_debug("ran out of space during cmdr queue run\n");
+			/*
+			 * cmd was requeued, so just put all cmds back in
+			 * the queue
+			 */
+			list_splice_tail(&cmds, &udev->qfull_queue);
+			break;
+		}
+	}
+
+	tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer);
+}
+
+static void tcmu_complete_work(struct work_struct *work)
+{
+	struct tcmu_dev *udev = container_of(work, struct tcmu_dev, complete_work);
+
+	mutex_lock(&udev->cmdr_lock);
+	if (tcmu_handle_completions(udev))
+		run_qfull_queue(udev, false);
+	mutex_unlock(&udev->cmdr_lock);
+}
+
 static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 {
 	struct tcmu_dev *udev;
@@ -1634,6 +1707,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 
 	xa_init(&udev->data_pages);
 
+	INIT_WORK(&udev->complete_work, tcmu_complete_work);
 	return &udev->se_dev;
 }
 
@@ -1725,75 +1799,10 @@ static void tcmu_dev_kref_release(struct kref *kref)
 	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
 }
 
-static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
-{
-	struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
-	LIST_HEAD(cmds);
-	sense_reason_t scsi_ret;
-	int ret;
-
-	if (list_empty(&udev->qfull_queue))
-		return;
-
-	pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail);
-
-	list_splice_init(&udev->qfull_queue, &cmds);
-
-	list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, queue_entry) {
-		list_del_init(&tcmu_cmd->queue_entry);
-
-		pr_debug("removing cmd %p on dev %s from queue\n",
-			 tcmu_cmd, udev->name);
-
-		if (fail) {
-			/*
-			 * We were not able to even start the command, so
-			 * fail with busy to allow a retry in case runner
-			 * was only temporarily down. If the device is being
-			 * removed then LIO core will do the right thing and
-			 * fail the retry.
-			 */
-			tcmu_cmd->se_cmd->priv = NULL;
-			target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY);
-			tcmu_free_cmd(tcmu_cmd);
-			continue;
-		}
-
-		ret = queue_cmd_ring(tcmu_cmd, &scsi_ret);
-		if (ret < 0) {
-			pr_debug("cmd %p on dev %s failed with %u\n",
-				 tcmu_cmd, udev->name, scsi_ret);
-			/*
-			 * Ignore scsi_ret for now. target_complete_cmd
-			 * drops it.
-			 */
-			tcmu_cmd->se_cmd->priv = NULL;
-			target_complete_cmd(tcmu_cmd->se_cmd,
-					    SAM_STAT_CHECK_CONDITION);
-			tcmu_free_cmd(tcmu_cmd);
-		} else if (ret > 0) {
-			pr_debug("ran out of space during cmdr queue run\n");
-			/*
-			 * cmd was requeued, so just put all cmds back in
-			 * the queue
-			 */
-			list_splice_tail(&cmds, &udev->qfull_queue);
-			break;
-		}
-	}
-
-	tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer);
-}
-
 static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
-
-	mutex_lock(&udev->cmdr_lock);
-	if (tcmu_handle_completions(udev))
-		run_qfull_queue(udev, false);
-	mutex_unlock(&udev->cmdr_lock);
-
+	queue_work(tcmu_comp_wq, &udev->complete_work);
 	return 0;
 }
 
@@ -3281,12 +3290,19 @@ static int __init tcmu_module_init(void)
 
 	INIT_DELAYED_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn);
 
+	tcmu_comp_wq = alloc_workqueue("tcmu-comp-wq",
+			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!tcmu_comp_wq)
+		return -ENOMEM;
+
 	tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
 				sizeof(struct tcmu_cmd),
 				__alignof__(struct tcmu_cmd),
 				0, NULL);
-	if (!tcmu_cmd_cache)
-		return -ENOMEM;
+	if (!tcmu_cmd_cache) {
+		ret = -ENOMEM;
+		goto out_free_comp_wq;
+	}
 
 	tcmu_root_device = root_device_register("tcm_user");
 	if (IS_ERR(tcmu_root_device)) {
@@ -3335,6 +3351,8 @@ static int __init tcmu_module_init(void)
 	root_device_unregister(tcmu_root_device);
 out_free_cache:
 	kmem_cache_destroy(tcmu_cmd_cache);
+out_free_comp_wq:
+	destroy_workqueue(tcmu_comp_wq);
 
 	return ret;
 }
@@ -3347,6 +3365,7 @@ static void __exit tcmu_module_exit(void)
 	genl_unregister_family(&tcmu_genl_family);
 	root_device_unregister(tcmu_root_device);
 	kmem_cache_destroy(tcmu_cmd_cache);
+	destroy_workqueue(tcmu_comp_wq);
 }
 
 MODULE_DESCRIPTION("TCM USER subsystem plugin");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-11  2:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-11  2:03 [PATCH v2] scsi:tcmu: switch tcmu completion path to work queue context Guixin Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.