All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC v3 0/3] Simplify and optimize the UFS driver
@ 2019-11-06  1:06 Bart Van Assche
  2019-11-06  1:06 ` [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts Bart Van Assche
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Bart Van Assche @ 2019-11-06  1:06 UTC (permalink / raw)
  To: Avri Altman, Bean Huo
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Bart Van Assche

Hello everyone,

This is version three of the patch series that simplifies and optimizes the
UFS driver. These patches are entirely untested. Any feedback is welcome.

Thanks,

Bart.

Changes compared to v2:
- Use a separate tag set for TMF tags.

Changes compared to v1:
- Use the block layer tag infrastructure for managing TMF tags.

Bart Van Assche (3):
  ufs: Avoid busy-waiting by eliminating tag conflicts
  ufs: Use blk_{get,put}_request() to allocate and free TMFs
  ufs: Simplify the clock scaling mechanism implementation

 drivers/scsi/ufs/ufshcd.c | 375 +++++++++++++++++---------------------
 drivers/scsi/ufs/ufshcd.h |  21 +--
 2 files changed, 169 insertions(+), 227 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts
  2019-11-06  1:06 [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Bart Van Assche
@ 2019-11-06  1:06 ` Bart Van Assche
  2019-11-07 17:45   ` [EXT] " Bean Huo (beanhuo)
  2019-11-06  1:06 ` [PATCH RFC v3 2/3] ufs: Use blk_{get,put}_request() to allocate and free TMFs Bart Van Assche
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Bart Van Assche @ 2019-11-06  1:06 UTC (permalink / raw)
  To: Avri Altman, Bean Huo
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Bart Van Assche, Yaniv Gardi, Stanley Chu,
	Tomas Winkler

Instead of tracking which tags are in use in the ufs_hba.lrb_in_use
bitmask, rely on the block layer tag allocation mechanism. This patch
removes the following busy-waiting loop, which is triggered when
ufshcd_issue_devman_upiu_cmd() and the block layer accidentally allocate
the same tag for a SCSI request:
* ufshcd_queuecommand() returns SCSI_MLQUEUE_HOST_BUSY.
* The SCSI core requeues the SCSI command.

Cc: Yaniv Gardi <ygardi@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 121 +++++++++++++++-----------------------
 drivers/scsi/ufs/ufshcd.h |   6 +-
 2 files changed, 50 insertions(+), 77 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c480a97943bd..4c8362a47577 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -497,8 +497,8 @@ static void ufshcd_print_tmrs(struct ufs_hba *hba, unsigned long bitmap)
 static void ufshcd_print_host_state(struct ufs_hba *hba)
 {
 	dev_err(hba->dev, "UFS Host state=%d\n", hba->ufshcd_state);
-	dev_err(hba->dev, "lrb in use=0x%lx, outstanding reqs=0x%lx tasks=0x%lx\n",
-		hba->lrb_in_use, hba->outstanding_reqs, hba->outstanding_tasks);
+	dev_err(hba->dev, "outstanding reqs=0x%lx tasks=0x%lx\n",
+		hba->outstanding_reqs, hba->outstanding_tasks);
 	dev_err(hba->dev, "saved_err=0x%x, saved_uic_err=0x%x\n",
 		hba->saved_err, hba->saved_uic_err);
 	dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n",
@@ -1596,6 +1596,25 @@ int ufshcd_hold(struct ufs_hba *hba, bool async)
 }
 EXPORT_SYMBOL_GPL(ufshcd_hold);
 
+static bool ufshcd_is_busy(struct request *req, void *priv, bool reserved)
+{
+	int *busy = priv;
+
+	WARN_ON_ONCE(reserved);
+	(*busy)++;
+	return false;
+}
+
+/* Whether or not any tag is in use by a request that is in progress. */
+static bool ufshcd_any_tag_in_use(struct ufs_hba *hba)
+{
+	struct request_queue *q = hba->cmd_queue;
+	int busy = 0;
+
+	blk_mq_tagset_busy_iter(q->tag_set, ufshcd_is_busy, &busy);
+	return busy;
+}
+
 static void ufshcd_gate_work(struct work_struct *work)
 {
 	struct ufs_hba *hba = container_of(work, struct ufs_hba,
@@ -1619,7 +1638,7 @@ static void ufshcd_gate_work(struct work_struct *work)
 
 	if (hba->clk_gating.active_reqs
 		|| hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL
-		|| hba->lrb_in_use || hba->outstanding_tasks
+		|| ufshcd_any_tag_in_use(hba) || hba->outstanding_tasks
 		|| hba->active_uic_cmd || hba->uic_async_done)
 		goto rel_lock;
 
@@ -1673,7 +1692,7 @@ static void __ufshcd_release(struct ufs_hba *hba)
 
 	if (hba->clk_gating.active_reqs || hba->clk_gating.is_suspended
 		|| hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL
-		|| hba->lrb_in_use || hba->outstanding_tasks
+		|| ufshcd_any_tag_in_use(hba) || hba->outstanding_tasks
 		|| hba->active_uic_cmd || hba->uic_async_done
 		|| ufshcd_eh_in_progress(hba))
 		return;
@@ -2443,22 +2462,9 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	hba->req_abort_count = 0;
 
-	/* acquire the tag to make sure device cmds don't use it */
-	if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) {
-		/*
-		 * Dev manage command in progress, requeue the command.
-		 * Requeuing the command helps in cases where the request *may*
-		 * find different tag instead of waiting for dev manage command
-		 * completion.
-		 */
-		err = SCSI_MLQUEUE_HOST_BUSY;
-		goto out;
-	}
-
 	err = ufshcd_hold(hba, true);
 	if (err) {
 		err = SCSI_MLQUEUE_HOST_BUSY;
-		clear_bit_unlock(tag, &hba->lrb_in_use);
 		goto out;
 	}
 	WARN_ON(hba->clk_gating.state != CLKS_ON);
@@ -2479,7 +2485,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	err = ufshcd_map_sg(hba, lrbp);
 	if (err) {
 		lrbp->cmd = NULL;
-		clear_bit_unlock(tag, &hba->lrb_in_use);
 		goto out;
 	}
 	/* Make sure descriptors are ready before ringing the doorbell */
@@ -2625,44 +2630,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 	return err;
 }
 
-/**
- * ufshcd_get_dev_cmd_tag - Get device management command tag
- * @hba: per-adapter instance
- * @tag_out: pointer to variable with available slot value
- *
- * Get a free slot and lock it until device management command
- * completes.
- *
- * Returns false if free slot is unavailable for locking, else
- * return true with tag value in @tag.
- */
-static bool ufshcd_get_dev_cmd_tag(struct ufs_hba *hba, int *tag_out)
-{
-	int tag;
-	bool ret = false;
-	unsigned long tmp;
-
-	if (!tag_out)
-		goto out;
-
-	do {
-		tmp = ~hba->lrb_in_use;
-		tag = find_last_bit(&tmp, hba->nutrs);
-		if (tag >= hba->nutrs)
-			goto out;
-	} while (test_and_set_bit_lock(tag, &hba->lrb_in_use));
-
-	*tag_out = tag;
-	ret = true;
-out:
-	return ret;
-}
-
-static inline void ufshcd_put_dev_cmd_tag(struct ufs_hba *hba, int tag)
-{
-	clear_bit_unlock(tag, &hba->lrb_in_use);
-}
-
 /**
  * ufshcd_exec_dev_cmd - API for sending device management requests
  * @hba: UFS hba
@@ -2675,6 +2642,8 @@ static inline void ufshcd_put_dev_cmd_tag(struct ufs_hba *hba, int tag)
 static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 		enum dev_cmd_type cmd_type, int timeout)
 {
+	struct request_queue *q = hba->cmd_queue;
+	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err;
 	int tag;
@@ -2688,7 +2657,11 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by SCSI request timeout.
 	 */
-	wait_event(hba->dev_cmd.tag_wq, ufshcd_get_dev_cmd_tag(hba, &tag));
+	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	tag = req->tag;
+	WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag));
 
 	init_completion(&wait);
 	lrbp = &hba->lrb[tag];
@@ -2712,8 +2685,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 			err ? "query_complete_err" : "query_complete");
 
 out_put_tag:
-	ufshcd_put_dev_cmd_tag(hba, tag);
-	wake_up(&hba->dev_cmd.tag_wq);
+	blk_put_request(req);
 	up_read(&hba->clk_scaling_lock);
 	return err;
 }
@@ -4832,7 +4804,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 			cmd->result = result;
 			/* Mark completed command as NULL in LRB */
 			lrbp->cmd = NULL;
-			clear_bit_unlock(index, &hba->lrb_in_use);
 			/* Do not touch lrbp after scsi done */
 			cmd->scsi_done(cmd);
 			__ufshcd_release(hba);
@@ -4854,9 +4825,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 	hba->outstanding_reqs ^= completed_reqs;
 
 	ufshcd_clk_scaling_update_busy(hba);
-
-	/* we might have free'd some tags above */
-	wake_up(&hba->dev_cmd.tag_wq);
 }
 
 /**
@@ -5785,6 +5753,8 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 					enum dev_cmd_type cmd_type,
 					enum query_opcode desc_op)
 {
+	struct request_queue *q = hba->cmd_queue;
+	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err = 0;
 	int tag;
@@ -5794,7 +5764,11 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 
 	down_read(&hba->clk_scaling_lock);
 
-	wait_event(hba->dev_cmd.tag_wq, ufshcd_get_dev_cmd_tag(hba, &tag));
+	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	tag = req->tag;
+	WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag));
 
 	init_completion(&wait);
 	lrbp = &hba->lrb[tag];
@@ -5868,8 +5842,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 		}
 	}
 
-	ufshcd_put_dev_cmd_tag(hba, tag);
-	wake_up(&hba->dev_cmd.tag_wq);
+	blk_put_request(req);
 	up_read(&hba->clk_scaling_lock);
 	return err;
 }
@@ -6164,9 +6137,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	hba->lrb[tag].cmd = NULL;
 	spin_unlock_irqrestore(host->host_lock, flags);
 
-	clear_bit_unlock(tag, &hba->lrb_in_use);
-	wake_up(&hba->dev_cmd.tag_wq);
-
 out:
 	if (!err) {
 		err = SUCCESS;
@@ -8170,6 +8140,7 @@ void ufshcd_remove(struct ufs_hba *hba)
 {
 	ufs_bsg_remove(hba);
 	ufs_sysfs_remove_nodes(hba->dev);
+	blk_cleanup_queue(hba->cmd_queue);
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
 	ufshcd_disable_intr(hba, hba->intr_mask);
@@ -8333,9 +8304,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	init_rwsem(&hba->clk_scaling_lock);
 
-	/* Initialize device management tag acquire wait queue */
-	init_waitqueue_head(&hba->dev_cmd.tag_wq);
-
 	ufshcd_init_clk_gating(hba);
 
 	ufshcd_init_clk_scaling(hba);
@@ -8369,6 +8337,11 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 		goto exit_gating;
 	}
 
+	err = -ENOMEM;
+	hba->cmd_queue = blk_mq_init_queue(&hba->host->tag_set);
+	if (!hba->cmd_queue)
+		goto out_remove_scsi_host;
+
 	/* Reset the attached device */
 	ufshcd_vops_device_reset(hba);
 
@@ -8378,7 +8351,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 		dev_err(hba->dev, "Host controller enable failed\n");
 		ufshcd_print_host_regs(hba);
 		ufshcd_print_host_state(hba);
-		goto out_remove_scsi_host;
+		goto free_cmd_queue;
 	}
 
 	/*
@@ -8415,6 +8388,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	return 0;
 
+free_cmd_queue:
+	blk_cleanup_queue(hba->cmd_queue);
 out_remove_scsi_host:
 	scsi_remove_host(hba->host);
 exit_gating:
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e3593cce23c1..9707194fa4b7 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -212,13 +212,11 @@ struct ufs_query {
  * @type: device management command type - Query, NOP OUT
  * @lock: lock to allow one command at a time
  * @complete: internal commands completion
- * @tag_wq: wait queue until free command slot is available
  */
 struct ufs_dev_cmd {
 	enum dev_cmd_type type;
 	struct mutex lock;
 	struct completion *complete;
-	wait_queue_head_t tag_wq;
 	struct ufs_query query;
 };
 
@@ -480,7 +478,7 @@ struct ufs_stats {
  * @host: Scsi_Host instance of the driver
  * @dev: device handle
  * @lrb: local reference block
- * @lrb_in_use: lrb in use
+ * @cmd_queue: Used to allocate command tags from hba->host->tag_set.
  * @outstanding_tasks: Bits representing outstanding task requests
  * @outstanding_reqs: Bits representing outstanding transfer requests
  * @capabilities: UFS Controller Capabilities
@@ -538,6 +536,7 @@ struct ufs_hba {
 
 	struct Scsi_Host *host;
 	struct device *dev;
+	struct request_queue *cmd_queue;
 	/*
 	 * This field is to keep a reference to "scsi_device" corresponding to
 	 * "UFS device" W-LU.
@@ -558,7 +557,6 @@ struct ufs_hba {
 	u32 ahit;
 
 	struct ufshcd_lrb *lrb;
-	unsigned long lrb_in_use;
 
 	unsigned long outstanding_tasks;
 	unsigned long outstanding_reqs;
-- 
2.24.0.rc1.363.gb1bccd3e3d-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC v3 2/3] ufs: Use blk_{get,put}_request() to allocate and free TMFs
  2019-11-06  1:06 [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Bart Van Assche
  2019-11-06  1:06 ` [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts Bart Van Assche
@ 2019-11-06  1:06 ` Bart Van Assche
  2019-11-07 17:45   ` [EXT] " Bean Huo (beanhuo)
  2019-11-06  1:06 ` [PATCH RFC v3 3/3] ufs: Simplify the clock scaling mechanism implementation Bart Van Assche
  2019-11-11 13:46 ` [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Avri Altman
  3 siblings, 1 reply; 8+ messages in thread
From: Bart Van Assche @ 2019-11-06  1:06 UTC (permalink / raw)
  To: Avri Altman, Bean Huo
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Bart Van Assche, Yaniv Gardi, Stanley Chu,
	Tomas Winkler

Manage TMF tags with blk_{get,put}_request() instead of
ufshcd_get_tm_free_slot() / ufshcd_put_tm_slot(). Store a per-request
completion pointer in request.end_io_data instead of using a waitqueue
to report TMF completion.

Cc: Yaniv Gardi <ygardi@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 122 +++++++++++++++++++++++---------------
 drivers/scsi/ufs/ufshcd.h |  12 ++--
 2 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 4c8362a47577..2c6300fd5c75 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -645,40 +645,6 @@ static inline int ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
 	return le32_to_cpu(lrbp->utr_descriptor_ptr->header.dword_2) & MASK_OCS;
 }
 
-/**
- * ufshcd_get_tm_free_slot - get a free slot for task management request
- * @hba: per adapter instance
- * @free_slot: pointer to variable with available slot value
- *
- * Get a free tag and lock it until ufshcd_put_tm_slot() is called.
- * Returns 0 if free slot is not available, else return 1 with tag value
- * in @free_slot.
- */
-static bool ufshcd_get_tm_free_slot(struct ufs_hba *hba, int *free_slot)
-{
-	int tag;
-	bool ret = false;
-
-	if (!free_slot)
-		goto out;
-
-	do {
-		tag = find_first_zero_bit(&hba->tm_slots_in_use, hba->nutmrs);
-		if (tag >= hba->nutmrs)
-			goto out;
-	} while (test_and_set_bit_lock(tag, &hba->tm_slots_in_use));
-
-	*free_slot = tag;
-	ret = true;
-out:
-	return ret;
-}
-
-static inline void ufshcd_put_tm_slot(struct ufs_hba *hba, int slot)
-{
-	clear_bit_unlock(slot, &hba->tm_slots_in_use);
-}
-
 /**
  * ufshcd_utrl_clear - Clear a bit in UTRLCLR register
  * @hba: per adapter instance
@@ -5515,17 +5481,38 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
 	 */
 }
 
+struct ctm_info {
+	struct ufs_hba *hba;
+	unsigned long pending;
+};
+
+static bool ufshcd_compl_tm(struct request *req, void *priv, bool reserved)
+{
+	const struct ctm_info *const ci = priv;
+	struct completion *c;
+
+	WARN_ON_ONCE(reserved);
+	if (test_bit(req->tag, &ci->pending))
+		return true;
+	c = req->end_io_data;
+	if (c)
+		complete(c);
+	return true;
+}
+
 /**
  * ufshcd_tmc_handler - handle task management function completion
  * @hba: per adapter instance
  */
 static void ufshcd_tmc_handler(struct ufs_hba *hba)
 {
-	u32 tm_doorbell;
+	struct request_queue *q = hba->tmf_queue;
+	struct ctm_info ci = {
+		.hba	 = hba,
+		.pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL),
+	};
 
-	tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-	hba->tm_condition = tm_doorbell ^ hba->outstanding_tasks;
-	wake_up(&hba->tm_wq);
+	blk_mq_tagset_busy_iter(q->tag_set, ufshcd_compl_tm, &ci);
 }
 
 /**
@@ -5618,7 +5605,10 @@ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag)
 static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 		struct utp_task_req_desc *treq, u8 tm_function)
 {
+	struct request_queue *q = hba->tmf_queue;
 	struct Scsi_Host *host = hba->host;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	struct request *req;
 	unsigned long flags;
 	int free_slot, task_tag, err;
 
@@ -5627,7 +5617,10 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by %TM_CMD_TIMEOUT.
 	 */
-	wait_event(hba->tm_tag_wq, ufshcd_get_tm_free_slot(hba, &free_slot));
+	req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED);
+	req->end_io_data = &wait;
+	free_slot = req->tag;
+	WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs);
 	ufshcd_hold(hba, false);
 
 	spin_lock_irqsave(host->host_lock, flags);
@@ -5653,10 +5646,14 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_send");
 
 	/* wait until the task management command is completed */
-	err = wait_event_timeout(hba->tm_wq,
-			test_bit(free_slot, &hba->tm_condition),
+	err = wait_for_completion_io_timeout(&wait,
 			msecs_to_jiffies(TM_CMD_TIMEOUT));
 	if (!err) {
+		/*
+		 * Make sure that ufshcd_compl_tm() does not trigger a
+		 * use-after-free.
+		 */
+		req->end_io_data = NULL;
 		ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete_err");
 		dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
 				__func__, tm_function);
@@ -5675,9 +5672,7 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	__clear_bit(free_slot, &hba->outstanding_tasks);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
-	clear_bit(free_slot, &hba->tm_condition);
-	ufshcd_put_tm_slot(hba, free_slot);
-	wake_up(&hba->tm_tag_wq);
+	blk_put_request(req);
 
 	ufshcd_release(hba);
 	return err;
@@ -8140,6 +8135,8 @@ void ufshcd_remove(struct ufs_hba *hba)
 {
 	ufs_bsg_remove(hba);
 	ufs_sysfs_remove_nodes(hba->dev);
+	blk_cleanup_queue(hba->tmf_queue);
+	blk_mq_free_tag_set(&hba->tmf_tag_set);
 	blk_cleanup_queue(hba->cmd_queue);
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
@@ -8219,6 +8216,18 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
 }
 EXPORT_SYMBOL(ufshcd_alloc_host);
 
+/* This function exists because blk_mq_alloc_tag_set() requires this. */
+static blk_status_t ufshcd_queue_tmf(struct blk_mq_hw_ctx *hctx,
+				     const struct blk_mq_queue_data *qd)
+{
+	WARN_ON_ONCE(true);
+	return BLK_STS_NOTSUPP;
+}
+
+static const struct blk_mq_ops ufshcd_tmf_ops = {
+	.queue_rq = ufshcd_queue_tmf,
+};
+
 /**
  * ufshcd_init - Driver initialization routine
  * @hba: per-adapter instance
@@ -8288,10 +8297,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	hba->max_pwr_info.is_valid = false;
 
-	/* Initailize wait queue for task management */
-	init_waitqueue_head(&hba->tm_wq);
-	init_waitqueue_head(&hba->tm_tag_wq);
-
 	/* Initialize work queues */
 	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
 	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
@@ -8342,6 +8347,21 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	if (!hba->cmd_queue)
 		goto out_remove_scsi_host;
 
+	hba->tmf_tag_set = (struct blk_mq_tag_set) {
+		.nr_hw_queues	= 1,
+		.queue_depth	= hba->nutmrs,
+		.ops		= &ufshcd_tmf_ops,
+		.flags		= BLK_MQ_F_NO_SCHED,
+	};
+	err = blk_mq_alloc_tag_set(&hba->tmf_tag_set);
+	if (err < 0)
+		goto free_cmd_queue;
+	hba->tmf_queue = blk_mq_init_queue(&hba->tmf_tag_set);
+	if (IS_ERR(hba->tmf_queue)) {
+		err = PTR_ERR(hba->tmf_queue);
+		goto free_tmf_tag_set;
+	}
+
 	/* Reset the attached device */
 	ufshcd_vops_device_reset(hba);
 
@@ -8351,7 +8371,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 		dev_err(hba->dev, "Host controller enable failed\n");
 		ufshcd_print_host_regs(hba);
 		ufshcd_print_host_state(hba);
-		goto free_cmd_queue;
+		goto free_tmf_queue;
 	}
 
 	/*
@@ -8388,6 +8408,10 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	return 0;
 
+free_tmf_queue:
+	blk_cleanup_queue(hba->tmf_queue);
+free_tmf_tag_set:
+	blk_mq_free_tag_set(&hba->tmf_tag_set);
 free_cmd_queue:
 	blk_cleanup_queue(hba->cmd_queue);
 out_remove_scsi_host:
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 9707194fa4b7..6e0b87e8f875 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -490,11 +490,9 @@ struct ufs_stats {
  * @irq: Irq number of the controller
  * @active_uic_cmd: handle of active UIC command
  * @uic_cmd_mutex: mutex for uic command
- * @tm_wq: wait queue for task management
- * @tm_tag_wq: wait queue for free task management slots
- * @tm_slots_in_use: bit map of task management request slots in use
+ * @tmf_tag_set: TMF tag set.
+ * @tmf_queue: Used to allocate TMF tags.
  * @pwr_done: completion for power mode change
- * @tm_condition: condition variable for task management
  * @ufshcd_state: UFSHCD states
  * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
@@ -638,10 +636,8 @@ struct ufs_hba {
 	/* Device deviations from standard UFS device spec. */
 	unsigned int dev_quirks;
 
-	wait_queue_head_t tm_wq;
-	wait_queue_head_t tm_tag_wq;
-	unsigned long tm_condition;
-	unsigned long tm_slots_in_use;
+	struct blk_mq_tag_set tmf_tag_set;
+	struct request_queue *tmf_queue;
 
 	struct uic_command *active_uic_cmd;
 	struct mutex uic_cmd_mutex;
-- 
2.24.0.rc1.363.gb1bccd3e3d-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH RFC v3 3/3] ufs: Simplify the clock scaling mechanism implementation
  2019-11-06  1:06 [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Bart Van Assche
  2019-11-06  1:06 ` [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts Bart Van Assche
  2019-11-06  1:06 ` [PATCH RFC v3 2/3] ufs: Use blk_{get,put}_request() to allocate and free TMFs Bart Van Assche
@ 2019-11-06  1:06 ` Bart Van Assche
  2019-11-11 13:51   ` Avri Altman
  2019-11-11 13:46 ` [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Avri Altman
  3 siblings, 1 reply; 8+ messages in thread
From: Bart Van Assche @ 2019-11-06  1:06 UTC (permalink / raw)
  To: Avri Altman, Bean Huo
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Bart Van Assche, Yaniv Gardi, Stanley Chu,
	Tomas Winkler

Scaling the clock is only safe while no commands are in progress. Use
blk_mq_{un,}freeze_queue() to block submission of new commands and to
wait for ongoing commands to complete.

Cc: Yaniv Gardi <ygardi@codeaurora.org>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 134 ++++++++++++--------------------------
 drivers/scsi/ufs/ufshcd.h |   3 -
 2 files changed, 43 insertions(+), 94 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 2c6300fd5c75..d1912ba5d2d9 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -292,14 +292,49 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba)
 
 static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba)
 {
-	if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt))
-		scsi_unblock_requests(hba->host);
+	struct scsi_device *sdev;
+
+	blk_mq_unfreeze_queue(hba->tmf_queue);
+	blk_mq_unfreeze_queue(hba->cmd_queue);
+	shost_for_each_device(sdev, hba->host)
+		blk_mq_unfreeze_queue(sdev->request_queue);
 }
 
-static void ufshcd_scsi_block_requests(struct ufs_hba *hba)
+static int ufshcd_scsi_block_requests(struct ufs_hba *hba,
+				      unsigned long timeout)
 {
-	if (atomic_inc_return(&hba->scsi_block_reqs_cnt) == 1)
-		scsi_block_requests(hba->host);
+	struct scsi_device *sdev;
+	unsigned long deadline = jiffies + timeout;
+
+	if (timeout == ULONG_MAX) {
+		shost_for_each_device(sdev, hba->host)
+			blk_mq_freeze_queue(sdev->request_queue);
+		blk_mq_freeze_queue(hba->cmd_queue);
+		blk_mq_freeze_queue(hba->tmf_queue);
+		return 0;
+	}
+
+	shost_for_each_device(sdev, hba->host)
+		blk_freeze_queue_start(sdev->request_queue);
+	blk_freeze_queue_start(hba->cmd_queue);
+	blk_freeze_queue_start(hba->tmf_queue);
+	shost_for_each_device(sdev, hba->host) {
+		if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0) {
+			goto err;
+		}
+	}
+	if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
+				max_t(long, 0, deadline - jiffies)) <= 0)
+		goto err;
+	return 0;
+
+err:
+	ufshcd_scsi_unblock_requests(hba);
+	return -ETIMEDOUT;
 }
 
 static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
@@ -971,65 +1006,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
 	return false;
 }
 
-static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
-					u64 wait_timeout_us)
-{
-	unsigned long flags;
-	int ret = 0;
-	u32 tm_doorbell;
-	u32 tr_doorbell;
-	bool timeout = false, do_last_check = false;
-	ktime_t start;
-
-	ufshcd_hold(hba, false);
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	/*
-	 * Wait for all the outstanding tasks/transfer requests.
-	 * Verify by checking the doorbell registers are clear.
-	 */
-	start = ktime_get();
-	do {
-		if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
-			ret = -EBUSY;
-			goto out;
-		}
-
-		tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
-		tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-		if (!tm_doorbell && !tr_doorbell) {
-			timeout = false;
-			break;
-		} else if (do_last_check) {
-			break;
-		}
-
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
-		schedule();
-		if (ktime_to_us(ktime_sub(ktime_get(), start)) >
-		    wait_timeout_us) {
-			timeout = true;
-			/*
-			 * We might have scheduled out for long time so make
-			 * sure to check if doorbells are cleared by this time
-			 * or not.
-			 */
-			do_last_check = true;
-		}
-		spin_lock_irqsave(hba->host->host_lock, flags);
-	} while (tm_doorbell || tr_doorbell);
-
-	if (timeout) {
-		dev_err(hba->dev,
-			"%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
-			__func__, tm_doorbell, tr_doorbell);
-		ret = -EBUSY;
-	}
-out:
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_release(hba);
-	return ret;
-}
-
 /**
  * ufshcd_scale_gear - scale up/down UFS gear
  * @hba: per adapter instance
@@ -1079,26 +1055,15 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up)
 
 static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 {
-	#define DOORBELL_CLR_TOUT_US		(1000 * 1000) /* 1 sec */
-	int ret = 0;
 	/*
 	 * make sure that there are no outstanding requests when
 	 * clock scaling is in progress
 	 */
-	ufshcd_scsi_block_requests(hba);
-	down_write(&hba->clk_scaling_lock);
-	if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
-		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
-		ufshcd_scsi_unblock_requests(hba);
-	}
-
-	return ret;
+	return ufshcd_scsi_block_requests(hba, HZ);
 }
 
 static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
 {
-	up_write(&hba->clk_scaling_lock);
 	ufshcd_scsi_unblock_requests(hba);
 }
 
@@ -1528,7 +1493,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async)
 		 */
 		/* fallthrough */
 	case CLKS_OFF:
-		ufshcd_scsi_block_requests(hba);
+		ufshcd_scsi_block_requests(hba, ULONG_MAX);
 		hba->clk_gating.state = REQ_CLKS_ON;
 		trace_ufshcd_clk_gating(dev_name(hba->dev),
 					hba->clk_gating.state);
@@ -2395,9 +2360,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		BUG();
 	}
 
-	if (!down_read_trylock(&hba->clk_scaling_lock))
-		return SCSI_MLQUEUE_HOST_BUSY;
-
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	switch (hba->ufshcd_state) {
 	case UFSHCD_STATE_OPERATIONAL:
@@ -2462,7 +2424,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 out:
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -2616,8 +2577,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	struct completion wait;
 	unsigned long flags;
 
-	down_read(&hba->clk_scaling_lock);
-
 	/*
 	 * Get free slot, sleep if slots are unavailable.
 	 * Even though we use wait_event() which sleeps indefinitely,
@@ -2652,7 +2611,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
 out_put_tag:
 	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -5451,7 +5409,7 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
 		/* handle fatal errors only when link is functional */
 		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
 			/* block commands from scsi mid-layer */
-			ufshcd_scsi_block_requests(hba);
+			ufshcd_scsi_block_requests(hba, ULONG_MAX);
 
 			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
 
@@ -5757,8 +5715,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	unsigned long flags;
 	u32 upiu_flags;
 
-	down_read(&hba->clk_scaling_lock);
-
 	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -5838,7 +5794,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	}
 
 	blk_put_request(req);
-	up_read(&hba->clk_scaling_lock);
 	return err;
 }
 
@@ -8307,8 +8262,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	/* Initialize mutex for device management commands */
 	mutex_init(&hba->dev_cmd.lock);
 
-	init_rwsem(&hba->clk_scaling_lock);
-
 	ufshcd_init_clk_gating(hba);
 
 	ufshcd_init_clk_scaling(hba);
@@ -8394,7 +8347,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 
 	/* Hold auto suspend until async scan completes */
 	pm_runtime_get_sync(dev);
-	atomic_set(&hba->scsi_block_reqs_cnt, 0);
 	/*
 	 * We are assuming that device wasn't put in sleep/power-down
 	 * state exclusively during the boot stage before kernel.
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 6e0b87e8f875..cea71057fa90 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -517,7 +517,6 @@ struct ufs_stats {
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *  device is known or not.
- * @scsi_block_reqs_cnt: reference counting for scsi block requests
  */
 struct ufs_hba {
 	void __iomem *mmio_base;
@@ -721,9 +720,7 @@ struct ufs_hba {
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
 
-	struct rw_semaphore clk_scaling_lock;
 	struct ufs_desc_size desc_size;
-	atomic_t scsi_block_reqs_cnt;
 
 	struct device		bsg_dev;
 	struct request_queue	*bsg_queue;
-- 
2.24.0.rc1.363.gb1bccd3e3d-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [EXT] [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts
  2019-11-06  1:06 ` [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts Bart Van Assche
@ 2019-11-07 17:45   ` Bean Huo (beanhuo)
  0 siblings, 0 replies; 8+ messages in thread
From: Bean Huo (beanhuo) @ 2019-11-07 17:45 UTC (permalink / raw)
  To: Bart Van Assche, Avri Altman
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Yaniv Gardi, Stanley Chu, Tomas Winkler

Hi, Bart
I tested it on my platform, no problem found.

Tested-by: Bean Huo <beanhuo@micron.com>

Below is log shows you the tags assignment:
 
313.828168: ufshcd_command: send: ff3b0000.ufs: tag: 0, DB: 0x1, size: 8192, IS: 0, LBA: 2880752, opcode: 0x2a
313.828183: ufshcd_command: send: ff3b0000.ufs: tag: 1, DB: 0x3, size: 8192, IS: 0, LBA: 2880768, opcode: 0x2a
313.828194: ufshcd_command: send: ff3b0000.ufs: tag: 2, DB: 0x7, size: 8192, IS: 0, LBA: 2880784, opcode: 0x2a
313.828206: ufshcd_command: send: ff3b0000.ufs: tag: 3, DB: 0xf, size: 8192, IS: 0, LBA: 2880800, opcode: 0x2a
313.828218: ufshcd_command: send: ff3b0000.ufs: tag: 4, DB: 0x1f, size: 8192, IS: 0, LBA: 2880816, opcode: 0x2a
313.828228: ufshcd_command: send: ff3b0000.ufs: tag: 5, DB: 0x3f, size: 4096, IS: 0, LBA: 2880832, opcode: 0x2a
313.829654: ufshcd_command: send: ff3b0000.ufs: tag: 6, DB: 0x40, size: -1, IS: 0, LBA: 18446744073709551615, opcode: 0x35
313.830324: ufshcd_command: send: ff3b0000.ufs: tag: 7, DB: 0x80, size: 4096, IS: 0, LBA: 2880840, opcode: 0x2a
317.713874: ufshcd_command: send: ff3b0000.ufs: tag: 14, DB: 0x4000, size: 4096, IS: 0, LBA: 11976872, opcode: 0x28
317.755322: ufshcd_command: send: ff3b0000.ufs: tag: 15, DB: 0x8000, size: 4096, IS: 0, LBA: 11976744, opcode: 0x28
317.756758: ufshcd_command: send: ff3b0000.ufs: tag: 8, DB: 0x100, size: 4096, IS: 0, LBA: 11976752, opcode: 0x28
317.758389: ufshcd_command: send: ff3b0000.ufs: tag: 9, DB: 0x200, size: 8192, IS: 0, LBA: 12239872, opcode: 0x2a
317.758396: ufshcd_command: send: ff3b0000.ufs: tag: 10, DB: 0x600, size: 8192, IS: 0, LBA: 12239888, opcode: 0x2a
317.758402: ufshcd_command: send: ff3b0000.ufs: tag: 11, DB: 0xe00, size: 8192, IS: 0, LBA: 12239904, opcode: 0x2a
317.758406: ufshcd_command: send: ff3b0000.ufs: tag: 12, DB: 0x1e00, size: 8192, IS: 0, LBA: 12239920, opcode: 0x2a
317.758411: ufshcd_command: send: ff3b0000.ufs: tag: 13, DB: 0x3e00, size: 8192, IS: 0, LBA: 12239936, opcode: 0x2a
317.758415: ufshcd_command: send: ff3b0000.ufs: tag: 14, DB: 0x7e00, size: 8192, IS: 0, LBA: 12239952, opcode: 0x2a
317.758420: ufshcd_command: send: ff3b0000.ufs: tag: 15, DB: 0xfe00, size: 8192, IS: 0, LBA: 12239968, opcode: 0x2a
317.758426: ufshcd_command: send: ff3b0000.ufs: tag: 8, DB: 0xff00, size: 8192, IS: 0, LBA: 12239984, opcode: 0x2a
317.758431: ufshcd_command: send: ff3b0000.ufs: tag: 16, DB: 0x1ff00, size: 8192, IS: 0, LBA: 12240000, opcode: 0x2a
317.758436: ufshcd_command: send: ff3b0000.ufs: tag: 17, DB: 0x3ff00, size: 8192, IS: 0, LBA: 12240016, opcode: 0x2a
317.758441: ufshcd_command: send: ff3b0000.ufs: tag: 18, DB: 0x7ff00, size: 8192, IS: 0, LBA: 12240032, opcode: 0x2a
317.758446: ufshcd_command: send: ff3b0000.ufs: tag: 19, DB: 0xfff00, size: 8192, IS: 0, LBA: 12240048, opcode: 0x2a
317.758451: ufshcd_command: send: ff3b0000.ufs: tag: 20, DB: 0x1fff00, size: 8192, IS: 0, LBA: 12240064, opcode: 0x2a
317.758455: ufshcd_command: send: ff3b0000.ufs: tag: 21, DB: 0x3fff00, size: 8192, IS: 0, LBA: 12240080, opcode: 0x2a
317.758461: ufshcd_command: send: ff3b0000.ufs: tag: 22, DB: 0x7fff00, size: 8192, IS: 0, LBA: 12240096, opcode: 0x2a
317.758466: ufshcd_command: send: ff3b0000.ufs: tag: 23, DB: 0xffff00, size: 8192, IS: 0, LBA: 12240112, opcode: 0x2a
317.758834: ufshcd_command: send: ff3b0000.ufs: tag: 9, DB: 0xff8300, size: 8192, IS: 0, LBA: 12240128, opcode: 0x2a
317.758840: ufshcd_command: send: ff3b0000.ufs: tag: 10, DB: 0xff8700, size: 8192, IS: 0, LBA: 12240144, opcode: 0x2a
317.758891: ufshcd_command: send: ff3b0000.ufs: tag: 11, DB: 0xfc0e00, size: 8192, IS: 0, LBA: 12240160, opcode: 0x2a
317.758918: ufshcd_command: send: ff3b0000.ufs: tag: 16, DB: 0xf90e00, size: 8192, IS: 0, LBA: 12240176, opcode: 0x2a
317.758925: ufshcd_command: send: ff3b0000.ufs: tag: 17, DB: 0xfb0e00, size: 8192, IS: 0, LBA: 12240192, opcode: 0x2a
317.758937: ufshcd_command: send: ff3b0000.ufs: tag: 18, DB: 0xe70e00, size: 8192, IS: 1, LBA: 12240208, opcode: 0x2a
317.758957: ufshcd_command: send: ff3b0000.ufs: tag: 19, DB: 0xcf0e00, size: 8192, IS: 0, LBA: 12240224, opcode: 0x2a
317.758964: ufshcd_command: send: ff3b0000.ufs: tag: 20, DB: 0x9f0e00, size: 8192, IS: 1, LBA: 12240240, opcode: 0x2a
317.758978: ufshcd_command: send: ff3b0000.ufs: tag: 21, DB: 0xbf0e00, size: 8192, IS: 0, LBA: 12240256, opcode: 0x2a
317.758984: ufshcd_command: send: ff3b0000.ufs: tag: 22, DB: 0xff0e00, size: 8192, IS: 0, LBA: 12240272, opcode: 0x2a
317.758993: ufshcd_command: send: ff3b0000.ufs: tag: 24, DB: 0x1ff0e00, size: 8192, IS: 0, LBA: 12240288, opcode: 0x2a
317.759000: ufshcd_command: send: ff3b0000.ufs: tag: 25, DB: 0x3ff0e00, size: 8192, IS: 0, LBA: 12240304, opcode: 0x2a
317.759005: ufshcd_command: send: ff3b0000.ufs: tag: 26, DB: 0x7ff0e00, size: 8192, IS: 0, LBA: 12240320, opcode: 0x2a
317.759011: ufshcd_command: send: ff3b0000.ufs: tag: 27, DB: 0xf7f0e00, size: 8192, IS: 1, LBA: 12240336, opcode: 0x2a
317.759044: ufshcd_command: send: ff3b0000.ufs: tag: 28, DB: 0x1f7f0000, size: 8192, IS: 1, LBA: 12240352, opcode: 0x2a
317.759058: ufshcd_command: send: ff3b0000.ufs: tag: 12, DB: 0x1f7f1000, size: 8192, IS: 0, LBA: 12240368, opcode: 0x2a
317.759063: ufshcd_command: send: ff3b0000.ufs: tag: 13, DB: 0x1f7f3000, size: 8192, IS: 0, LBA: 12240384, opcode: 0x2a
317.759068: ufshcd_command: send: ff3b0000.ufs: tag: 14, DB: 0x1f7f7000, size: 8192, IS: 0, LBA: 12262400, opcode: 0x2a
317.759074: ufshcd_command: send: ff3b0000.ufs: tag: 8, DB: 0x1f7e7100, size: 8192, IS: 1, LBA: 12262416, opcode: 0x2a
317.759104: ufshcd_command: send: ff3b0000.ufs: tag: 16, DB: 0x1f797100, size: 8192, IS: 0, LBA: 12262432, opcode: 0x2a
317.759109: ufshcd_command: send: ff3b0000.ufs: tag: 17, DB: 0x1f737100, size: 8192, IS: 1, LBA: 12262448, opcode: 0x2a
317.759131: ufshcd_command: send: ff3b0000.ufs: tag: 18, DB: 0x1f677100, size: 8192, IS: 0, LBA: 12262464, opcode: 0x2a
317.759143: ufshcd_command: send: ff3b0000.ufs: tag: 23, DB: 0x1fe77100, size: 8192, IS: 0, LBA: 12262480, opcode: 0x2a
317.759149: ufshcd_command: send: ff3b0000.ufs: tag: 19, DB: 0x1fef7100, size: 8192, IS: 0, LBA: 12262496, opcode: 0x2a
317.759207: ufshcd_command: send: ff3b0000.ufs: tag: 20, DB: 0x1f9f7100, size: 8192, IS: 0, LBA: 12262512, opcode: 0x2a
317.759214: ufshcd_command: send: ff3b0000.ufs: tag: 21, DB: 0x1ebf7100, size: 8192, IS: 1, LBA: 12262528, opcode: 0x2a
317.759245: ufshcd_command: send: ff3b0000.ufs: tag: 9, DB: 0x18bf7300, size: 8192, IS: 0, LBA: 12262544, opcode: 0x2a
317.759247: ufshcd_command: send: ff3b0000.ufs: tag: 29, DB: 0x38bf7300, size: 8192, IS: 0, LBA: 12262576, opcode: 0x2a
317.759251: ufshcd_command: send: ff3b0000.ufs: tag: 10, DB: 0x38bf7700, size: 8192, IS: 0, LBA: 12262560, opcode: 0x2a
317.759276: ufshcd_command: send: ff3b0000.ufs: tag: 30, DB: 0x60bf7700, size: 8192, IS: 1, LBA: 12262624, opcode: 0x2a
317.759304: ufshcd_command: send: ff3b0000.ufs: tag: 31, DB: 0xe0bf4700, size: 8192, IS: 0, LBA: 12262608, opcode: 0x2a
317.759308: ufshcd_command: send: ff3b0000.ufs: tag: 11, DB: 0xe0bf4f00, size: 8192, IS: 0, LBA: 12262592, opcode: 0x2a
317.759314: ufshcd_command: send: ff3b0000.ufs: tag: 15, DB: 0xe0bf8f00, size: 8192, IS: 1, LBA: 12262640, opcode: 0x2a
317.759361: ufshcd_command: send: ff3b0000.ufs: tag: 12, DB: 0xe0bc9e00, size: 8192, IS: 0, LBA: 12262656, opcode: 0x2a
317.759378: ufshcd_command: send: ff3b0000.ufs: tag: 22, DB: 0xe0f89e00, size: 8192, IS: 1, LBA: 12262688, opcode: 0x2a
317.759393: ufshcd_command: send: ff3b0000.ufs: tag: 16, DB: 0xe0f99e00, size: 8192, IS: 0, LBA: 12262704, opcode: 0x2a
317.759399: ufshcd_command: send: ff3b0000.ufs: tag: 17, DB: 0xe07b9e00, size: 8192, IS: 1, LBA: 12262720, opcode: 0x2a
317.759414: ufshcd_command: send: ff3b0000.ufs: tag: 18, DB: 0xe07f9e00, size: 8192, IS: 0, LBA: 12262736, opcode: 0x2a
317.759420: ufshcd_command: send: ff3b0000.ufs: tag: 23, DB: 0xe0ff9e00, size: 8192, IS: 0, LBA: 12262752, opcode: 0x2a

//Bean

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [EXT] [PATCH RFC v3 2/3] ufs: Use blk_{get,put}_request() to allocate and free TMFs
  2019-11-06  1:06 ` [PATCH RFC v3 2/3] ufs: Use blk_{get,put}_request() to allocate and free TMFs Bart Van Assche
@ 2019-11-07 17:45   ` Bean Huo (beanhuo)
  0 siblings, 0 replies; 8+ messages in thread
From: Bean Huo (beanhuo) @ 2019-11-07 17:45 UTC (permalink / raw)
  To: Bart Van Assche, Avri Altman
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Yaniv Gardi, Stanley Chu, Tomas Winkler

> 
> Cc: Yaniv Gardi <ygardi@codeaurora.org>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Bean Huo <beanhuo@micron.com>

//Bean

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH RFC v3 0/3] Simplify and optimize the UFS driver
  2019-11-06  1:06 [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Bart Van Assche
                   ` (2 preceding siblings ...)
  2019-11-06  1:06 ` [PATCH RFC v3 3/3] ufs: Simplify the clock scaling mechanism implementation Bart Van Assche
@ 2019-11-11 13:46 ` Avri Altman
  3 siblings, 0 replies; 8+ messages in thread
From: Avri Altman @ 2019-11-11 13:46 UTC (permalink / raw)
  To: Bart Van Assche, Bean Huo
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig

Brilliant.
Except for one small nit in the 3rd patch (which you don't really need to attend to),
Reviewed-by: Avri Altman <avri.altman@wdc.com>

> 
> 
> Hello everyone,
> 
> This is version three of the patch series that simplifies and optimizes the UFS
> driver. These patches are entirely untested. Any feedback is welcome.
> 
> Thanks,
> 
> Bart.
> 
> Changes compared to v2:
> - Use a separate tag set for TMF tags.
> 
> Changes compared to v1:
> - Use the block layer tag infrastructure for managing TMF tags.
> 
> Bart Van Assche (3):
>   ufs: Avoid busy-waiting by eliminating tag conflicts
>   ufs: Use blk_{get,put}_request() to allocate and free TMFs
>   ufs: Simplify the clock scaling mechanism implementation
> 
>  drivers/scsi/ufs/ufshcd.c | 375 +++++++++++++++++---------------------
>  drivers/scsi/ufs/ufshcd.h |  21 +--
>  2 files changed, 169 insertions(+), 227 deletions(-)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH RFC v3 3/3] ufs: Simplify the clock scaling mechanism implementation
  2019-11-06  1:06 ` [PATCH RFC v3 3/3] ufs: Simplify the clock scaling mechanism implementation Bart Van Assche
@ 2019-11-11 13:51   ` Avri Altman
  0 siblings, 0 replies; 8+ messages in thread
From: Avri Altman @ 2019-11-11 13:51 UTC (permalink / raw)
  To: Bart Van Assche, Bean Huo
  Cc: Martin K . Petersen, James E . J . Bottomley, linux-scsi,
	Christoph Hellwig, Yaniv Gardi, Stanley Chu, Tomas Winkler

Some small nit:
ufshcd_scsi_{block,unblock}_requests are no longer using scsi_block_requests,
so maybe just omit the _scsi_ part of the name.

I think that you can add Bean's tested-by to this patch as well, because clock scaling
is happening all the time, as opposed to, e.g., task management, which is, on some
platforms, hard to reproduce.

Thanks,
Avri

> 
> 
> Scaling the clock is only safe while no commands are in progress. Use
> blk_mq_{un,}freeze_queue() to block submission of new commands and to wait
> for ongoing commands to complete.
> 
> Cc: Yaniv Gardi <ygardi@codeaurora.org>
> Cc: Stanley Chu <stanley.chu@mediatek.com>
> Cc: Avri Altman <avri.altman@wdc.com>
> Cc: Tomas Winkler <tomas.winkler@intel.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 134 ++++++++++++--------------------------
>  drivers/scsi/ufs/ufshcd.h |   3 -
>  2 files changed, 43 insertions(+), 94 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index
> 2c6300fd5c75..d1912ba5d2d9 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -292,14 +292,49 @@ static inline void ufshcd_disable_irq(struct ufs_hba
> *hba)
> 
>  static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba)  {
> -       if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt))
> -               scsi_unblock_requests(hba->host);
> +       struct scsi_device *sdev;
> +
> +       blk_mq_unfreeze_queue(hba->tmf_queue);
> +       blk_mq_unfreeze_queue(hba->cmd_queue);
> +       shost_for_each_device(sdev, hba->host)
> +               blk_mq_unfreeze_queue(sdev->request_queue);
>  }
> 
> -static void ufshcd_scsi_block_requests(struct ufs_hba *hba)
> +static int ufshcd_scsi_block_requests(struct ufs_hba *hba,
> +                                     unsigned long timeout)
>  {
> -       if (atomic_inc_return(&hba->scsi_block_reqs_cnt) == 1)
> -               scsi_block_requests(hba->host);
> +       struct scsi_device *sdev;
> +       unsigned long deadline = jiffies + timeout;
> +
> +       if (timeout == ULONG_MAX) {
> +               shost_for_each_device(sdev, hba->host)
> +                       blk_mq_freeze_queue(sdev->request_queue);
> +               blk_mq_freeze_queue(hba->cmd_queue);
> +               blk_mq_freeze_queue(hba->tmf_queue);
> +               return 0;
> +       }
> +
> +       shost_for_each_device(sdev, hba->host)
> +               blk_freeze_queue_start(sdev->request_queue);
> +       blk_freeze_queue_start(hba->cmd_queue);
> +       blk_freeze_queue_start(hba->tmf_queue);
> +       shost_for_each_device(sdev, hba->host) {
> +               if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue,
> +                               max_t(long, 0, deadline - jiffies)) <= 0) {
> +                       goto err;
> +               }
> +       }
> +       if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue,
> +                               max_t(long, 0, deadline - jiffies)) <= 0)
> +               goto err;
> +       if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue,
> +                               max_t(long, 0, deadline - jiffies)) <= 0)
> +               goto err;
> +       return 0;
> +
> +err:
> +       ufshcd_scsi_unblock_requests(hba);
> +       return -ETIMEDOUT;
>  }
> 
>  static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
> @@ -971,65 +1006,6 @@ static bool
> ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba,
>         return false;
>  }
> 
> -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba,
> -                                       u64 wait_timeout_us)
> -{
> -       unsigned long flags;
> -       int ret = 0;
> -       u32 tm_doorbell;
> -       u32 tr_doorbell;
> -       bool timeout = false, do_last_check = false;
> -       ktime_t start;
> -
> -       ufshcd_hold(hba, false);
> -       spin_lock_irqsave(hba->host->host_lock, flags);
> -       /*
> -        * Wait for all the outstanding tasks/transfer requests.
> -        * Verify by checking the doorbell registers are clear.
> -        */
> -       start = ktime_get();
> -       do {
> -               if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> -                       ret = -EBUSY;
> -                       goto out;
> -               }
> -
> -               tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
> -               tr_doorbell = ufshcd_readl(hba,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -               if (!tm_doorbell && !tr_doorbell) {
> -                       timeout = false;
> -                       break;
> -               } else if (do_last_check) {
> -                       break;
> -               }
> -
> -               spin_unlock_irqrestore(hba->host->host_lock, flags);
> -               schedule();
> -               if (ktime_to_us(ktime_sub(ktime_get(), start)) >
> -                   wait_timeout_us) {
> -                       timeout = true;
> -                       /*
> -                        * We might have scheduled out for long time so make
> -                        * sure to check if doorbells are cleared by this time
> -                        * or not.
> -                        */
> -                       do_last_check = true;
> -               }
> -               spin_lock_irqsave(hba->host->host_lock, flags);
> -       } while (tm_doorbell || tr_doorbell);
> -
> -       if (timeout) {
> -               dev_err(hba->dev,
> -                       "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n",
> -                       __func__, tm_doorbell, tr_doorbell);
> -               ret = -EBUSY;
> -       }
> -out:
> -       spin_unlock_irqrestore(hba->host->host_lock, flags);
> -       ufshcd_release(hba);
> -       return ret;
> -}
> -
>  /**
>   * ufshcd_scale_gear - scale up/down UFS gear
>   * @hba: per adapter instance
> @@ -1079,26 +1055,15 @@ static int ufshcd_scale_gear(struct ufs_hba *hba,
> bool scale_up)
> 
>  static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)  {
> -       #define DOORBELL_CLR_TOUT_US            (1000 * 1000) /* 1 sec */
> -       int ret = 0;
>         /*
>          * make sure that there are no outstanding requests when
>          * clock scaling is in progress
>          */
> -       ufshcd_scsi_block_requests(hba);
> -       down_write(&hba->clk_scaling_lock);
> -       if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
> -               ret = -EBUSY;
> -               up_write(&hba->clk_scaling_lock);
> -               ufshcd_scsi_unblock_requests(hba);
> -       }
> -
> -       return ret;
> +       return ufshcd_scsi_block_requests(hba, HZ);
>  }
> 
>  static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)  {
> -       up_write(&hba->clk_scaling_lock);
>         ufshcd_scsi_unblock_requests(hba);
>  }
> 
> @@ -1528,7 +1493,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async)
>                  */
>                 /* fallthrough */
>         case CLKS_OFF:
> -               ufshcd_scsi_block_requests(hba);
> +               ufshcd_scsi_block_requests(hba, ULONG_MAX);
>                 hba->clk_gating.state = REQ_CLKS_ON;
>                 trace_ufshcd_clk_gating(dev_name(hba->dev),
>                                         hba->clk_gating.state); @@ -2395,9 +2360,6 @@ static
> int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>                 BUG();
>         }
> 
> -       if (!down_read_trylock(&hba->clk_scaling_lock))
> -               return SCSI_MLQUEUE_HOST_BUSY;
> -
>         spin_lock_irqsave(hba->host->host_lock, flags);
>         switch (hba->ufshcd_state) {
>         case UFSHCD_STATE_OPERATIONAL:
> @@ -2462,7 +2424,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  out_unlock:
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
>  out:
> -       up_read(&hba->clk_scaling_lock);
>         return err;
>  }
> 
> @@ -2616,8 +2577,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba
> *hba,
>         struct completion wait;
>         unsigned long flags;
> 
> -       down_read(&hba->clk_scaling_lock);
> -
>         /*
>          * Get free slot, sleep if slots are unavailable.
>          * Even though we use wait_event() which sleeps indefinitely, @@ -2652,7
> +2611,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
> 
>  out_put_tag:
>         blk_put_request(req);
> -       up_read(&hba->clk_scaling_lock);
>         return err;
>  }
> 
> @@ -5451,7 +5409,7 @@ static void ufshcd_check_errors(struct ufs_hba
> *hba)
>                 /* handle fatal errors only when link is functional */
>                 if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
>                         /* block commands from scsi mid-layer */
> -                       ufshcd_scsi_block_requests(hba);
> +                       ufshcd_scsi_block_requests(hba, ULONG_MAX);
> 
>                         hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
> 
> @@ -5757,8 +5715,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>         unsigned long flags;
>         u32 upiu_flags;
> 
> -       down_read(&hba->clk_scaling_lock);
> -
>         req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
>         if (IS_ERR(req))
>                 return PTR_ERR(req);
> @@ -5838,7 +5794,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct
> ufs_hba *hba,
>         }
> 
>         blk_put_request(req);
> -       up_read(&hba->clk_scaling_lock);
>         return err;
>  }
> 
> @@ -8307,8 +8262,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem
> *mmio_base, unsigned int irq)
>         /* Initialize mutex for device management commands */
>         mutex_init(&hba->dev_cmd.lock);
> 
> -       init_rwsem(&hba->clk_scaling_lock);
> -
>         ufshcd_init_clk_gating(hba);
> 
>         ufshcd_init_clk_scaling(hba);
> @@ -8394,7 +8347,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem
> *mmio_base, unsigned int irq)
> 
>         /* Hold auto suspend until async scan completes */
>         pm_runtime_get_sync(dev);
> -       atomic_set(&hba->scsi_block_reqs_cnt, 0);
>         /*
>          * We are assuming that device wasn't put in sleep/power-down
>          * state exclusively during the boot stage before kernel.
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index
> 6e0b87e8f875..cea71057fa90 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -517,7 +517,6 @@ struct ufs_stats {
>   * @urgent_bkops_lvl: keeps track of urgent bkops level for device
>   * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
>   *  device is known or not.
> - * @scsi_block_reqs_cnt: reference counting for scsi block requests
>   */
>  struct ufs_hba {
>         void __iomem *mmio_base;
> @@ -721,9 +720,7 @@ struct ufs_hba {
>         enum bkops_status urgent_bkops_lvl;
>         bool is_urgent_bkops_lvl_checked;
> 
> -       struct rw_semaphore clk_scaling_lock;
>         struct ufs_desc_size desc_size;
> -       atomic_t scsi_block_reqs_cnt;
> 
>         struct device           bsg_dev;
>         struct request_queue    *bsg_queue;
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-11-11 13:51 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-06  1:06 [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Bart Van Assche
2019-11-06  1:06 ` [PATCH RFC v3 1/3] ufs: Avoid busy-waiting by eliminating tag conflicts Bart Van Assche
2019-11-07 17:45   ` [EXT] " Bean Huo (beanhuo)
2019-11-06  1:06 ` [PATCH RFC v3 2/3] ufs: Use blk_{get,put}_request() to allocate and free TMFs Bart Van Assche
2019-11-07 17:45   ` [EXT] " Bean Huo (beanhuo)
2019-11-06  1:06 ` [PATCH RFC v3 3/3] ufs: Simplify the clock scaling mechanism implementation Bart Van Assche
2019-11-11 13:51   ` Avri Altman
2019-11-11 13:46 ` [PATCH RFC v3 0/3] Simplify and optimize the UFS driver Avri Altman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.