All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
@ 2021-04-21  7:55 Ming Lei
  2021-04-21  7:55 ` [PATCH 1/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_terminate_rport_io Ming Lei
                   ` (6 more replies)
  0 siblings, 7 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21  7:55 UTC (permalink / raw)
  To: Martin K . Petersen, linux-scsi
  Cc: Ming Lei, Satish Kharat, Karan Tilak Kumar, David Jeffery

Hello Guys,

fnic uses the following way to walk scsi commands in failure handling,
which is obvious wrong, because caller of scsi_host_find_tag has to
guarantee that the tag is active.

        for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
				...
                sc = scsi_host_find_tag(fnic->lport->host, tag);
				...
		}

Fix the issue by using blk_mq_tagset_busy_iter() to walk
request/scsi_command.

thanks,
Ming


Ming Lei (5):
  scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
    fnic_terminate_rport_io
  scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
    fnic_clean_pending_aborts
  scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
    fnic_cleanup_io
  scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
    fnic_rport_exch_reset
  scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
    fnic_is_abts_pending

 drivers/scsi/fnic/fnic_scsi.c | 933 ++++++++++++++++++----------------
 1 file changed, 493 insertions(+), 440 deletions(-)

Cc: Satish Kharat <satishkh@cisco.com>
Cc: Karan Tilak Kumar <kartilak@cisco.com>
Cc: David Jeffery <djeffery@redhat.com>

-- 
2.29.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_terminate_rport_io
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
@ 2021-04-21  7:55 ` Ming Lei
  2021-04-21  7:55 ` [PATCH 2/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_clean_pending_aborts Ming Lei
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21  7:55 UTC (permalink / raw)
  To: Martin K . Petersen, linux-scsi
  Cc: Ming Lei, Satish Kharat, Karan Tilak Kumar, David Jeffery

So far, scsi_host_find_tag() is supposed to use in fast path and the
passed tag should be active.

Convert the scsi command walking into blk_mq_tagset_busy_iter(), which has
been one common pattern for handling failure.

Cc: Satish Kharat <satishkh@cisco.com>
Cc: Karan Tilak Kumar <kartilak@cisco.com>
Cc: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/fnic/fnic_scsi.c | 223 +++++++++++++++++-----------------
 1 file changed, 113 insertions(+), 110 deletions(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 36744968378f..522e1b43409d 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1678,23 +1678,123 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
 
 }
 
-void fnic_terminate_rport_io(struct fc_rport *rport)
+static bool fnic_terminate_cmd(struct request *req, void *data, bool reserved)
 {
-	int tag;
-	int abt_tag;
-	int term_cnt = 0;
+	int abt_tag, tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
-	struct scsi_cmnd *sc;
 	struct scsi_lun fc_lun;
-	struct fc_rport_libfc_priv *rdata;
-	struct fc_lport *lport;
-	struct fnic *fnic;
 	struct fc_rport *cmd_rport;
+	enum fnic_ioreq_state old_ioreq_state;
+	struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req);
+	struct fc_lport *lp = shost_priv(sc->device->host);
+	struct fnic *fnic = lport_priv(lp);
+	struct fc_rport *rport;
 	struct reset_stats *reset_stats;
 	struct terminate_stats *term_stats;
-	enum fnic_ioreq_state old_ioreq_state;
+	int *term_cnt = data;
+
+	term_stats = &fnic->fnic_stats.term_stats;
+	reset_stats = &fnic->fnic_stats.reset_stats;
+	tag = req->tag;
+	io_lock = fnic_io_lock_tag(fnic, tag);
+
+	spin_lock_irqsave(io_lock, flags);
+	rport = starget_to_rport(scsi_target(sc->device));
+	abt_tag = tag;
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if (!io_req)
+		goto unlock;
+
+	cmd_rport = starget_to_rport(scsi_target(sc->device));
+	if (rport != cmd_rport)
+		goto unlock;
+
+	if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+		(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		"fnic_terminate_rport_io dev rst not pending sc 0x%p\n",
+		sc);
+		goto unlock;
+	}
+	/*
+	 * Found IO that is still pending with firmware and
+	 * belongs to rport that went away
+	 */
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+		goto unlock;
+
+	if (io_req->abts_done) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+		"fnic_terminate_rport_io: io_req->abts_done is set "
+		"state is %s\n",
+		fnic_ioreq_state_to_str(CMD_STATE(sc)));
+	}
+	if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
+		FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+			  "fnic_terminate_rport_io "
+			  "IO not yet issued %p tag 0x%x flags "
+			  "%x state %d\n",
+			  sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
+	}
+	old_ioreq_state = CMD_STATE(sc);
+	CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+	CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+	if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+		atomic64_inc(&reset_stats->device_reset_terminates);
+		abt_tag = (tag | FNIC_TAG_DEV_RST);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		"fnic_terminate_rport_io dev rst sc 0x%p\n", sc);
+	}
+
+	BUG_ON(io_req->abts_done);
+
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		      fnic->lport->host,
+		      "fnic_terminate_rport_io: Issuing abts\n");
+
+	spin_unlock_irqrestore(io_lock, flags);
+
+	/* Now queue the abort command to firmware */
+	int_to_scsilun(sc->device->lun, &fc_lun);
+
+	if (fnic_queue_abort_io_req(fnic, abt_tag,
+				    FCPIO_ITMF_ABT_TASK_TERM,
+				    fc_lun.scsi_lun, io_req)) {
+		/*
+		 * Revert the cmd state back to old state, if
+		 * it hasn't changed in between. This cmd will get
+		 * aborted later by scsi_eh, or cleaned up during
+		 * lun reset
+		 */
+		spin_lock_irqsave(io_lock, flags);
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+			CMD_STATE(sc) = old_ioreq_state;
+		spin_unlock_irqrestore(io_lock, flags);
+	} else {
+		spin_lock_irqsave(io_lock, flags);
+		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+			CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+		else
+			CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+		spin_unlock_irqrestore(io_lock, flags);
+		atomic64_inc(&term_stats->terminates);
+		(*term_cnt)++;
+	}
+
+	return true;
+ unlock:
+	spin_unlock_irqrestore(io_lock, flags);
+	return true;
+}
+
+void fnic_terminate_rport_io(struct fc_rport *rport)
+{
+	int term_cnt = 0;
+	struct fc_rport_libfc_priv *rdata;
+	struct fc_lport *lport;
+	struct fnic *fnic;
 
 	if (!rport) {
 		printk(KERN_ERR "fnic_terminate_rport_io: rport is NULL\n");
@@ -1722,108 +1822,11 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
 	if (fnic->in_remove)
 		return;
 
-	reset_stats = &fnic->fnic_stats.reset_stats;
-	term_stats = &fnic->fnic_stats.term_stats;
-
-	for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
-		abt_tag = tag;
-		io_lock = fnic_io_lock_tag(fnic, tag);
-		spin_lock_irqsave(io_lock, flags);
-		sc = scsi_host_find_tag(fnic->lport->host, tag);
-		if (!sc) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-
-		io_req = (struct fnic_io_req *)CMD_SP(sc);
-		if (!io_req) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-
-		cmd_rport = starget_to_rport(scsi_target(sc->device));
-		if (rport != cmd_rport) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-
-		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
-			(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
-			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			"fnic_terminate_rport_io dev rst not pending sc 0x%p\n",
-			sc);
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-		/*
-		 * Found IO that is still pending with firmware and
-		 * belongs to rport that went away
-		 */
-		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-		if (io_req->abts_done) {
-			shost_printk(KERN_ERR, fnic->lport->host,
-			"fnic_terminate_rport_io: io_req->abts_done is set "
-			"state is %s\n",
-			fnic_ioreq_state_to_str(CMD_STATE(sc)));
-		}
-		if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
-			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
-				  "fnic_terminate_rport_io "
-				  "IO not yet issued %p tag 0x%x flags "
-				  "%x state %d\n",
-				  sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
-		}
-		old_ioreq_state = CMD_STATE(sc);
-		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
-		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
-		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
-			atomic64_inc(&reset_stats->device_reset_terminates);
-			abt_tag = (tag | FNIC_TAG_DEV_RST);
-			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			"fnic_terminate_rport_io dev rst sc 0x%p\n", sc);
-		}
-
-		BUG_ON(io_req->abts_done);
-
-		FNIC_SCSI_DBG(KERN_DEBUG,
-			      fnic->lport->host,
-			      "fnic_terminate_rport_io: Issuing abts\n");
-
-		spin_unlock_irqrestore(io_lock, flags);
-
-		/* Now queue the abort command to firmware */
-		int_to_scsilun(sc->device->lun, &fc_lun);
-
-		if (fnic_queue_abort_io_req(fnic, abt_tag,
-					    FCPIO_ITMF_ABT_TASK_TERM,
-					    fc_lun.scsi_lun, io_req)) {
-			/*
-			 * Revert the cmd state back to old state, if
-			 * it hasn't changed in between. This cmd will get
-			 * aborted later by scsi_eh, or cleaned up during
-			 * lun reset
-			 */
-			spin_lock_irqsave(io_lock, flags);
-			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
-				CMD_STATE(sc) = old_ioreq_state;
-			spin_unlock_irqrestore(io_lock, flags);
-		} else {
-			spin_lock_irqsave(io_lock, flags);
-			if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
-				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
-			else
-				CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
-			spin_unlock_irqrestore(io_lock, flags);
-			atomic64_inc(&term_stats->terminates);
-			term_cnt++;
-		}
-	}
-	if (term_cnt > atomic64_read(&term_stats->max_terminates))
-		atomic64_set(&term_stats->max_terminates, term_cnt);
+	blk_mq_tagset_busy_iter(&fnic->lport->host->tag_set,
+			fnic_terminate_cmd, &term_cnt);
 
+	if (term_cnt > atomic64_read(&fnic->fnic_stats.term_stats.max_terminates))
+		atomic64_set(&fnic->fnic_stats.term_stats.max_terminates, term_cnt);
 }
 
 /*
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_clean_pending_aborts
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
  2021-04-21  7:55 ` [PATCH 1/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_terminate_rport_io Ming Lei
@ 2021-04-21  7:55 ` Ming Lei
  2021-04-21  7:55 ` [PATCH 3/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_cleanup_io Ming Lei
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21  7:55 UTC (permalink / raw)
  To: Martin K . Petersen, linux-scsi
  Cc: Ming Lei, Satish Kharat, Karan Tilak Kumar, David Jeffery

So far, scsi_host_find_tag() is supposed to use in fast path and the
passed tag should be active.

Convert the scsi command walking into blk_mq_tagset_busy_iter() in
fnic_terminate_rport_io, which has been one common pattern for handling
failure.

Cc: Satish Kharat <satishkh@cisco.com>
Cc: Karan Tilak Kumar <kartilak@cisco.com>
Cc: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/fnic/fnic_scsi.c | 283 ++++++++++++++++++----------------
 1 file changed, 153 insertions(+), 130 deletions(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 522e1b43409d..12503b59f42c 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2121,172 +2121,195 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic,
 	return ret;
 }
 
-/*
- * Clean up any pending aborts on the lun
- * For each outstanding IO on this lun, whose abort is not completed by fw,
- * issue a local abort. Wait for abort to complete. Return 0 if all commands
- * successfully aborted, 1 otherwise
- */
-static int fnic_clean_pending_aborts(struct fnic *fnic,
-				     struct scsi_cmnd *lr_sc,
-					 bool new_sc)
+struct fnic_clean_pending_aborts_data {
+	struct fnic *fnic;
+	struct scsi_cmnd *lr_sc;
+	bool new_sc;
+	int ret;
+};
 
+static bool __fnic_clean_pending_aborts(struct request *req, void *data,
+		bool reserved)
 {
-	int tag, abt_tag;
+	struct fnic_clean_pending_aborts_data *aborts_data = data;
+	struct fnic *fnic = aborts_data->fnic;
+	struct scsi_cmnd *lr_sc = aborts_data->lr_sc;
+	bool new_sc = aborts_data->new_sc;
+	struct scsi_device *lun_dev = lr_sc->device;
+	struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req);
+	int tag = req->tag;
+	int abt_tag;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
-	int ret = 0;
-	struct scsi_cmnd *sc;
 	struct scsi_lun fc_lun;
-	struct scsi_device *lun_dev = lr_sc->device;
 	DECLARE_COMPLETION_ONSTACK(tm_done);
 	enum fnic_ioreq_state old_ioreq_state;
 
-	for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
-		io_lock = fnic_io_lock_tag(fnic, tag);
-		spin_lock_irqsave(io_lock, flags);
-		sc = scsi_host_find_tag(fnic->lport->host, tag);
-		/*
-		 * ignore this lun reset cmd if issued using new SC
-		 * or cmds that do not belong to this lun
-		 */
-		if (!sc || ((sc == lr_sc) && new_sc) || sc->device != lun_dev) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-
-		io_req = (struct fnic_io_req *)CMD_SP(sc);
+	io_lock = fnic_io_lock_tag(fnic, tag);
+	spin_lock_irqsave(io_lock, flags);
+	sc = scsi_host_find_tag(fnic->lport->host, tag);
+	/*
+	 * ignore this lun reset cmd if issued using new SC
+	 * or cmds that do not belong to this lun
+	 */
+	if (!sc || ((sc == lr_sc) && new_sc) || sc->device != lun_dev)
+		goto unlock;
 
-		if (!io_req || sc->device != lun_dev) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
 
-		/*
-		 * Found IO that is still pending with firmware and
-		 * belongs to the LUN that we are resetting
-		 */
-		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			      "Found IO in %s on lun\n",
-			      fnic_ioreq_state_to_str(CMD_STATE(sc)));
+	if (!io_req || sc->device != lun_dev)
+		goto unlock;
 
-		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
-			(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
-			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
-				"%s dev rst not pending sc 0x%p\n", __func__,
-				sc);
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	/*
+	 * Found IO that is still pending with firmware and
+	 * belongs to the LUN that we are resetting
+	 */
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "Found IO in %s on lun\n",
+		      fnic_ioreq_state_to_str(CMD_STATE(sc)));
 
-		if (io_req->abts_done)
-			shost_printk(KERN_ERR, fnic->lport->host,
-			  "%s: io_req->abts_done is set state is %s\n",
-			  __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
-		old_ioreq_state = CMD_STATE(sc);
-		/*
-		 * Any pending IO issued prior to reset is expected to be
-		 * in abts pending state, if not we need to set
-		 * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
-		 * When IO is completed, the IO will be handed over and
-		 * handled in this function.
-		 */
-		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+		goto unlock;
 
-		BUG_ON(io_req->abts_done);
+	if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+		(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+		FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+			"%s dev rst not pending sc 0x%p\n", __func__,
+			sc);
+		goto unlock;
+	}
 
-		abt_tag = tag;
-		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
-			abt_tag |= FNIC_TAG_DEV_RST;
-			FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
-				  "%s: dev rst sc 0x%p\n", __func__, sc);
-		}
+	if (io_req->abts_done)
+		shost_printk(KERN_ERR, fnic->lport->host,
+		  "%s: io_req->abts_done is set state is %s\n",
+		  __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
+	old_ioreq_state = CMD_STATE(sc);
+	/*
+	 * Any pending IO issued prior to reset is expected to be
+	 * in abts pending state, if not we need to set
+	 * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
+	 * When IO is completed, the IO will be handed over and
+	 * handled in this function.
+	 */
+	CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
 
-		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
-		io_req->abts_done = &tm_done;
-		spin_unlock_irqrestore(io_lock, flags);
+	BUG_ON(io_req->abts_done);
 
-		/* Now queue the abort command to firmware */
-		int_to_scsilun(sc->device->lun, &fc_lun);
+	abt_tag = tag;
+	if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+		abt_tag |= FNIC_TAG_DEV_RST;
+		FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+			  "%s: dev rst sc 0x%p\n", __func__, sc);
+	}
 
-		if (fnic_queue_abort_io_req(fnic, abt_tag,
-					    FCPIO_ITMF_ABT_TASK_TERM,
-					    fc_lun.scsi_lun, io_req)) {
-			spin_lock_irqsave(io_lock, flags);
-			io_req = (struct fnic_io_req *)CMD_SP(sc);
-			if (io_req)
-				io_req->abts_done = NULL;
-			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
-				CMD_STATE(sc) = old_ioreq_state;
-			spin_unlock_irqrestore(io_lock, flags);
-			ret = 1;
-			goto clean_pending_aborts_end;
-		} else {
-			spin_lock_irqsave(io_lock, flags);
-			if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
-				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
-			spin_unlock_irqrestore(io_lock, flags);
-		}
-		CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+	CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+	io_req->abts_done = &tm_done;
+	spin_unlock_irqrestore(io_lock, flags);
 
-		wait_for_completion_timeout(&tm_done,
-					    msecs_to_jiffies
-					    (fnic->config.ed_tov));
+	/* Now queue the abort command to firmware */
+	int_to_scsilun(sc->device->lun, &fc_lun);
 
-		/* Recheck cmd state to check if it is now aborted */
+	if (fnic_queue_abort_io_req(fnic, abt_tag,
+				    FCPIO_ITMF_ABT_TASK_TERM,
+				    fc_lun.scsi_lun, io_req)) {
 		spin_lock_irqsave(io_lock, flags);
 		io_req = (struct fnic_io_req *)CMD_SP(sc);
-		if (!io_req) {
-			spin_unlock_irqrestore(io_lock, flags);
-			CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
-			continue;
-		}
+		if (io_req)
+			io_req->abts_done = NULL;
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+			CMD_STATE(sc) = old_ioreq_state;
+		spin_unlock_irqrestore(io_lock, flags);
+		goto fail;
+	} else {
+		spin_lock_irqsave(io_lock, flags);
+		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+			CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+		spin_unlock_irqrestore(io_lock, flags);
+	}
+	CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
 
-		io_req->abts_done = NULL;
+	wait_for_completion_timeout(&tm_done,
+				    msecs_to_jiffies
+				    (fnic->config.ed_tov));
 
-		/* if abort is still pending with fw, fail */
-		if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
-			spin_unlock_irqrestore(io_lock, flags);
-			CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
-			ret = 1;
-			goto clean_pending_aborts_end;
-		}
-		CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
+	/* Recheck cmd state to check if it is now aborted */
+	spin_lock_irqsave(io_lock, flags);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if (!io_req) {
+		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
+		goto unlock;
+	}
 
-		/* original sc used for lr is handled by dev reset code */
-		if (sc != lr_sc)
-			CMD_SP(sc) = NULL;
+	io_req->abts_done = NULL;
+
+	/* if abort is still pending with fw, fail */
+	if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
 		spin_unlock_irqrestore(io_lock, flags);
+		CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
+		goto fail;
+	}
+	CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
 
-		/* original sc used for lr is handled by dev reset code */
-		if (sc != lr_sc) {
-			fnic_release_ioreq_buf(fnic, io_req, sc);
-			mempool_free(io_req, fnic->io_req_pool);
-		}
+	/* original sc used for lr is handled by dev reset code */
+	if (sc != lr_sc)
+		CMD_SP(sc) = NULL;
+	spin_unlock_irqrestore(io_lock, flags);
 
-		/*
-		 * Any IO is returned during reset, it needs to call scsi_done
-		 * to return the scsi_cmnd to upper layer.
-		 */
-		if (sc->scsi_done) {
-			/* Set result to let upper SCSI layer retry */
-			sc->result = DID_RESET << 16;
-			sc->scsi_done(sc);
-		}
+	/* original sc used for lr is handled by dev reset code */
+	if (sc != lr_sc) {
+		fnic_release_ioreq_buf(fnic, io_req, sc);
+		mempool_free(io_req, fnic->io_req_pool);
 	}
 
+	/*
+	 * Any IO is returned during reset, it needs to call scsi_done
+	 * to return the scsi_cmnd to upper layer.
+	 */
+	if (sc->scsi_done) {
+		/* Set result to let upper SCSI layer retry */
+		sc->result = DID_RESET << 16;
+		sc->scsi_done(sc);
+	}
+	return true;
+ unlock:
+	spin_unlock_irqrestore(io_lock, flags);
+	return true;
+ fail:
+	aborts_data->ret = 1;
+	return false;
+}
+
+/*
+ * Clean up any pending aborts on the lun
+ * For each outstanding IO on this lun, whose abort is not completed by fw,
+ * issue a local abort. Wait for abort to complete. Return 0 if all commands
+ * successfully aborted, 1 otherwise
+ */
+static int fnic_clean_pending_aborts(struct fnic *fnic,
+				     struct scsi_cmnd *lr_sc,
+					 bool new_sc)
+
+{
+	int ret = 0;
+	struct fnic_clean_pending_aborts_data data = {
+		.fnic	=	fnic,
+		.lr_sc	=	lr_sc,
+		.new_sc	=	new_sc,
+		.ret	=	0,
+	};
+
+	blk_mq_tagset_busy_iter(&fnic->lport->host->tag_set,
+			__fnic_clean_pending_aborts, &data);
+	if (data.ret)
+		return data.ret;
+
 	schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov));
 
 	/* walk again to check, if IOs are still pending in fw */
 	if (fnic_is_abts_pending(fnic, lr_sc))
 		ret = FAILED;
 
-clean_pending_aborts_end:
 	return ret;
 }
 
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_cleanup_io
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
  2021-04-21  7:55 ` [PATCH 1/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_terminate_rport_io Ming Lei
  2021-04-21  7:55 ` [PATCH 2/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_clean_pending_aborts Ming Lei
@ 2021-04-21  7:55 ` Ming Lei
  2021-04-21  7:55 ` [PATCH 4/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_rport_exch_reset Ming Lei
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21  7:55 UTC (permalink / raw)
  To: Martin K . Petersen, linux-scsi
  Cc: Ming Lei, Satish Kharat, Karan Tilak Kumar, David Jeffery

So far, scsi_host_find_tag() is supposed to use in fast path and the
passed tag should be active.

Convert the scsi command walking into blk_mq_tagset_busy_iter(), which has
been one common pattern for handling failure.

Cc: Satish Kharat <satishkh@cisco.com>
Cc: Karan Tilak Kumar <kartilak@cisco.com>
Cc: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/fnic/fnic_scsi.c | 139 +++++++++++++++++-----------------
 1 file changed, 70 insertions(+), 69 deletions(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 12503b59f42c..f052526066c1 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1361,93 +1361,94 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
 	return wq_work_done;
 }
 
-static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
+static bool fnic_cleanup_cmd(struct request *req, void *data, bool reserved)
 {
-	int i;
 	struct fnic_io_req *io_req;
 	unsigned long flags = 0;
-	struct scsi_cmnd *sc;
 	spinlock_t *io_lock;
 	unsigned long start_time = 0;
+	struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req);
+	struct fc_lport *lp = shost_priv(sc->device->host);
+	struct fnic *fnic = lport_priv(lp);
 	struct fnic_stats *fnic_stats = &fnic->fnic_stats;
+	int *exclude_id = data;
 
-	for (i = 0; i < fnic->fnic_max_tag_id; i++) {
-		if (i == exclude_id)
-			continue;
+	if (*exclude_id == req->tag)
+		return true;
 
-		io_lock = fnic_io_lock_tag(fnic, i);
-		spin_lock_irqsave(io_lock, flags);
-		sc = scsi_host_find_tag(fnic->lport->host, i);
-		if (!sc) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	io_lock = fnic_io_lock_tag(fnic, req->tag);
+	spin_lock_irqsave(io_lock, flags);
 
-		io_req = (struct fnic_io_req *)CMD_SP(sc);
-		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
-			!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
-			/*
-			 * We will be here only when FW completes reset
-			 * without sending completions for outstanding ios.
-			 */
-			CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
-			if (io_req && io_req->dr_done)
-				complete(io_req->dr_done);
-			else if (io_req && io_req->abts_done)
-				complete(io_req->abts_done);
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		} else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-		if (!io_req) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+		!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
+		/*
+		 * We will be here only when FW completes reset
+		 * without sending completions for outstanding ios.
+		 */
+		CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
+		if (io_req && io_req->dr_done)
+			complete(io_req->dr_done);
+		else if (io_req && io_req->abts_done)
+			complete(io_req->abts_done);
+		goto unlock;
+	} else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+		goto unlock;
+	}
+	if (!io_req)
+		goto unlock;
 
-		CMD_SP(sc) = NULL;
+	CMD_SP(sc) = NULL;
 
-		spin_unlock_irqrestore(io_lock, flags);
+	spin_unlock_irqrestore(io_lock, flags);
 
-		/*
-		 * If there is a scsi_cmnd associated with this io_req, then
-		 * free the corresponding state
-		 */
-		start_time = io_req->start_time;
-		fnic_release_ioreq_buf(fnic, io_req, sc);
-		mempool_free(io_req, fnic->io_req_pool);
+	/*
+	 * If there is a scsi_cmnd associated with this io_req, then
+	 * free the corresponding state
+	 */
+	start_time = io_req->start_time;
+	fnic_release_ioreq_buf(fnic, io_req, sc);
+	mempool_free(io_req, fnic->io_req_pool);
 
-		sc->result = DID_TRANSPORT_DISRUPTED << 16;
-		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			      "%s: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
-			      __func__, sc->request->tag, sc,
-			      (jiffies - start_time));
+	sc->result = DID_TRANSPORT_DISRUPTED << 16;
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "%s: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
+		      __func__, sc->request->tag, sc,
+		      (jiffies - start_time));
 
-		if (atomic64_read(&fnic->io_cmpl_skip))
-			atomic64_dec(&fnic->io_cmpl_skip);
-		else
-			atomic64_inc(&fnic_stats->io_stats.io_completions);
+	if (atomic64_read(&fnic->io_cmpl_skip))
+		atomic64_dec(&fnic->io_cmpl_skip);
+	else
+		atomic64_inc(&fnic_stats->io_stats.io_completions);
 
-		/* Complete the command to SCSI */
-		if (sc->scsi_done) {
-			if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
-				shost_printk(KERN_ERR, fnic->lport->host,
-				"Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
-				 sc->request->tag, sc);
+	/* Complete the command to SCSI */
+	if (sc->scsi_done) {
+		if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
+			shost_printk(KERN_ERR, fnic->lport->host,
+			"Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
+			 sc->request->tag, sc);
 
-			FNIC_TRACE(fnic_cleanup_io,
-				  sc->device->host->host_no, i, sc,
-				  jiffies_to_msecs(jiffies - start_time),
-				  0, ((u64)sc->cmnd[0] << 32 |
-				  (u64)sc->cmnd[2] << 24 |
-				  (u64)sc->cmnd[3] << 16 |
-				  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
-				  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+		FNIC_TRACE(fnic_cleanup_io,
+			  sc->device->host->host_no, req->tag, sc,
+			  jiffies_to_msecs(jiffies - start_time),
+			  0, ((u64)sc->cmnd[0] << 32 |
+			  (u64)sc->cmnd[2] << 24 |
+			  (u64)sc->cmnd[3] << 16 |
+			  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+			  (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
 
-			sc->scsi_done(sc);
-		}
+		sc->scsi_done(sc);
 	}
+	return true;
+ unlock:
+	spin_unlock_irqrestore(io_lock, flags);
+	return true;
+}
+
+static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
+{
+	blk_mq_tagset_busy_iter(&fnic->lport->host->tag_set,
+			fnic_cleanup_cmd, &exclude_id);
 }
 
 void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_rport_exch_reset
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
                   ` (2 preceding siblings ...)
  2021-04-21  7:55 ` [PATCH 3/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_cleanup_io Ming Lei
@ 2021-04-21  7:55 ` Ming Lei
  2021-04-21  7:55 ` [PATCH 5/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_is_abts_pending Ming Lei
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21  7:55 UTC (permalink / raw)
  To: Martin K . Petersen, linux-scsi
  Cc: Ming Lei, Satish Kharat, Karan Tilak Kumar, David Jeffery

So far, scsi_host_find_tag() is supposed to use in fast path and the
passed tag should be active.

Convert the scsi command walking into blk_mq_tagset_busy_iter(), which has been one
common pattern for handling failure.

Cc: Satish Kharat <satishkh@cisco.com>
Cc: Karan Tilak Kumar <kartilak@cisco.com>
Cc: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/fnic/fnic_scsi.c | 202 ++++++++++++++++++----------------
 1 file changed, 107 insertions(+), 95 deletions(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index f052526066c1..ecbf4f5c5a07 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1559,124 +1559,136 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag,
 	return 0;
 }
 
-static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+struct fnic_reset_data {
+	u32 port_id;
+	int term_cnt;
+};
+
+static bool fnic_reset_cmd(struct request *req, void *data, bool reserved)
 {
+	struct fnic_reset_data *reset_data = data;
 	int tag;
 	int abt_tag;
-	int term_cnt = 0;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
-	struct scsi_cmnd *sc;
-	struct reset_stats *reset_stats = &fnic->fnic_stats.reset_stats;
-	struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats;
 	struct scsi_lun fc_lun;
 	enum fnic_ioreq_state old_ioreq_state;
+	struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req);
+	struct fc_lport *lp = shost_priv(sc->device->host);
+	struct fnic *fnic = lport_priv(lp);
+	struct reset_stats *reset_stats = &fnic->fnic_stats.reset_stats;
+	struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats;
 
-	FNIC_SCSI_DBG(KERN_DEBUG,
-		      fnic->lport->host,
-		      "fnic_rport_exch_reset called portid 0x%06x\n",
-		      port_id);
+	tag = req->tag;
+	abt_tag = tag;
+	io_lock = fnic_io_lock_tag(fnic, tag);
+	spin_lock_irqsave(io_lock, flags);
 
-	if (fnic->in_remove)
-		return;
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
 
-	for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
-		abt_tag = tag;
-		io_lock = fnic_io_lock_tag(fnic, tag);
-		spin_lock_irqsave(io_lock, flags);
-		sc = scsi_host_find_tag(fnic->lport->host, tag);
-		if (!sc) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	if (!io_req || io_req->port_id != reset_data->port_id)
+		goto unlock;
 
-		io_req = (struct fnic_io_req *)CMD_SP(sc);
+	if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+		(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		"fnic_rport_exch_reset dev rst not pending sc 0x%p\n",
+		sc);
+		goto unlock;
+	}
 
-		if (!io_req || io_req->port_id != port_id) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	/*
+	 * Found IO that is still pending with firmware and
+	 * belongs to rport that went away
+	 */
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+		goto unlock;
 
-		if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
-			(!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
-			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			"fnic_rport_exch_reset dev rst not pending sc 0x%p\n",
-			sc);
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	if (io_req->abts_done) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+		"fnic_rport_exch_reset: io_req->abts_done is set "
+		"state is %s\n",
+		fnic_ioreq_state_to_str(CMD_STATE(sc)));
+	}
 
-		/*
-		 * Found IO that is still pending with firmware and
-		 * belongs to rport that went away
-		 */
-		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
-		if (io_req->abts_done) {
-			shost_printk(KERN_ERR, fnic->lport->host,
-			"fnic_rport_exch_reset: io_req->abts_done is set "
-			"state is %s\n",
-			fnic_ioreq_state_to_str(CMD_STATE(sc)));
-		}
+	if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
+		shost_printk(KERN_ERR, fnic->lport->host,
+			  "rport_exch_reset "
+			  "IO not yet issued %p tag 0x%x flags "
+			  "%x state %d\n",
+			  sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
+	}
+	old_ioreq_state = CMD_STATE(sc);
+	CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+	CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+	if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+		atomic64_inc(&reset_stats->device_reset_terminates);
+		abt_tag = (tag | FNIC_TAG_DEV_RST);
+		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		"fnic_rport_exch_reset dev rst sc 0x%p\n",
+		sc);
+	}
 
-		if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
-			shost_printk(KERN_ERR, fnic->lport->host,
-				  "rport_exch_reset "
-				  "IO not yet issued %p tag 0x%x flags "
-				  "%x state %d\n",
-				  sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
-		}
-		old_ioreq_state = CMD_STATE(sc);
-		CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
-		CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
-		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
-			atomic64_inc(&reset_stats->device_reset_terminates);
-			abt_tag = (tag | FNIC_TAG_DEV_RST);
-			FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			"fnic_rport_exch_reset dev rst sc 0x%p\n",
-			sc);
-		}
+	BUG_ON(io_req->abts_done);
 
-		BUG_ON(io_req->abts_done);
+	FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+		      "fnic_rport_reset_exch: Issuing abts\n");
 
-		FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
-			      "fnic_rport_reset_exch: Issuing abts\n");
+	spin_unlock_irqrestore(io_lock, flags);
 
+	/* Now queue the abort command to firmware */
+	int_to_scsilun(sc->device->lun, &fc_lun);
+
+	if (fnic_queue_abort_io_req(fnic, abt_tag,
+				    FCPIO_ITMF_ABT_TASK_TERM,
+				    fc_lun.scsi_lun, io_req)) {
+		/*
+		 * Revert the cmd state back to old state, if
+		 * it hasn't changed in between. This cmd will get
+		 * aborted later by scsi_eh, or cleaned up during
+		 * lun reset
+		 */
+		spin_lock_irqsave(io_lock, flags);
+		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+			CMD_STATE(sc) = old_ioreq_state;
 		spin_unlock_irqrestore(io_lock, flags);
+	} else {
+		spin_lock_irqsave(io_lock, flags);
+		if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+			CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+		else
+			CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+		spin_unlock_irqrestore(io_lock, flags);
+		atomic64_inc(&term_stats->terminates);
+		reset_data->term_cnt++;
+	}
+	return true;
+unlock:
+	spin_unlock_irqrestore(io_lock, flags);
+	return true;
+}
 
-		/* Now queue the abort command to firmware */
-		int_to_scsilun(sc->device->lun, &fc_lun);
+static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+{
+	struct fnic_reset_data data = {
+		.port_id	=	port_id,
+		.term_cnt	=	0,
+	};
 
-		if (fnic_queue_abort_io_req(fnic, abt_tag,
-					    FCPIO_ITMF_ABT_TASK_TERM,
-					    fc_lun.scsi_lun, io_req)) {
-			/*
-			 * Revert the cmd state back to old state, if
-			 * it hasn't changed in between. This cmd will get
-			 * aborted later by scsi_eh, or cleaned up during
-			 * lun reset
-			 */
-			spin_lock_irqsave(io_lock, flags);
-			if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
-				CMD_STATE(sc) = old_ioreq_state;
-			spin_unlock_irqrestore(io_lock, flags);
-		} else {
-			spin_lock_irqsave(io_lock, flags);
-			if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
-				CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
-			else
-				CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
-			spin_unlock_irqrestore(io_lock, flags);
-			atomic64_inc(&term_stats->terminates);
-			term_cnt++;
-		}
-	}
-	if (term_cnt > atomic64_read(&term_stats->max_terminates))
-		atomic64_set(&term_stats->max_terminates, term_cnt);
+	FNIC_SCSI_DBG(KERN_DEBUG,
+		      fnic->lport->host,
+		      "fnic_rport_exch_reset called portid 0x%06x\n",
+		      port_id);
+
+	if (fnic->in_remove)
+		return;
+
+	blk_mq_tagset_busy_iter(&fnic->lport->host->tag_set,
+			fnic_reset_cmd, &data);
 
+	if (data.term_cnt > atomic64_read(&fnic->fnic_stats.term_stats.max_terminates))
+		atomic64_set(&fnic->fnic_stats.term_stats.max_terminates, data.term_cnt);
 }
 
 static bool fnic_terminate_cmd(struct request *req, void *data, bool reserved)
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_is_abts_pending
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
                   ` (3 preceding siblings ...)
  2021-04-21  7:55 ` [PATCH 4/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_rport_exch_reset Ming Lei
@ 2021-04-21  7:55 ` Ming Lei
  2021-04-21 10:19 ` [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Martin Wilck
  2021-04-21 20:14 ` Hannes Reinecke
  6 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21  7:55 UTC (permalink / raw)
  To: Martin K . Petersen, linux-scsi
  Cc: Ming Lei, Satish Kharat, Karan Tilak Kumar, David Jeffery

So far, scsi_host_find_tag() is supposed to use in fast path and the
passed tag should be active.

Convert the scsi command walking into blk_mq_tagset_busy_iter(), which has been one
common pattern for handling failure.

Cc: Satish Kharat <satishkh@cisco.com>
Cc: Karan Tilak Kumar <kartilak@cisco.com>
Cc: David Jeffery <djeffery@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/scsi/fnic/fnic_scsi.c | 96 ++++++++++++++++++++---------------
 1 file changed, 55 insertions(+), 41 deletions(-)

diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index ecbf4f5c5a07..af8e860f488e 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2814,58 +2814,72 @@ void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did)
 
 }
 
-/*
- * fnic_is_abts_pending() is a helper function that
- * walks through tag map to check if there is any IOs pending,if there is one,
- * then it returns 1 (true), otherwise 0 (false)
- * if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
- * otherwise, it checks for all IOs.
- */
-int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
+struct fnic_is_abts_pending_data {
+	struct scsi_cmnd *lr_sc;
+	int ret;
+};
+
+static bool __fnic_is_abts_pending(struct request *req, void *data,
+		bool reserved)
 {
-	int tag;
+	struct fnic_is_abts_pending_data *pdata = data;
 	struct fnic_io_req *io_req;
 	spinlock_t *io_lock;
 	unsigned long flags;
-	int ret = 0;
-	struct scsi_cmnd *sc;
 	struct scsi_device *lun_dev = NULL;
+	struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req);
+	struct fc_lport *lp = shost_priv(sc->device->host);
+	struct fnic *fnic = lport_priv(lp);
 
-	if (lr_sc)
-		lun_dev = lr_sc->device;
+	if (pdata->lr_sc)
+		lun_dev = pdata->lr_sc->device;
 
-	/* walk again to check, if IOs are still pending in fw */
-	for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
-		sc = scsi_host_find_tag(fnic->lport->host, tag);
-		/*
-		 * ignore this lun reset cmd or cmds that do not belong to
-		 * this lun
-		 */
-		if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc)))
-			continue;
+	/*
+	 * ignore this lun reset cmd or cmds that do not belong to
+	 * this lun
+	 */
+	if (!sc || (pdata->lr_sc && (sc->device != lun_dev ||
+					sc == pdata->lr_sc)))
+		return true;
 
-		io_lock = fnic_io_lock_hash(fnic, sc);
-		spin_lock_irqsave(io_lock, flags);
+	io_lock = fnic_io_lock_hash(fnic, sc);
+	spin_lock_irqsave(io_lock, flags);
 
-		io_req = (struct fnic_io_req *)CMD_SP(sc);
+	io_req = (struct fnic_io_req *)CMD_SP(sc);
 
-		if (!io_req || sc->device != lun_dev) {
-			spin_unlock_irqrestore(io_lock, flags);
-			continue;
-		}
+	if (!io_req || sc->device != lun_dev)
+		goto unlock;
 
-		/*
-		 * Found IO that is still pending with firmware and
-		 * belongs to the LUN that we are resetting
-		 */
-		FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
-			      "Found IO in %s on lun\n",
-			      fnic_ioreq_state_to_str(CMD_STATE(sc)));
+	/*
+	 * Found IO that is still pending with firmware and
+	 * belongs to the LUN that we are resetting
+	 */
+	FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+		      "Found IO in %s on lun\n",
+		      fnic_ioreq_state_to_str(CMD_STATE(sc)));
 
-		if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
-			ret = 1;
-		spin_unlock_irqrestore(io_lock, flags);
-	}
+	if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+		pdata->ret = 1;
+ unlock:
+	spin_unlock_irqrestore(io_lock, flags);
+	return true;
+}
 
-	return ret;
+/*
+ * fnic_is_abts_pending() is a helper function that
+ * walks through tag map to check if there is any IOs pending,if there is one,
+ * then it returns 1 (true), otherwise 0 (false)
+ * if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
+ * otherwise, it checks for all IOs.
+ */
+int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
+{
+	struct fnic_is_abts_pending_data data = {
+		.lr_sc	=	lr_sc,
+		.ret	=	0,
+	};
+
+	blk_mq_tagset_busy_iter(&fnic->lport->host->tag_set,
+			__fnic_is_abts_pending, &data);
+	return data.ret;
 }
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
                   ` (4 preceding siblings ...)
  2021-04-21  7:55 ` [PATCH 5/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_is_abts_pending Ming Lei
@ 2021-04-21 10:19 ` Martin Wilck
  2021-04-21 10:40   ` Ming Lei
  2021-04-21 20:14 ` Hannes Reinecke
  6 siblings, 1 reply; 15+ messages in thread
From: Martin Wilck @ 2021-04-21 10:19 UTC (permalink / raw)
  To: Ming Lei, Martin K . Petersen, linux-scsi, Hannes Reinecke
  Cc: Satish Kharat, Karan Tilak Kumar, David Jeffery, Daniel Wagner

On Wed, 2021-04-21 at 15:55 +0800, Ming Lei wrote:
> Hello Guys,
> 
> fnic uses the following way to walk scsi commands in failure handling,
> which is obvious wrong, because caller of scsi_host_find_tag has to
> guarantee that the tag is active.
> 
>         for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
>                                 ...
>                 sc = scsi_host_find_tag(fnic->lport->host, tag);
>                                 ...
>                 }
> 
> Fix the issue by using blk_mq_tagset_busy_iter() to walk
> request/scsi_command.

How does this relate to Hannes' previous patch?
https://marc.info/?l=linux-scsi&m=161400059528859&w=2

Regards
Martin



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-21 10:19 ` [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Martin Wilck
@ 2021-04-21 10:40   ` Ming Lei
  2021-04-21 12:33     ` Martin Wilck
  0 siblings, 1 reply; 15+ messages in thread
From: Ming Lei @ 2021-04-21 10:40 UTC (permalink / raw)
  To: Martin Wilck
  Cc: Martin K . Petersen, linux-scsi, Hannes Reinecke, Satish Kharat,
	Karan Tilak Kumar, David Jeffery, Daniel Wagner

On Wed, Apr 21, 2021 at 12:19:00PM +0200, Martin Wilck wrote:
> On Wed, 2021-04-21 at 15:55 +0800, Ming Lei wrote:
> > Hello Guys,
> > 
> > fnic uses the following way to walk scsi commands in failure handling,
> > which is obvious wrong, because caller of scsi_host_find_tag has to
> > guarantee that the tag is active.
> > 
> >         for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> >                                 ...
> >                 sc = scsi_host_find_tag(fnic->lport->host, tag);
> >                                 ...
> >                 }
> > 
> > Fix the issue by using blk_mq_tagset_busy_iter() to walk
> > request/scsi_command.
> 
> How does this relate to Hannes' previous patch?
> https://marc.info/?l=linux-scsi&m=161400059528859&w=2

oops, this patch is actually same or similar with Hannes's.

Given these patches are bug fix, can we cherry-pick them for 5.13?


Thanks,
Ming


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-21 10:40   ` Ming Lei
@ 2021-04-21 12:33     ` Martin Wilck
  2021-04-21 14:05       ` Ming Lei
  0 siblings, 1 reply; 15+ messages in thread
From: Martin Wilck @ 2021-04-21 12:33 UTC (permalink / raw)
  To: Ming Lei
  Cc: Martin K . Petersen, linux-scsi, Hannes Reinecke, Satish Kharat,
	Karan Tilak Kumar, David Jeffery, Daniel Wagner

On Wed, 2021-04-21 at 18:40 +0800, Ming Lei wrote:
> On Wed, Apr 21, 2021 at 12:19:00PM +0200, Martin Wilck wrote:
> > On Wed, 2021-04-21 at 15:55 +0800, Ming Lei wrote:
> > > Hello Guys,
> > > 
> > > fnic uses the following way to walk scsi commands in failure
> > > handling,
> > > which is obvious wrong, because caller of scsi_host_find_tag has
> > > to
> > > guarantee that the tag is active.
> > > 
> > >         for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> > >                                 ...
> > >                 sc = scsi_host_find_tag(fnic->lport->host, tag);
> > >                                 ...
> > >                 }
> > > 
> > > Fix the issue by using blk_mq_tagset_busy_iter() to walk
> > > request/scsi_command.
> > 
> > How does this relate to Hannes' previous patch?
> > https://marc.info/?l=linux-scsi&m=161400059528859&w=2
> 
> oops, this patch is actually same or similar with Hannes's.
> 
> Given these patches are bug fix, can we cherry-pick them for 5.13?

No objections in principle, but the differences between your patch and
Hannes' are pretty large. I couldn't tell which one is more
appropriate.

Question: Both your patch set and Hannes' patch replace a couple of
scsi_host_find_tag() calls in the fnic driver, while leaving some
others in place. It's not clear to me why we can be sure that no
corruption occurs in any of the latter. Could you explain?

Regards,
Martin



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-21 12:33     ` Martin Wilck
@ 2021-04-21 14:05       ` Ming Lei
  0 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-21 14:05 UTC (permalink / raw)
  To: Martin Wilck
  Cc: Martin K . Petersen, linux-scsi, Hannes Reinecke, Satish Kharat,
	Karan Tilak Kumar, David Jeffery, Daniel Wagner

On Wed, Apr 21, 2021 at 02:33:46PM +0200, Martin Wilck wrote:
> On Wed, 2021-04-21 at 18:40 +0800, Ming Lei wrote:
> > On Wed, Apr 21, 2021 at 12:19:00PM +0200, Martin Wilck wrote:
> > > On Wed, 2021-04-21 at 15:55 +0800, Ming Lei wrote:
> > > > Hello Guys,
> > > > 
> > > > fnic uses the following way to walk scsi commands in failure
> > > > handling,
> > > > which is obvious wrong, because caller of scsi_host_find_tag has
> > > > to
> > > > guarantee that the tag is active.
> > > > 
> > > >         for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> > > >                                 ...
> > > >                 sc = scsi_host_find_tag(fnic->lport->host, tag);
> > > >                                 ...
> > > >                 }
> > > > 
> > > > Fix the issue by using blk_mq_tagset_busy_iter() to walk
> > > > request/scsi_command.
> > > 
> > > How does this relate to Hannes' previous patch?
> > > https://marc.info/?l=linux-scsi&m=161400059528859&w=2
> > 
> > oops, this patch is actually same or similar with Hannes's.
> > 
> > Given these patches are bug fix, can we cherry-pick them for 5.13?
> 
> No objections in principle, but the differences between your patch and
> Hannes' are pretty large. I couldn't tell which one is more
> appropriate.
> 
> Question: Both your patch set and Hannes' patch replace a couple of
> scsi_host_find_tag() calls in the fnic driver, while leaving some
> others in place. It's not clear to me why we can be sure that no
> corruption occurs in any of the latter. Could you explain?

Caller of scsi_host_find_tag() has to guarantee that the passed tag
is active.

The changed functions do pass invalid tag to scsi_host_find_tag(), so
we need to fix them.


Thanks,
Ming


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
                   ` (5 preceding siblings ...)
  2021-04-21 10:19 ` [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Martin Wilck
@ 2021-04-21 20:14 ` Hannes Reinecke
  2021-04-22  0:08   ` Ming Lei
  6 siblings, 1 reply; 15+ messages in thread
From: Hannes Reinecke @ 2021-04-21 20:14 UTC (permalink / raw)
  To: Ming Lei, Martin K . Petersen, linux-scsi
  Cc: Satish Kharat, Karan Tilak Kumar, David Jeffery

On 4/21/21 9:55 AM, Ming Lei wrote:
> Hello Guys,
> 
> fnic uses the following way to walk scsi commands in failure handling,
> which is obvious wrong, because caller of scsi_host_find_tag has to
> guarantee that the tag is active.
> 
>          for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> 				...
>                  sc = scsi_host_find_tag(fnic->lport->host, tag);
> 				...
> 		}
> 
> Fix the issue by using blk_mq_tagset_busy_iter() to walk
> request/scsi_command.
> 
> thanks,
> Ming
> 
> 
> Ming Lei (5):
>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>      fnic_terminate_rport_io
>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>      fnic_clean_pending_aborts
>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>      fnic_cleanup_io
>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>      fnic_rport_exch_reset
>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>      fnic_is_abts_pending
> 
>   drivers/scsi/fnic/fnic_scsi.c | 933 ++++++++++++++++++----------------
>   1 file changed, 493 insertions(+), 440 deletions(-)
> 
> Cc: Satish Kharat <satishkh@cisco.com>
> Cc: Karan Tilak Kumar <kartilak@cisco.com>
> Cc: David Jeffery <djeffery@redhat.com>
> 
Well, this is actually not that easy for fnic.
Problem is the reset hack hch put in some time ago (cf 
fnic_host_start_tag()), which will cause any TMF to use a tag which is 
_not_ visible to the busy iter.
That will cause the iter to miss any TMF, with unpredictable results if 
a TMF is running at the same time than, say, a link bounce.

I have folded this as part of my patchset for reserved commands in SCSI; 
that way fnic can use 'normal' tags for TMFs, which are then visible to 
the busy iter and life's good.

Will be reposting the patchset shortly.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-21 20:14 ` Hannes Reinecke
@ 2021-04-22  0:08   ` Ming Lei
  2021-04-22 14:17     ` Ming Lei
  0 siblings, 1 reply; 15+ messages in thread
From: Ming Lei @ 2021-04-22  0:08 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K . Petersen, linux-scsi, Satish Kharat,
	Karan Tilak Kumar, David Jeffery

On Wed, Apr 21, 2021 at 10:14:56PM +0200, Hannes Reinecke wrote:
> On 4/21/21 9:55 AM, Ming Lei wrote:
> > Hello Guys,
> > 
> > fnic uses the following way to walk scsi commands in failure handling,
> > which is obvious wrong, because caller of scsi_host_find_tag has to
> > guarantee that the tag is active.
> > 
> >          for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> > 				...
> >                  sc = scsi_host_find_tag(fnic->lport->host, tag);
> > 				...
> > 		}
> > 
> > Fix the issue by using blk_mq_tagset_busy_iter() to walk
> > request/scsi_command.
> > 
> > thanks,
> > Ming
> > 
> > 
> > Ming Lei (5):
> >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >      fnic_terminate_rport_io
> >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >      fnic_clean_pending_aborts
> >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >      fnic_cleanup_io
> >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >      fnic_rport_exch_reset
> >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >      fnic_is_abts_pending
> > 
> >   drivers/scsi/fnic/fnic_scsi.c | 933 ++++++++++++++++++----------------
> >   1 file changed, 493 insertions(+), 440 deletions(-)
> > 
> > Cc: Satish Kharat <satishkh@cisco.com>
> > Cc: Karan Tilak Kumar <kartilak@cisco.com>
> > Cc: David Jeffery <djeffery@redhat.com>
> > 
> Well, this is actually not that easy for fnic.
> Problem is the reset hack hch put in some time ago (cf
> fnic_host_start_tag()), which will cause any TMF to use a tag which is _not_
> visible to the busy iter.

'git grep -n fnic_host_start_tag ./' shows nothing.

> That will cause the iter to miss any TMF, with unpredictable results if a
> TMF is running at the same time than, say, a link bounce.

Wrt. linus tree or next tree, I don't see any issue wrt. your concern.

> 
> I have folded this as part of my patchset for reserved commands in SCSI;
> that way fnic can use 'normal' tags for TMFs, which are then visible to the
> busy iter and life's good.

No, this fix is one bug fix, which can't depend on your reserved
command in SCSI, and they need to be backported to stable tree too.


Thanks, 
Ming


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-22  0:08   ` Ming Lei
@ 2021-04-22 14:17     ` Ming Lei
  2021-04-26  7:30       ` Hannes Reinecke
  0 siblings, 1 reply; 15+ messages in thread
From: Ming Lei @ 2021-04-22 14:17 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K . Petersen, linux-scsi, Satish Kharat,
	Karan Tilak Kumar, David Jeffery

On Thu, Apr 22, 2021 at 08:08:41AM +0800, Ming Lei wrote:
> On Wed, Apr 21, 2021 at 10:14:56PM +0200, Hannes Reinecke wrote:
> > On 4/21/21 9:55 AM, Ming Lei wrote:
> > > Hello Guys,
> > > 
> > > fnic uses the following way to walk scsi commands in failure handling,
> > > which is obvious wrong, because caller of scsi_host_find_tag has to
> > > guarantee that the tag is active.
> > > 
> > >          for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> > > 				...
> > >                  sc = scsi_host_find_tag(fnic->lport->host, tag);
> > > 				...
> > > 		}
> > > 
> > > Fix the issue by using blk_mq_tagset_busy_iter() to walk
> > > request/scsi_command.
> > > 
> > > thanks,
> > > Ming
> > > 
> > > 
> > > Ming Lei (5):
> > >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> > >      fnic_terminate_rport_io
> > >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> > >      fnic_clean_pending_aborts
> > >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> > >      fnic_cleanup_io
> > >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> > >      fnic_rport_exch_reset
> > >    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> > >      fnic_is_abts_pending
> > > 
> > >   drivers/scsi/fnic/fnic_scsi.c | 933 ++++++++++++++++++----------------
> > >   1 file changed, 493 insertions(+), 440 deletions(-)
> > > 
> > > Cc: Satish Kharat <satishkh@cisco.com>
> > > Cc: Karan Tilak Kumar <kartilak@cisco.com>
> > > Cc: David Jeffery <djeffery@redhat.com>
> > > 
> > Well, this is actually not that easy for fnic.
> > Problem is the reset hack hch put in some time ago (cf
> > fnic_host_start_tag()), which will cause any TMF to use a tag which is _not_
> > visible to the busy iter.
> 
> 'git grep -n fnic_host_start_tag ./' shows nothing.
> 
> > That will cause the iter to miss any TMF, with unpredictable results if a
> > TMF is running at the same time than, say, a link bounce.
> 
> Wrt. linus tree or next tree, I don't see any issue wrt. your concern.
> 
> > 
> > I have folded this as part of my patchset for reserved commands in SCSI;
> > that way fnic can use 'normal' tags for TMFs, which are then visible to the
> > busy iter and life's good.
> 
> No, this fix is one bug fix, which can't depend on your reserved
> command in SCSI, and they need to be backported to stable tree too.

Hi Hannes,

We have customers report on this issue, could you please let us know
if you will post out one bug-fix only version?

Thanks,
Ming


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-22 14:17     ` Ming Lei
@ 2021-04-26  7:30       ` Hannes Reinecke
  2021-04-26  8:04         ` Ming Lei
  0 siblings, 1 reply; 15+ messages in thread
From: Hannes Reinecke @ 2021-04-26  7:30 UTC (permalink / raw)
  To: Ming Lei
  Cc: Martin K . Petersen, linux-scsi, Satish Kharat,
	Karan Tilak Kumar, David Jeffery

On 4/22/21 4:17 PM, Ming Lei wrote:
> On Thu, Apr 22, 2021 at 08:08:41AM +0800, Ming Lei wrote:
>> On Wed, Apr 21, 2021 at 10:14:56PM +0200, Hannes Reinecke wrote:
>>> On 4/21/21 9:55 AM, Ming Lei wrote:
>>>> Hello Guys,
>>>>
>>>> fnic uses the following way to walk scsi commands in failure handling,
>>>> which is obvious wrong, because caller of scsi_host_find_tag has to
>>>> guarantee that the tag is active.
>>>>
>>>>          for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
>>>> 				...
>>>>                  sc = scsi_host_find_tag(fnic->lport->host, tag);
>>>> 				...
>>>> 		}
>>>>
>>>> Fix the issue by using blk_mq_tagset_busy_iter() to walk
>>>> request/scsi_command.
>>>>
>>>> thanks,
>>>> Ming
>>>>
>>>>
>>>> Ming Lei (5):
>>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>>>>      fnic_terminate_rport_io
>>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>>>>      fnic_clean_pending_aborts
>>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>>>>      fnic_cleanup_io
>>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>>>>      fnic_rport_exch_reset
>>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
>>>>      fnic_is_abts_pending
>>>>
>>>>   drivers/scsi/fnic/fnic_scsi.c | 933 ++++++++++++++++++----------------
>>>>   1 file changed, 493 insertions(+), 440 deletions(-)
>>>>
>>>> Cc: Satish Kharat <satishkh@cisco.com>
>>>> Cc: Karan Tilak Kumar <kartilak@cisco.com>
>>>> Cc: David Jeffery <djeffery@redhat.com>
>>>>
>>> Well, this is actually not that easy for fnic.
>>> Problem is the reset hack hch put in some time ago (cf
>>> fnic_host_start_tag()), which will cause any TMF to use a tag which is _not_
>>> visible to the busy iter.
>>
>> 'git grep -n fnic_host_start_tag ./' shows nothing.
>>
>>> That will cause the iter to miss any TMF, with unpredictable results if a
>>> TMF is running at the same time than, say, a link bounce.
>>
>> Wrt. linus tree or next tree, I don't see any issue wrt. your concern.
>>
>>>
>>> I have folded this as part of my patchset for reserved commands in SCSI;
>>> that way fnic can use 'normal' tags for TMFs, which are then visible to the
>>> busy iter and life's good.
>>
>> No, this fix is one bug fix, which can't depend on your reserved
>> command in SCSI, and they need to be backported to stable tree too.
> 
> Hi Hannes,
> 
> We have customers report on this issue, could you please let us know
> if you will post out one bug-fix only version?
> 
And so have we, and indeed we have the same bug reports.
So I'll be splitting off those patches and send it as a stand-alone
patchset.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		        Kernel Storage Architect
hare@suse.de			               +49 911 74053 688
SUSE Software Solutions Germany GmbH, 90409 Nürnberg
GF: F. Imendörffer, HRB 36809 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands
  2021-04-26  7:30       ` Hannes Reinecke
@ 2021-04-26  8:04         ` Ming Lei
  0 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2021-04-26  8:04 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Martin K . Petersen, linux-scsi, Satish Kharat,
	Karan Tilak Kumar, David Jeffery

On Mon, Apr 26, 2021 at 09:30:45AM +0200, Hannes Reinecke wrote:
> On 4/22/21 4:17 PM, Ming Lei wrote:
> > On Thu, Apr 22, 2021 at 08:08:41AM +0800, Ming Lei wrote:
> >> On Wed, Apr 21, 2021 at 10:14:56PM +0200, Hannes Reinecke wrote:
> >>> On 4/21/21 9:55 AM, Ming Lei wrote:
> >>>> Hello Guys,
> >>>>
> >>>> fnic uses the following way to walk scsi commands in failure handling,
> >>>> which is obvious wrong, because caller of scsi_host_find_tag has to
> >>>> guarantee that the tag is active.
> >>>>
> >>>>          for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
> >>>> 				...
> >>>>                  sc = scsi_host_find_tag(fnic->lport->host, tag);
> >>>> 				...
> >>>> 		}
> >>>>
> >>>> Fix the issue by using blk_mq_tagset_busy_iter() to walk
> >>>> request/scsi_command.
> >>>>
> >>>> thanks,
> >>>> Ming
> >>>>
> >>>>
> >>>> Ming Lei (5):
> >>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >>>>      fnic_terminate_rport_io
> >>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >>>>      fnic_clean_pending_aborts
> >>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >>>>      fnic_cleanup_io
> >>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >>>>      fnic_rport_exch_reset
> >>>>    scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in
> >>>>      fnic_is_abts_pending
> >>>>
> >>>>   drivers/scsi/fnic/fnic_scsi.c | 933 ++++++++++++++++++----------------
> >>>>   1 file changed, 493 insertions(+), 440 deletions(-)
> >>>>
> >>>> Cc: Satish Kharat <satishkh@cisco.com>
> >>>> Cc: Karan Tilak Kumar <kartilak@cisco.com>
> >>>> Cc: David Jeffery <djeffery@redhat.com>
> >>>>
> >>> Well, this is actually not that easy for fnic.
> >>> Problem is the reset hack hch put in some time ago (cf
> >>> fnic_host_start_tag()), which will cause any TMF to use a tag which is _not_
> >>> visible to the busy iter.
> >>
> >> 'git grep -n fnic_host_start_tag ./' shows nothing.
> >>
> >>> That will cause the iter to miss any TMF, with unpredictable results if a
> >>> TMF is running at the same time than, say, a link bounce.
> >>
> >> Wrt. linus tree or next tree, I don't see any issue wrt. your concern.
> >>
> >>>
> >>> I have folded this as part of my patchset for reserved commands in SCSI;
> >>> that way fnic can use 'normal' tags for TMFs, which are then visible to the
> >>> busy iter and life's good.
> >>
> >> No, this fix is one bug fix, which can't depend on your reserved
> >> command in SCSI, and they need to be backported to stable tree too.
> > 
> > Hi Hannes,
> > 
> > We have customers report on this issue, could you please let us know
> > if you will post out one bug-fix only version?
> > 
> And so have we, and indeed we have the same bug reports.
> So I'll be splitting off those patches and send it as a stand-alone
> patchset.
> 

That is great!

Glad to give an review after it is posted out.


Thanks,
Ming


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-04-26  8:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-21  7:55 [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Ming Lei
2021-04-21  7:55 ` [PATCH 1/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_terminate_rport_io Ming Lei
2021-04-21  7:55 ` [PATCH 2/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_clean_pending_aborts Ming Lei
2021-04-21  7:55 ` [PATCH 3/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_cleanup_io Ming Lei
2021-04-21  7:55 ` [PATCH 4/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_rport_exch_reset Ming Lei
2021-04-21  7:55 ` [PATCH 5/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands in fnic_is_abts_pending Ming Lei
2021-04-21 10:19 ` [PATCH 0/5] scsi: fnic: use blk_mq_tagset_busy_iter() to walk scsi commands Martin Wilck
2021-04-21 10:40   ` Ming Lei
2021-04-21 12:33     ` Martin Wilck
2021-04-21 14:05       ` Ming Lei
2021-04-21 20:14 ` Hannes Reinecke
2021-04-22  0:08   ` Ming Lei
2021-04-22 14:17     ` Ming Lei
2021-04-26  7:30       ` Hannes Reinecke
2021-04-26  8:04         ` Ming Lei

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.