All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Christie <michael.christie@oracle.com>
To: lduncan@suse.com, cleech@redhat.com, njavali@marvell.com,
	mrangankar@marvell.com, GR-QLogic-Storage-Upstream@marvell.com,
	martin.petersen@oracle.com, linux-scsi@vger.kernel.org,
	jejb@linux.ibm.com
Cc: Mike Christie <michael.christie@oracle.com>
Subject: [PATCH v2 27/28] scsi: qedi: Complete TMF works before disconnect
Date: Tue, 25 May 2021 13:18:20 -0500	[thread overview]
Message-ID: <20210525181821.7617-28-michael.christie@oracle.com> (raw)
In-Reply-To: <20210525181821.7617-1-michael.christie@oracle.com>

We need to make sure that abort and reset completion work has completed
before ep_disconnect returns. After ep_disconnect we can't manipulate
cmds because libiscsi will call conn_stop and take onwership.

We are trying to make sure abort work and reset completion work has
completed before we do the cmd clean up in ep_disconnect. The problem is
that:

1. the work function sets the QEDI_CONN_FW_CLEANUP bit, so if the work was
still pending we would not see the bit set. We need to do this before the
work is queued.

2. If we had multiple works queued then we could break from the loop in
qedi_ep_disconnect early because when abort work 1 completes it could
clear QEDI_CONN_FW_CLEANUP. qedi_ep_disconnect could then see that before
work 2 has run.

3. A TMF reset completion work could run after ep_disconnect starts
cleaning up cmds via qedi_clearsq. ep_disconnect's call to qedi_clearsq
-> qedi_cleanup_all_io would might think it's done cleaning up cmds, but
the reset completion work could still be running. We then return from
ep_disconnect while still doing cleanup.

This replaces the bit with a counter to track the number of queued TMF
works, and adds a bool to prevent new works from starting from the
completion path once a ep_disconnect starts.

Reviewed-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/scsi/qedi/qedi_fw.c    | 42 ++++++++++++++++++++++------------
 drivers/scsi/qedi/qedi_iscsi.c | 23 +++++++++++++------
 drivers/scsi/qedi/qedi_iscsi.h |  4 ++--
 3 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 4ee1ce1dcdef..3de1422ce80b 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -156,7 +156,6 @@ static void qedi_tmf_resp_work(struct work_struct *work)
 	struct iscsi_tm_rsp *resp_hdr_ptr;
 	int rval = 0;
 
-	set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
 	resp_hdr_ptr =  (struct iscsi_tm_rsp *)qedi_cmd->tmf_resp_buf;
 
 	rval = qedi_cleanup_all_io(qedi, qedi_conn, qedi_cmd->task, true);
@@ -169,7 +168,10 @@ static void qedi_tmf_resp_work(struct work_struct *work)
 
 exit_tmf_resp:
 	kfree(resp_hdr_ptr);
-	clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
+
+	spin_lock(&qedi_conn->tmf_work_lock);
+	qedi_conn->fw_cleanup_works--;
+	spin_unlock(&qedi_conn->tmf_work_lock);
 }
 
 static void qedi_process_tmf_resp(struct qedi_ctx *qedi,
@@ -225,16 +227,25 @@ static void qedi_process_tmf_resp(struct qedi_ctx *qedi,
 	}
 	spin_unlock(&qedi_conn->list_lock);
 
-	if (((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
-	      ISCSI_TM_FUNC_LOGICAL_UNIT_RESET) ||
-	    ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
-	      ISCSI_TM_FUNC_TARGET_WARM_RESET) ||
-	    ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
-	      ISCSI_TM_FUNC_TARGET_COLD_RESET)) {
+	spin_lock(&qedi_conn->tmf_work_lock);
+	switch (tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) {
+	case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+	case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+	case ISCSI_TM_FUNC_TARGET_COLD_RESET:
+		if (qedi_conn->ep_disconnect_starting) {
+			/* Session is down so ep_disconnect will clean up */
+			spin_unlock(&qedi_conn->tmf_work_lock);
+			goto unblock_sess;
+		}
+
+		qedi_conn->fw_cleanup_works++;
+		spin_unlock(&qedi_conn->tmf_work_lock);
+
 		INIT_WORK(&qedi_cmd->tmf_work, qedi_tmf_resp_work);
 		queue_work(qedi->tmf_thread, &qedi_cmd->tmf_work);
 		goto unblock_sess;
 	}
+	spin_unlock(&qedi_conn->tmf_work_lock);
 
 	__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr_ptr, NULL, 0);
 	kfree(resp_hdr_ptr);
@@ -1359,7 +1370,6 @@ static void qedi_abort_work(struct work_struct *work)
 
 	mtask = qedi_cmd->task;
 	tmf_hdr = (struct iscsi_tm *)mtask->hdr;
-	set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
 
 	spin_lock_bh(&conn->session->back_lock);
 	ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt);
@@ -1429,10 +1439,7 @@ static void qedi_abort_work(struct work_struct *work)
 
 send_tmf:
 	send_iscsi_tmf(qedi_conn, qedi_cmd->task, ctask);
-
-clear_cleanup:
-	clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
-	return;
+	goto clear_cleanup;
 
 ldel_exit:
 	spin_lock_bh(&qedi_conn->tmf_work_lock);
@@ -1451,7 +1458,10 @@ static void qedi_abort_work(struct work_struct *work)
 	}
 	spin_unlock(&qedi_conn->list_lock);
 
-	clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
+clear_cleanup:
+	spin_lock(&qedi_conn->tmf_work_lock);
+	qedi_conn->fw_cleanup_works--;
+	spin_unlock(&qedi_conn->tmf_work_lock);
 }
 
 static int send_iscsi_tmf(struct qedi_conn *qedi_conn, struct iscsi_task *mtask,
@@ -1546,6 +1556,10 @@ int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn, struct iscsi_task *mtask)
 
 	switch (tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) {
 	case ISCSI_TM_FUNC_ABORT_TASK:
+		spin_lock(&qedi_conn->tmf_work_lock);
+		qedi_conn->fw_cleanup_works++;
+		spin_unlock(&qedi_conn->tmf_work_lock);
+
 		INIT_WORK(&qedi_cmd->tmf_work, qedi_abort_work);
 		queue_work(qedi->tmf_thread, &qedi_cmd->tmf_work);
 		break;
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index ddb47784eb4a..bf581ecea897 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -603,7 +603,11 @@ static int qedi_conn_start(struct iscsi_cls_conn *cls_conn)
 		goto start_err;
 	}
 
-	clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
+	spin_lock(&qedi_conn->tmf_work_lock);
+	qedi_conn->fw_cleanup_works = 0;
+	qedi_conn->ep_disconnect_starting = false;
+	spin_unlock(&qedi_conn->tmf_work_lock);
+
 	qedi_conn->abrt_conn = 0;
 
 	rval = iscsi_conn_start(cls_conn);
@@ -1019,7 +1023,6 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 	int ret = 0;
 	int wait_delay;
 	int abrt_conn = 0;
-	int count = 10;
 
 	wait_delay = 60 * HZ + DEF_MAX_RT_TIME;
 	qedi_ep = ep->dd_data;
@@ -1035,13 +1038,19 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 		qedi_conn = qedi_ep->conn;
 		abrt_conn = qedi_conn->abrt_conn;
 
-		while (count--)	{
-			if (!test_bit(QEDI_CONN_FW_CLEANUP,
-				      &qedi_conn->flags)) {
-				break;
-			}
+		QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+			  "cid=0x%x qedi_ep=%p waiting for %d tmfs\n",
+			  qedi_ep->iscsi_cid, qedi_ep,
+			  qedi_conn->fw_cleanup_works);
+
+		spin_lock(&qedi_conn->tmf_work_lock);
+		qedi_conn->ep_disconnect_starting = true;
+		while (qedi_conn->fw_cleanup_works > 0) {
+			spin_unlock(&qedi_conn->tmf_work_lock);
 			msleep(1000);
+			spin_lock(&qedi_conn->tmf_work_lock);
 		}
+		spin_unlock(&qedi_conn->tmf_work_lock);
 
 		if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
 			if (qedi_do_not_recover) {
diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h
index 68ef519f5480..758735209e15 100644
--- a/drivers/scsi/qedi/qedi_iscsi.h
+++ b/drivers/scsi/qedi/qedi_iscsi.h
@@ -169,8 +169,8 @@ struct qedi_conn {
 	struct list_head tmf_work_list;
 	wait_queue_head_t wait_queue;
 	spinlock_t tmf_work_lock;	/* tmf work lock */
-	unsigned long flags;
-#define QEDI_CONN_FW_CLEANUP	1
+	bool ep_disconnect_starting;
+	int fw_cleanup_works;
 };
 
 struct qedi_cmd {
-- 
2.25.1


  parent reply	other threads:[~2021-05-25 18:19 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-25 18:17 iSCSI error handler fixes v2 Mike Christie
2021-05-25 18:17 ` [PATCH v2 01/28] scsi: iscsi: Add task completion helper Mike Christie
2021-06-08  3:05   ` Martin K. Petersen
2021-05-25 18:17 ` [PATCH v2 02/28] scsi: iscsi: Stop queueing during ep_disconnect Mike Christie
2021-05-25 18:17 ` [PATCH v2 03/28] scsi: iscsi: Drop suspend calls from ep_disconnect Mike Christie
2021-05-25 18:17 ` [PATCH v2 04/28] scsi: iscsi: Force immediate failure during shutdown Mike Christie
2021-05-25 18:17 ` [PATCH v2 05/28] scsi: iscsi: Use system_unbound_wq for destroy_work Mike Christie
2021-05-25 18:17 ` [PATCH v2 06/28] scsi: iscsi: Rel ref after iscsi_lookup_endpoint Mike Christie
2021-05-25 18:18 ` [PATCH v2 07/28] scsi: iscsi: Fix in-kernel conn failure handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 08/28] scsi: iscsi_tcp: Set no linger Mike Christie
2021-05-25 18:18 ` [PATCH v2 09/28] scsi: iscsi_tcp: Start socket shutdown during conn stop Mike Christie
2021-05-25 18:18 ` [PATCH v2 10/28] scsi: iscsi: Add iscsi_cls_conn refcount helpers Mike Christie
2021-05-25 18:18 ` [PATCH v2 11/28] scsi: iscsi: Have abort handler get ref to conn Mike Christie
2021-05-25 18:18 ` [PATCH v2 12/28] scsi: iscsi: Get ref to conn during reset handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 13/28] scsi: iscsi: Fix conn use after free during resets Mike Christie
2021-05-25 18:18 ` [PATCH v2 14/28] scsi: iscsi: Fix shost->max_id use Mike Christie
2021-05-25 18:18 ` [PATCH v2 15/28] scsi: iscsi: Fix completion check during abort races Mike Christie
2021-05-25 18:18 ` [PATCH v2 16/28] scsi: iscsi: Flush block work before unblock Mike Christie
2021-05-25 18:18 ` [PATCH v2 17/28] scsi: iscsi: Hold task ref during TMF timeout handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 18/28] scsi: iscsi: Move pool freeing Mike Christie
2021-05-25 18:18 ` [PATCH v2 19/28] scsi: qedi: Fix null ref during abort handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 20/28] scsi: qedi: Fix race during abort timeouts Mike Christie
2021-05-25 18:18 ` [PATCH v2 21/28] scsi: qedi: Fix use after free during abort cleanup Mike Christie
2021-05-25 18:18 ` [PATCH v2 22/28] scsi: qedi: Fix TMF tid allocation Mike Christie
2021-05-25 18:18 ` [PATCH v2 23/28] scsi: qedi: Use GFP_NOIO for TMF allocation Mike Christie
2021-05-25 18:18 ` [PATCH v2 24/28] scsi: qedi: Fix TMF session block/unblock use Mike Christie
2021-05-25 18:18 ` [PATCH v2 25/28] scsi: qedi: Fix cleanup " Mike Christie
2021-05-25 18:18 ` [PATCH v2 26/28] scsi: qedi: Pass send_iscsi_tmf task to abort Mike Christie
2021-05-25 18:18 ` Mike Christie [this message]
2021-05-25 18:18 ` [PATCH v2 28/28] scsi: qedi: Wake up if cmd_cleanup_req is set Mike Christie
2021-06-02  5:28 ` iSCSI error handler fixes v2 Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210525181821.7617-28-michael.christie@oracle.com \
    --to=michael.christie@oracle.com \
    --cc=GR-QLogic-Storage-Upstream@marvell.com \
    --cc=cleech@redhat.com \
    --cc=jejb@linux.ibm.com \
    --cc=lduncan@suse.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=mrangankar@marvell.com \
    --cc=njavali@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.