From: Mike Christie <michael.christie@oracle.com>
To: lduncan@suse.com, cleech@redhat.com, njavali@marvell.com,
mrangankar@marvell.com, GR-QLogic-Storage-Upstream@marvell.com,
martin.petersen@oracle.com, linux-scsi@vger.kernel.org,
jejb@linux.ibm.com
Cc: Mike Christie <michael.christie@oracle.com>
Subject: [PATCH v2 21/28] scsi: qedi: Fix use after free during abort cleanup
Date: Tue, 25 May 2021 13:18:14 -0500 [thread overview]
Message-ID: <20210525181821.7617-22-michael.christie@oracle.com> (raw)
In-Reply-To: <20210525181821.7617-1-michael.christie@oracle.com>
If qedi_tmf_work's qedi_wait_for_cleanup_request call times out we will
also force the clean up of the qedi_work_map but
qedi_process_cmd_cleanup_resp could still be accessing the qedi_cmd.
To fix this issue we extend where we hold the tmf_work_lock and back_lock
so the qedi_process_cmd_cleanup_resp access is serialized with the cleanup
done in qedi_tmf_work and any completion handling for the iscsi_task.
Reviewed-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
drivers/scsi/qedi/qedi_fw.c | 113 ++++++++++++++++++---------------
drivers/scsi/qedi/qedi_iscsi.h | 1 +
2 files changed, 63 insertions(+), 51 deletions(-)
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index c12bb2dd5ff9..61c1ebb3004c 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -729,7 +729,6 @@ static void qedi_process_nopin_local_cmpl(struct qedi_ctx *qedi,
static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
struct iscsi_cqe_solicited *cqe,
- struct iscsi_task *task,
struct iscsi_conn *conn)
{
struct qedi_work_map *work, *work_tmp;
@@ -741,7 +740,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
u32 iscsi_cid;
struct qedi_conn *qedi_conn;
struct qedi_cmd *dbg_cmd;
- struct iscsi_task *mtask;
+ struct iscsi_task *mtask, *task;
struct iscsi_tm *tmf_hdr = NULL;
iscsi_cid = cqe->conn_id;
@@ -767,6 +766,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
}
found = 1;
mtask = qedi_cmd->task;
+ task = work->ctask;
tmf_hdr = (struct iscsi_tm *)mtask->hdr;
list_del_init(&work->list);
@@ -774,52 +774,47 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
qedi_cmd->list_tmf_work = NULL;
}
}
- spin_unlock_bh(&qedi_conn->tmf_work_lock);
-
- if (found) {
- QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
- "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
- proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id);
-
- if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
- ISCSI_TM_FUNC_ABORT_TASK) {
- spin_lock_bh(&conn->session->back_lock);
-
- protoitt = build_itt(get_itt(tmf_hdr->rtt),
- conn->session->age);
- task = iscsi_itt_to_task(conn, protoitt);
- spin_unlock_bh(&conn->session->back_lock);
+ if (!found) {
+ spin_unlock_bh(&qedi_conn->tmf_work_lock);
+ goto check_cleanup_reqs;
+ }
- if (!task) {
- QEDI_NOTICE(&qedi->dbg_ctx,
- "IO task completed, tmf rtt=0x%x, cid=0x%x\n",
- get_itt(tmf_hdr->rtt),
- qedi_conn->iscsi_conn_id);
- return;
- }
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
+ "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
+ proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id);
+
+ spin_lock_bh(&conn->session->back_lock);
+ if (iscsi_task_is_completed(task)) {
+ QEDI_NOTICE(&qedi->dbg_ctx,
+ "IO task completed, tmf rtt=0x%x, cid=0x%x\n",
+ get_itt(tmf_hdr->rtt), qedi_conn->iscsi_conn_id);
+ goto unlock;
+ }
- dbg_cmd = task->dd_data;
+ dbg_cmd = task->dd_data;
- QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
- "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n",
- get_itt(tmf_hdr->rtt), get_itt(task->itt),
- dbg_cmd->task_id, qedi_conn->iscsi_conn_id);
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
+ "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n",
+ get_itt(tmf_hdr->rtt), get_itt(task->itt), dbg_cmd->task_id,
+ qedi_conn->iscsi_conn_id);
- if (qedi_cmd->state == CLEANUP_WAIT_FAILED)
- qedi_cmd->state = CLEANUP_RECV;
+ spin_lock(&qedi_conn->list_lock);
+ if (likely(dbg_cmd->io_cmd_in_list)) {
+ dbg_cmd->io_cmd_in_list = false;
+ list_del_init(&dbg_cmd->io_cmd);
+ qedi_conn->active_cmd_count--;
+ }
+ spin_unlock(&qedi_conn->list_lock);
+ qedi_cmd->state = CLEANUP_RECV;
+unlock:
+ spin_unlock_bh(&conn->session->back_lock);
+ spin_unlock_bh(&qedi_conn->tmf_work_lock);
+ wake_up_interruptible(&qedi_conn->wait_queue);
+ return;
- spin_lock(&qedi_conn->list_lock);
- if (likely(dbg_cmd->io_cmd_in_list)) {
- dbg_cmd->io_cmd_in_list = false;
- list_del_init(&dbg_cmd->io_cmd);
- qedi_conn->active_cmd_count--;
- }
- spin_unlock(&qedi_conn->list_lock);
- qedi_cmd->state = CLEANUP_RECV;
- wake_up_interruptible(&qedi_conn->wait_queue);
- }
- } else if (qedi_conn->cmd_cleanup_req > 0) {
+check_cleanup_reqs:
+ if (qedi_conn->cmd_cleanup_req > 0) {
spin_lock_bh(&conn->session->back_lock);
qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt);
protoitt = build_itt(ptmp_itt, conn->session->age);
@@ -842,6 +837,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID,
"Freeing tid=0x%x for cid=0x%x\n",
cqe->itid, qedi_conn->iscsi_conn_id);
+ qedi_clear_task_idx(qedi_conn->qedi, cqe->itid);
} else {
qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt);
@@ -944,8 +940,7 @@ void qedi_fp_process_cqes(struct qedi_work *work)
goto exit_fp_process;
case ISCSI_CQE_TYPE_TASK_CLEANUP:
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "CleanUp CqE\n");
- qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, task,
- conn);
+ qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, conn);
goto exit_fp_process;
default:
QEDI_ERR(&qedi->dbg_ctx, "Error cqe.\n");
@@ -1372,11 +1367,25 @@ static void qedi_tmf_work(struct work_struct *work)
tmf_hdr = (struct iscsi_tm *)mtask->hdr;
set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
- ctask = iscsi_itt_to_task(conn, tmf_hdr->rtt);
- if (!ctask || !ctask->sc) {
- QEDI_ERR(&qedi->dbg_ctx, "Task already completed\n");
- goto abort_ret;
+ spin_lock_bh(&conn->session->back_lock);
+ ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt);
+ if (!ctask) {
+ spin_unlock_bh(&conn->session->back_lock);
+ QEDI_ERR(&qedi->dbg_ctx, "Invalid RTT. Letting abort timeout.\n");
+ goto clear_cleanup;
+ }
+
+ if (iscsi_task_is_completed(ctask)) {
+ spin_unlock_bh(&conn->session->back_lock);
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Task already completed\n");
+ /*
+ * We have to still send the TMF because libiscsi needs the
+ * response to avoid a timeout.
+ */
+ goto send_tmf;
}
+ spin_unlock_bh(&conn->session->back_lock);
cmd = (struct qedi_cmd *)ctask->dd_data;
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
@@ -1387,19 +1396,20 @@ static void qedi_tmf_work(struct work_struct *work)
if (qedi_do_not_recover) {
QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
qedi_do_not_recover);
- goto abort_ret;
+ goto clear_cleanup;
}
list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC);
if (!list_work) {
QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n");
- goto abort_ret;
+ goto clear_cleanup;
}
qedi_cmd->type = TYPEIO;
list_work->qedi_cmd = qedi_cmd;
list_work->rtid = cmd->task_id;
list_work->state = QEDI_WORK_SCHEDULED;
+ list_work->ctask = ctask;
qedi_cmd->list_tmf_work = list_work;
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
@@ -1422,6 +1432,7 @@ static void qedi_tmf_work(struct work_struct *work)
goto ldel_exit;
}
+send_tmf:
tid = qedi_get_task_idx(qedi);
if (tid == -1) {
QEDI_ERR(&qedi->dbg_ctx, "Invalid tid, cid=0x%x\n",
@@ -1432,7 +1443,7 @@ static void qedi_tmf_work(struct work_struct *work)
qedi_cmd->task_id = tid;
qedi_send_iscsi_tmf(qedi_conn, qedi_cmd->task);
-abort_ret:
+clear_cleanup:
clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
return;
diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h
index 39dc27c85e3c..68ef519f5480 100644
--- a/drivers/scsi/qedi/qedi_iscsi.h
+++ b/drivers/scsi/qedi/qedi_iscsi.h
@@ -212,6 +212,7 @@ struct qedi_cmd {
struct qedi_work_map {
struct list_head list;
struct qedi_cmd *qedi_cmd;
+ struct iscsi_task *ctask;
int rtid;
int state;
--
2.25.1
next prev parent reply other threads:[~2021-05-25 18:19 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-25 18:17 iSCSI error handler fixes v2 Mike Christie
2021-05-25 18:17 ` [PATCH v2 01/28] scsi: iscsi: Add task completion helper Mike Christie
2021-06-08 3:05 ` Martin K. Petersen
2021-05-25 18:17 ` [PATCH v2 02/28] scsi: iscsi: Stop queueing during ep_disconnect Mike Christie
2021-05-25 18:17 ` [PATCH v2 03/28] scsi: iscsi: Drop suspend calls from ep_disconnect Mike Christie
2021-05-25 18:17 ` [PATCH v2 04/28] scsi: iscsi: Force immediate failure during shutdown Mike Christie
2021-05-25 18:17 ` [PATCH v2 05/28] scsi: iscsi: Use system_unbound_wq for destroy_work Mike Christie
2021-05-25 18:17 ` [PATCH v2 06/28] scsi: iscsi: Rel ref after iscsi_lookup_endpoint Mike Christie
2021-05-25 18:18 ` [PATCH v2 07/28] scsi: iscsi: Fix in-kernel conn failure handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 08/28] scsi: iscsi_tcp: Set no linger Mike Christie
2021-05-25 18:18 ` [PATCH v2 09/28] scsi: iscsi_tcp: Start socket shutdown during conn stop Mike Christie
2021-05-25 18:18 ` [PATCH v2 10/28] scsi: iscsi: Add iscsi_cls_conn refcount helpers Mike Christie
2021-05-25 18:18 ` [PATCH v2 11/28] scsi: iscsi: Have abort handler get ref to conn Mike Christie
2021-05-25 18:18 ` [PATCH v2 12/28] scsi: iscsi: Get ref to conn during reset handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 13/28] scsi: iscsi: Fix conn use after free during resets Mike Christie
2021-05-25 18:18 ` [PATCH v2 14/28] scsi: iscsi: Fix shost->max_id use Mike Christie
2021-05-25 18:18 ` [PATCH v2 15/28] scsi: iscsi: Fix completion check during abort races Mike Christie
2021-05-25 18:18 ` [PATCH v2 16/28] scsi: iscsi: Flush block work before unblock Mike Christie
2021-05-25 18:18 ` [PATCH v2 17/28] scsi: iscsi: Hold task ref during TMF timeout handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 18/28] scsi: iscsi: Move pool freeing Mike Christie
2021-05-25 18:18 ` [PATCH v2 19/28] scsi: qedi: Fix null ref during abort handling Mike Christie
2021-05-25 18:18 ` [PATCH v2 20/28] scsi: qedi: Fix race during abort timeouts Mike Christie
2021-05-25 18:18 ` Mike Christie [this message]
2021-05-25 18:18 ` [PATCH v2 22/28] scsi: qedi: Fix TMF tid allocation Mike Christie
2021-05-25 18:18 ` [PATCH v2 23/28] scsi: qedi: Use GFP_NOIO for TMF allocation Mike Christie
2021-05-25 18:18 ` [PATCH v2 24/28] scsi: qedi: Fix TMF session block/unblock use Mike Christie
2021-05-25 18:18 ` [PATCH v2 25/28] scsi: qedi: Fix cleanup " Mike Christie
2021-05-25 18:18 ` [PATCH v2 26/28] scsi: qedi: Pass send_iscsi_tmf task to abort Mike Christie
2021-05-25 18:18 ` [PATCH v2 27/28] scsi: qedi: Complete TMF works before disconnect Mike Christie
2021-05-25 18:18 ` [PATCH v2 28/28] scsi: qedi: Wake up if cmd_cleanup_req is set Mike Christie
2021-06-02 5:28 ` iSCSI error handler fixes v2 Martin K. Petersen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210525181821.7617-22-michael.christie@oracle.com \
--to=michael.christie@oracle.com \
--cc=GR-QLogic-Storage-Upstream@marvell.com \
--cc=cleech@redhat.com \
--cc=jejb@linux.ibm.com \
--cc=lduncan@suse.com \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=mrangankar@marvell.com \
--cc=njavali@marvell.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).