All of lore.kernel.org
 help / color / mirror / Atom feed
From: Manish Rangankar <mrangankar@marvell.com>
To: Mike Christie <michael.christie@oracle.com>,
	"lduncan@suse.com" <lduncan@suse.com>,
	"martin.petersen@oracle.com" <martin.petersen@oracle.com>,
	Santosh Vernekar <svernekar@marvell.com>,
	"linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
	"jejb@linux.ibm.com" <jejb@linux.ibm.com>
Subject: RE: [EXT] [PATCH 05/13] scsi: qedi: fix use after free during abort cleanup
Date: Mon, 12 Apr 2021 10:26:51 +0000	[thread overview]
Message-ID: <BYAPR18MB299833B19DE44B0A4519A2B9D8709@BYAPR18MB2998.namprd18.prod.outlook.com> (raw)
In-Reply-To: <20210410184016.21603-6-michael.christie@oracle.com>


> -----Original Message-----
> From: Mike Christie <michael.christie@oracle.com>
> Sent: Sunday, April 11, 2021 12:10 AM
> To: lduncan@suse.com; martin.petersen@oracle.com; Manish Rangankar
> <mrangankar@marvell.com>; Santosh Vernekar <svernekar@marvell.com>;
> linux-scsi@vger.kernel.org; jejb@linux.ibm.com
> Cc: Mike Christie <michael.christie@oracle.com>
> Subject: [EXT] [PATCH 05/13] scsi: qedi: fix use after free during abort cleanup
> 
> External Email
> 
> ----------------------------------------------------------------------
> This fixes two bugs:
> 
> 1. The scsi cmd task could be completed and the abort could time out while we
> are running qedi_tmf_work, so we need to get a ref to the task.
> 
> 2. If qedi_tmf_work's qedi_wait_for_cleanup_request call times out, then we will
> also force the cleanup of the qedi_work_map, but
> qedi_process_cmd_cleanup_resp could still be accessing the qedi_cmd for the
> abort tmf. We can then race where qedi_process_cmd_cleanup_resp is still
> accessing the mtask's qedi_cmd but libiscsi has escalated to session-level
> cleanup and is cleaning up the mtask while we are still accessing it.
> 
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/scsi/qedi/qedi_fw.c    | 110 ++++++++++++++++++---------------
>  drivers/scsi/qedi/qedi_iscsi.h |   1 +
>  2 files changed, 61 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index
> ad4357e4c15d..c5699421ec37 100644
> --- a/drivers/scsi/qedi/qedi_fw.c
> +++ b/drivers/scsi/qedi/qedi_fw.c
> @@ -729,7 +729,6 @@ static void qedi_process_nopin_local_cmpl(struct
> qedi_ctx *qedi,
> 
>  static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
>  					  struct iscsi_cqe_solicited *cqe,
> -					  struct iscsi_task *task,
>  					  struct iscsi_conn *conn)
>  {
>  	struct qedi_work_map *work, *work_tmp; @@ -742,7 +741,7 @@
> static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
>  	u32 iscsi_cid;
>  	struct qedi_conn *qedi_conn;
>  	struct qedi_cmd *dbg_cmd;
> -	struct iscsi_task *mtask;
> +	struct iscsi_task *mtask, *task;
>  	struct iscsi_tm *tmf_hdr = NULL;
> 
>  	iscsi_cid = cqe->conn_id;
> @@ -768,6 +767,7 @@ static void qedi_process_cmd_cleanup_resp(struct
> qedi_ctx *qedi,
>  			}
>  			found = 1;
>  			mtask = qedi_cmd->task;
> +			task = work->ctask;
>  			tmf_hdr = (struct iscsi_tm *)mtask->hdr;
>  			rtid = work->rtid;
> 
> @@ -776,52 +776,47 @@ static void qedi_process_cmd_cleanup_resp(struct
> qedi_ctx *qedi,
>  			qedi_cmd->list_tmf_work = NULL;
>  		}
>  	}
> -	spin_unlock_bh(&qedi_conn->tmf_work_lock);
> -
> -	if (found) {
> -		QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
> -			  "TMF work, cqe->tid=0x%x, tmf flags=0x%x,
> cid=0x%x\n",
> -			  proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id);
> -
> -		if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
> -		    ISCSI_TM_FUNC_ABORT_TASK) {
> -			spin_lock_bh(&conn->session->back_lock);
> 
> -			protoitt = build_itt(get_itt(tmf_hdr->rtt),
> -					     conn->session->age);
> -			task = iscsi_itt_to_task(conn, protoitt);
> -
> -			spin_unlock_bh(&conn->session->back_lock);
> +	if (!found) {
> +		spin_unlock_bh(&qedi_conn->tmf_work_lock);
> +		goto check_cleanup_reqs;
> +	}
> 
> -			if (!task) {
> -				QEDI_NOTICE(&qedi->dbg_ctx,
> -					    "IO task completed, tmf rtt=0x%x,
> cid=0x%x\n",
> -					    get_itt(tmf_hdr->rtt),
> -					    qedi_conn->iscsi_conn_id);
> -				return;
> -			}
> +	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
> +		  "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
> +		  proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id);
> +
> +	spin_lock_bh(&conn->session->back_lock);
> +	if (iscsi_task_is_completed(task)) {
> +		QEDI_NOTICE(&qedi->dbg_ctx,
> +			    "IO task completed, tmf rtt=0x%x, cid=0x%x\n",
> +			   get_itt(tmf_hdr->rtt), qedi_conn->iscsi_conn_id);
> +		goto unlock;
> +	}
> 
> -			dbg_cmd = task->dd_data;
> +	dbg_cmd = task->dd_data;
> 
> -			QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
> -				  "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o
> tid=0x%x, cid=0x%x\n",
> -				  get_itt(tmf_hdr->rtt), get_itt(task->itt),
> -				  dbg_cmd->task_id, qedi_conn-
> >iscsi_conn_id);
> +	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
> +		  "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n",
> +		  get_itt(tmf_hdr->rtt), get_itt(task->itt), dbg_cmd->task_id,
> +		  qedi_conn->iscsi_conn_id);
> 
> -			if (qedi_cmd->state == CLEANUP_WAIT_FAILED)
> -				qedi_cmd->state = CLEANUP_RECV;
> +	spin_lock(&qedi_conn->list_lock);
> +	if (likely(dbg_cmd->io_cmd_in_list)) {
> +		dbg_cmd->io_cmd_in_list = false;
> +		list_del_init(&dbg_cmd->io_cmd);
> +		qedi_conn->active_cmd_count--;
> +	}
> +	spin_unlock(&qedi_conn->list_lock);
> +	qedi_cmd->state = CLEANUP_RECV;
> +unlock:
> +	spin_unlock_bh(&conn->session->back_lock);
> +	spin_unlock_bh(&qedi_conn->tmf_work_lock);
> +	wake_up_interruptible(&qedi_conn->wait_queue);
> +	return;
> 
> -			spin_lock(&qedi_conn->list_lock);
> -			if (likely(dbg_cmd->io_cmd_in_list)) {
> -				dbg_cmd->io_cmd_in_list = false;
> -				list_del_init(&dbg_cmd->io_cmd);
> -				qedi_conn->active_cmd_count--;
> -			}
> -			spin_unlock(&qedi_conn->list_lock);
> -			qedi_cmd->state = CLEANUP_RECV;
> -			wake_up_interruptible(&qedi_conn->wait_queue);
> -		}
> -	} else if (qedi_conn->cmd_cleanup_req > 0) {
> +check_cleanup_reqs:
> +	if (qedi_conn->cmd_cleanup_req > 0) {
>  		spin_lock_bh(&conn->session->back_lock);
>  		qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt);
>  		protoitt = build_itt(ptmp_itt, conn->session->age); @@ -844,6
> +839,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
>  		QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID,
>  			  "Freeing tid=0x%x for cid=0x%x\n",
>  			  cqe->itid, qedi_conn->iscsi_conn_id);
> +		qedi_clear_task_idx(qedi_conn->qedi, cqe->itid);
> 
>  	} else {
>  		qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); @@ -946,8
> +942,7 @@ void qedi_fp_process_cqes(struct qedi_work *work)
>  		goto exit_fp_process;
>  	case ISCSI_CQE_TYPE_TASK_CLEANUP:
>  		QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "CleanUp
> CqE\n");
> -		qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited,
> task,
> -					      conn);
> +		qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited,
> conn);
>  		goto exit_fp_process;
>  	default:
>  		QEDI_ERR(&qedi->dbg_ctx, "Error cqe.\n"); @@ -1374,12
> +1369,22 @@ static void qedi_tmf_work(struct work_struct *work)
>  	tmf_hdr = (struct iscsi_tm *)mtask->hdr;
>  	set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
> 
> -	ctask = iscsi_itt_to_task(conn, tmf_hdr->rtt);
> -	if (!ctask || !ctask->sc) {
> +	spin_lock_bh(&conn->session->back_lock);
> +	ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt);
> +	if (!ctask || iscsi_task_is_completed(ctask)) {
> +		spin_unlock_bh(&conn->session->back_lock);
>  		QEDI_ERR(&qedi->dbg_ctx, "Task already completed\n");
> -		goto abort_ret;
> +		goto clear_cleanup;
>  	}
> 
> +	/*
> +	 * libiscsi gets a ref before sending the abort, but if libiscsi times
> +	 * it out then it could release it and the cmd could complete from
> +	 * under us.
> +	 */
> +	__iscsi_get_task(ctask);
> +	spin_unlock_bh(&conn->session->back_lock);
> +
>  	cmd = (struct qedi_cmd *)ctask->dd_data;
>  	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
>  		  "Abort tmf rtt=0x%x, cmd itt=0x%x, cmd tid=0x%x,
> cid=0x%x\n", @@ -1389,19 +1394,20 @@ static void qedi_tmf_work(struct
> work_struct *work)
>  	if (qedi_do_not_recover) {
>  		QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT
> %d\n",
>  			 qedi_do_not_recover);
> -		goto abort_ret;
> +		goto put_task;
>  	}
> 
>  	list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC);
>  	if (!list_work) {
>  		QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n");
> -		goto abort_ret;
> +		goto put_task;
>  	}
> 
>  	qedi_cmd->type = TYPEIO;
>  	list_work->qedi_cmd = qedi_cmd;
>  	list_work->rtid = cmd->task_id;
>  	list_work->state = QEDI_WORK_SCHEDULED;
> +	list_work->ctask = ctask;
>  	qedi_cmd->list_tmf_work = list_work;
> 
>  	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, @@ -1434,7 +1440,9
> @@ static void qedi_tmf_work(struct work_struct *work)
>  	qedi_cmd->task_id = tid;
>  	qedi_send_iscsi_tmf(qedi_conn, qedi_cmd->task);
> 
> -abort_ret:
> +put_task:
> +	iscsi_put_task(ctask);
> +clear_cleanup:
>  	clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
>  	return;
> 
> @@ -1455,6 +1463,8 @@ static void qedi_tmf_work(struct work_struct *work)
>  	}
>  	spin_unlock(&qedi_conn->list_lock);
> 
> +	iscsi_put_task(ctask);
> +
>  	clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);  }
> 
> diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index
> 39dc27c85e3c..68ef519f5480 100644
> --- a/drivers/scsi/qedi/qedi_iscsi.h
> +++ b/drivers/scsi/qedi/qedi_iscsi.h
> @@ -212,6 +212,7 @@ struct qedi_cmd {
>  struct qedi_work_map {
>  	struct list_head list;
>  	struct qedi_cmd *qedi_cmd;
> +	struct iscsi_task *ctask;
>  	int rtid;
> 
>  	int state;
> --
> 2.25.1

Thanks,
Reviewed-by: Manish Rangankar <mrangankar@marvell.com>


  reply	other threads:[~2021-04-12 10:27 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-10 18:40 [RFC PATCH 0/13]: qedi tmf fixes Mike Christie
2021-04-10 18:40 ` [PATCH 01/13] scsi: iscsi: revert Perform connection failure entirely in kernel space Mike Christie
2021-04-10 18:40 ` [PATCH 02/13] scsi: qedi: fix null ref during abort handling Mike Christie
2021-04-12 10:25   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 03/13] scsi: iscsi: add task completion helper Mike Christie
2021-04-10 18:40 ` [PATCH 04/13] scsi: qedi: fix abort vs cmd re-use race Mike Christie
2021-04-12 10:24   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 05/13] scsi: qedi: fix use after free during abort cleanup Mike Christie
2021-04-12 10:26   ` Manish Rangankar [this message]
2021-04-10 18:40 ` [PATCH 06/13] scsi: qedi: fix tmf tid allocation Mike Christie
2021-04-12 10:27   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 07/13] scsi: qedi: use GFP_NOIO for tmf allocation Mike Christie
2021-04-12 10:28   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 08/13] scsi: iscsi: fix lun/target reset cmd cleanup handling Mike Christie
2021-04-10 18:40 ` [PATCH 09/13] scsi: qedi: fix session block/unblock abuse during tmf handling Mike Christie
2021-04-12 10:31   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 10/13] scsi: qedi: fix session block/unblock abuse during cleanup Mike Christie
2021-04-12 10:33   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 11/13] scsi: qedi: pass send_iscsi_tmf task to abort Mike Christie
2021-04-12 10:34   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 12/13] scsi: qedi: make sure tmf works are done before disconnect Mike Christie
2021-04-12 10:36   ` [EXT] " Manish Rangankar
2021-04-10 18:40 ` [PATCH 13/13] scsi: qedi: always wake up if cmd_cleanup_req is set Mike Christie
2021-04-12 10:35   ` [EXT] " Manish Rangankar
2021-04-12 10:22 ` [EXT] [RFC PATCH 0/13]: qedi tmf fixes Manish Rangankar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=BYAPR18MB299833B19DE44B0A4519A2B9D8709@BYAPR18MB2998.namprd18.prod.outlook.com \
    --to=mrangankar@marvell.com \
    --cc=jejb@linux.ibm.com \
    --cc=lduncan@suse.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=michael.christie@oracle.com \
    --cc=svernekar@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.