Re: [PATCH v6 19/33] target: Avoid that LUN reset sporadically triggers data corruption

From: "Nicholas A. Bellinger" <nab@linux-iscsi.org>
To: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: target-devel@vger.kernel.org, Hannes Reinecke <hare@suse.com>,
	Christoph Hellwig <hch@lst.de>, Andy Grover <agrover@redhat.com>,
	David Disseldorp <ddiss@suse.de>,
	stable@vger.kernel.org
Subject: Re: [PATCH v6 19/33] target: Avoid that LUN reset sporadically triggers data corruption
Date: Mon, 20 Feb 2017 15:52:48 -0800	[thread overview]
Message-ID: <1487634768.19491.50.camel@haakon3.risingtidesystems.com> (raw)
In-Reply-To: <20170215002612.14566-20-bart.vanassche@sandisk.com>

On Tue, 2017-02-14 at 16:25 -0800, Bart Van Assche wrote:
> If on an initiator system a LUN reset is issued while I/O is in
> progress with queue depth > 1, avoid that data corruption occurs
> as follows:
> - The initiator submits a READ (a).
> - The initiator submits a LUN reset before READ (a) completes.
> - The target responds that the LUN reset succeeded after READ (a)
>   has been marked as CMD_T_COMPLETE and before .queue_status() has
>   been called.
> - The initiator receives the LUN reset response and frees the
>   tag used by READ (a).
> - The initiator submits READ (b) and reuses the tag of READ (a).
> - The initiator receives the response for READ (a) and interprets
>   this as a completion for READ (b).
> - The initiator receives the completion for READ (b) and discards
>   it.
> 
> With the SRP initiator and target drivers and when running fio
> concurrently with sg_reset -d it only takes a few minutes to
> reproduce this.
> 

Now this is an interesting one.

AFAICT this can happen regardless of the previous changes in this
series, so I'll treat it as a stand-alone bug-fix.

Comments below.

> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Fixes: commit febe562c20df ("target: Fix LUN_RESET active I/O handling for ACK_KREF")
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Andy Grover <agrover@redhat.com>
> Cc: David Disseldorp <ddiss@suse.de>
> Cc: <stable@vger.kernel.org>
> ---
>  drivers/target/target_core_tmr.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
> index 32ea7c61d6ac..16e748eb32d2 100644
> --- a/drivers/target/target_core_tmr.c
> +++ b/drivers/target/target_core_tmr.c
> @@ -109,7 +109,7 @@ static int target_check_cdb_and_preempt(struct list_head *list,
>  	return 1;
>  }
>  
> -static bool __target_check_io_state(struct se_cmd *se_cmd,
> +static bool __target_check_io_state(struct se_cmd *se_cmd, u32 skip_flags,
>  				    struct se_session *tmr_sess, int tas)
>  {
>  	struct se_session *sess = se_cmd->se_sess;
> @@ -127,7 +127,7 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
>  	 * long as se_cmd->cmd_kref is still active unless zero.
>  	 */
>  	spin_lock(&se_cmd->t_state_lock);
> -	if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) {
> +	if (se_cmd->transport_state & (skip_flags | CMD_T_FABRIC_STOP)) {
>  		pr_debug("Attempted to abort io tag: %llu already complete or"
>  			" fabric stop, skipping\n", se_cmd->tag);
>  		spin_unlock(&se_cmd->t_state_lock);
> @@ -182,7 +182,8 @@ void core_tmr_abort_task(
>  		printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
>  			se_cmd->se_tfo->get_fabric_name(), ref_tag);
>  
> -		if (!__target_check_io_state(se_cmd, se_sess, 0))
> +		if (!__target_check_io_state(se_cmd, CMD_T_COMPLETE, se_sess,
> +					     0))
>  			continue;
>  
>  		list_del_init(&se_cmd->se_cmd_list);
> @@ -354,7 +355,7 @@ static void core_tmr_drain_state_list(
>  			continue;
>  
>  		spin_lock(&sess->sess_cmd_lock);
> -		rc = __target_check_io_state(cmd, tmr_sess, tas);
> +		rc = __target_check_io_state(cmd, 0, tmr_sess, tas);
>  		spin_unlock(&sess->sess_cmd_lock);
>  		if (!rc)
>  			continue;

As-is just ignoring CMD_T_COMPLETE in core_tmr_drain_state_list() is not
enough to address this bug and not cause other issues, because once a
se_cmd descriptor is handed back to the fabric driver after
transport_cmd_check_stop_to_fabric() is called,
__target_check_io_state() must not attempt to abort the descriptor.

That said, here is how I'd like to address this particular bug.

1) Allow CMD_T_COMPLETE to occur, but still ignore se_cmds that have
already called transport_cmd_check_stop_to_fabric().  Eg: CMD_T_ACTIVE
is not set, but CMD_T_SENT is set.

2) Since transport_complete_ok() can execute while CMD_T_ABORTED is set,
they both need to check for aborted status, and immediately invoke
transport_cmd_check_stop_to_fabric() if detected.

Here is a first pass at a patch that does both of these things.

Since ib_srpt appears to be the only one who can trigger this bug since
it uses fixed tags, would you be so kind to try to reproduce with this
stand-alone patch..?

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index a806d9b..9ba1427 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -109,25 +109,43 @@ static int target_check_cdb_and_preempt(struct list_head *list,
 	return 1;
 }
 
+static bool target_check_abort_state(struct se_cmd *se_cmd)
+{
+	return (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP));
+}
+
+static bool target_check_lur_state(struct se_cmd *se_cmd)
+{
+	return ((se_cmd->transport_state & CMD_T_FABRIC_STOP) ||
+		(!(se_cmd->transport_state & CMD_T_ACTIVE) &&
+		(se_cmd->transport_state & CMD_T_SENT)));
+}
+
 static bool __target_check_io_state(struct se_cmd *se_cmd,
-				    struct se_session *tmr_sess, int tas)
+				    struct se_session *tmr_sess, int tas,
+				    bool (*check_transport_state)(struct se_cmd *))
 {
 	struct se_session *sess = se_cmd->se_sess;
 
 	assert_spin_locked(&sess->sess_cmd_lock);
 	WARN_ON_ONCE(!irqs_disabled());
 	/*
-	 * If command already reached CMD_T_COMPLETE state within
-	 * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown,
-	 * this se_cmd has been passed to fabric driver and will
-	 * not be aborted.
+	 * For ABORT_TASK, if command already reached CMD_T_COMPLETE
+	 * state within target_complete_cmd() or CMD_T_FABRIC_STOP
+	 * due to shutdown, this se_cmd has been passed to fabric
+	 * driver and will not be aborted.
+	 *
+	 * For LUN_RESET, this is checked using !CMD_T_ACTIVE and
+	 * CMD_T_SENT to determine if se_cmd has already been
+	 * handed off to fabric driver code, and abort here needs
+	 * to be ignored.
 	 *
 	 * Otherwise, obtain a local se_cmd->cmd_kref now for TMR
 	 * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as
 	 * long as se_cmd->cmd_kref is still active unless zero.
 	 */
 	spin_lock(&se_cmd->t_state_lock);
-	if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) {
+	if (check_transport_state(se_cmd)) {
 		pr_debug("Attempted to abort io tag: %llu already complete or"
 			" fabric stop, skipping\n", se_cmd->tag);
 		spin_unlock(&se_cmd->t_state_lock);
@@ -175,7 +193,8 @@ void core_tmr_abort_task(
 		printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
 			se_cmd->se_tfo->get_fabric_name(), ref_tag);
 
-		if (!__target_check_io_state(se_cmd, se_sess, 0))
+		if (!__target_check_io_state(se_cmd, se_sess, 0,
+					     target_check_abort_state))
 			continue;
 
 		list_del_init(&se_cmd->se_cmd_list);
@@ -339,7 +358,8 @@ static void core_tmr_drain_state_list(
 			continue;
 
 		spin_lock(&sess->sess_cmd_lock);
-		rc = __target_check_io_state(cmd, tmr_sess, tas);
+		rc = __target_check_io_state(cmd, tmr_sess, tas,
+					     target_check_lur_state);
 		spin_unlock(&sess->sess_cmd_lock);
 		if (!rc)
 			continue;
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index efb9e6f..246995a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2082,6 +2082,10 @@ static void target_complete_ok_work(struct work_struct *work)
 	 */
 	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
 		WARN_ON(!cmd->scsi_status);
+
+		if (cmd->transport_state & CMD_T_ABORTED)
+			goto queue_out;
+
 		ret = transport_send_check_condition_and_sense(
 					cmd, 0, 1);
 		if (ret == -EAGAIN || ret == -ENOMEM)
@@ -2108,6 +2112,9 @@ static void target_complete_ok_work(struct work_struct *work)
 
 			return;
 		} else if (rc) {
+			if (cmd->transport_state & CMD_T_ABORTED)
+				goto queue_out;
+
 			ret = transport_send_check_condition_and_sense(cmd,
 						rc, 0);
 			if (ret == -EAGAIN || ret == -ENOMEM)
@@ -2120,6 +2127,9 @@ static void target_complete_ok_work(struct work_struct *work)
 	}
 
 queue_rsp:
+	if (cmd->transport_state & CMD_T_ABORTED)
+		goto queue_out;
+
 	switch (cmd->data_direction) {
 	case DMA_FROM_DEVICE:
 		if (cmd->scsi_status)
@@ -2174,6 +2184,7 @@ static void target_complete_ok_work(struct work_struct *work)
 		break;
 	}
 
+queue_out:
 	transport_lun_remove_cmd(cmd);
 	transport_cmd_check_stop_to_fabric(cmd);
 	return;
-- 
1.9.1