linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
@ 2018-02-07  1:11 Bart Van Assche
  2018-02-07 17:06 ` Tejun Heo
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-07  1:11 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Christoph Hellwig, Bart Van Assche, Tejun Heo

The following race can occur between the code that resets the timer
and completion handling:
- The code that handles BLK_EH_RESET_TIMER resets aborted_gstate.
- A completion occurs and blk_mq_complete_request() calls
  __blk_mq_complete_request().
- The timeout code calls blk_add_timer() and that function sets the
  request deadline and adjusts the timer.
- __blk_mq_complete_request() frees the request tag.
- The timer fires and the timeout handler gets called for a freed
  request.

Since I have not found any approach to fix this race that does not
require the use of atomic instructions to manipulate the request state,
fix this race as follows:
- Introduce a spinlock to protecte request state and deadline changes.
- Use the deadline instead of the request generation to detect whether
  or not a request timer got fired after reinitialization of a request.
- Store the request state in the lowest two bits of the deadline instead
  of the lowest to bits of 'gstate'.
- Remove all request member variables that became superfluous due to
  this change: gstate, aborted_gstate, gstate_seq and aborted_gstate_sync.
- Remove the state information that became superfluous due to this patch,
  namely RQF_MQ_TIMEOUT_EXPIRED.
- Remove the code that became superfluous due to this change, namely
  the RCU lock and unlock statements in blk_mq_complete_request() and
  also the synchronize_rcu() call in the timeout handler.

This patch fixes the following kernel crash:

BUG: unable to handle kernel NULL pointer dereference at           (null)
Oops: 0000 [#1] PREEMPT SMP
CPU: 2 PID: 151 Comm: kworker/2:1H Tainted: G        W        4.15.0-dbg+ #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Workqueue: kblockd blk_mq_timeout_work
RIP: 0010:scsi_times_out+0x17/0x2c0 [scsi_mod]
Call Trace:
blk_mq_terminate_expired+0x42/0x80
bt_iter+0x3d/0x50
blk_mq_queue_tag_busy_iter+0xe9/0x200
blk_mq_timeout_work+0x181/0x2e0
process_one_work+0x21c/0x6d0
worker_thread+0x35/0x380
kthread+0x117/0x130
ret_from_fork+0x24/0x30

Fixes: 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a RCU and generation based scheme")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Tejun Heo <tj@kernel.org>
---
 block/blk-core.c       |   3 +-
 block/blk-mq-debugfs.c |   1 -
 block/blk-mq.c         | 178 +++++++++++--------------------------------------
 block/blk-mq.h         |  25 ++-----
 block/blk-timeout.c    |   1 -
 block/blk.h            |   4 +-
 include/linux/blkdev.h |  28 ++------
 7 files changed, 53 insertions(+), 187 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d0d104268f1a..2e3cf04f12a8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -126,8 +126,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->start_time = jiffies;
 	set_start_time_ns(rq);
 	rq->part = NULL;
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
+	spin_lock_init(&rq->state_lock);
 }
 EXPORT_SYMBOL(blk_rq_init);
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 21cbc1f071c6..43084d64aa41 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -290,7 +290,6 @@ static const char *const rqf_name[] = {
 	RQF_NAME(STATS),
 	RQF_NAME(SPECIAL_PAYLOAD),
 	RQF_NAME(ZONE_WRITE_LOCKED),
-	RQF_NAME(MQ_TIMEOUT_EXPIRED),
 	RQF_NAME(MQ_POLL_SLEPT),
 };
 #undef RQF_NAME
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df93102e2149..e17a2536ac50 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -309,7 +309,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	rq->special = NULL;
 	/* tag was already set */
 	rq->extra_len = 0;
-	rq->__deadline = 0;
 
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->timeout = 0;
@@ -531,8 +530,7 @@ static void __blk_mq_complete_request(struct request *rq)
 	bool shared = false;
 	int cpu;
 
-	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
-	blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_COMPLETE);
 
 	if (rq->internal_tag != -1)
 		blk_mq_sched_completed_request(rq);
@@ -581,34 +579,26 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
 		*srcu_idx = srcu_read_lock(hctx->srcu);
 }
 
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
+/**
+ * blk_mq_change_rq_state - atomically test and set request state
+ * @rq: Request pointer.
+ * @old: Old request state.
+ * @new: New request state.
+ */
+static bool blk_mq_change_rq_state(struct request *rq, enum mq_rq_state old,
+				   enum mq_rq_state new)
 {
 	unsigned long flags;
+	bool changed_state = false;
 
-	/*
-	 * blk_mq_rq_aborted_gstate() is used from the completion path and
-	 * can thus be called from irq context.  u64_stats_fetch in the
-	 * middle of update on the same CPU leads to lockup.  Disable irq
-	 * while updating.
-	 */
-	local_irq_save(flags);
-	u64_stats_update_begin(&rq->aborted_gstate_sync);
-	rq->aborted_gstate = gstate;
-	u64_stats_update_end(&rq->aborted_gstate_sync);
-	local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
-	unsigned int start;
-	u64 aborted_gstate;
-
-	do {
-		start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
-		aborted_gstate = rq->aborted_gstate;
-	} while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
+	spin_lock_irqsave(&rq->state_lock, flags);
+	if (blk_mq_rq_state(rq) == old) {
+		blk_mq_rq_update_state(rq, new);
+		changed_state = true;
+	}
+	spin_unlock_irqrestore(&rq->state_lock, flags);
 
-	return aborted_gstate;
+	return changed_state;
 }
 
 /**
@@ -622,27 +612,12 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
 void blk_mq_complete_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
-	int srcu_idx;
 
 	if (unlikely(blk_should_fake_timeout(q)))
 		return;
 
-	/*
-	 * If @rq->aborted_gstate equals the current instance, timeout is
-	 * claiming @rq and we lost.  This is synchronized through
-	 * hctx_lock().  See blk_mq_timeout_work() for details.
-	 *
-	 * Completion path never blocks and we can directly use RCU here
-	 * instead of hctx_lock() which can be either RCU or SRCU.
-	 * However, that would complicate paths which want to synchronize
-	 * against us.  Let stay in sync with the issue path so that
-	 * hctx_lock() covers both issue and completion paths.
-	 */
-	hctx_lock(hctx, &srcu_idx);
-	if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
+	if (blk_mq_change_rq_state(rq, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE))
 		__blk_mq_complete_request(rq);
-	hctx_unlock(hctx, srcu_idx);
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
@@ -669,24 +644,14 @@ void blk_mq_start_request(struct request *rq)
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
 
 	/*
-	 * Mark @rq in-flight which also advances the generation number,
-	 * and register for timeout.  Protect with a seqcount to allow the
-	 * timeout path to read both @rq->gstate and @rq->deadline
-	 * coherently.
-	 *
-	 * This is the only place where a request is marked in-flight.  If
-	 * the timeout path reads an in-flight @rq->gstate, the
-	 * @rq->deadline it reads together under @rq->gstate_seq is
-	 * guaranteed to be the matching one.
+	 * Mark @rq in-flight and register for timeout. Because blk_add_timer()
+	 * updates the deadline, if a timer set by a previous incarnation of
+	 * this request fires this request will be skipped by the timeout code.
 	 */
-	preempt_disable();
-	write_seqcount_begin(&rq->gstate_seq);
-
+	spin_lock_irq(&rq->state_lock);
 	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
 	blk_add_timer(rq);
-
-	write_seqcount_end(&rq->gstate_seq);
-	preempt_enable();
+	spin_unlock_irq(&rq->state_lock);
 
 	if (q->dma_drain_size && blk_rq_bytes(rq)) {
 		/*
@@ -699,11 +664,6 @@ void blk_mq_start_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
 static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -813,15 +773,13 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq);
 struct blk_mq_timeout_data {
 	unsigned long next;
 	unsigned int next_set;
-	unsigned int nr_expired;
 };
 
 static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
 	const struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
-
-	req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
+	unsigned long flags;
 
 	if (ops->timeout)
 		ret = ops->timeout(req, reserved);
@@ -831,13 +789,10 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		__blk_mq_complete_request(req);
 		break;
 	case BLK_EH_RESET_TIMER:
-		/*
-		 * As nothing prevents from completion happening while
-		 * ->aborted_gstate is set, this may lead to ignored
-		 * completions and further spurious timeouts.
-		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
+		spin_lock_irqsave(&req->state_lock, flags);
 		blk_add_timer(req);
+		blk_mq_rq_update_state(req, MQ_RQ_IN_FLIGHT);
+		spin_unlock_irqrestore(&req->state_lock, flags);
 		break;
 	case BLK_EH_NOT_HANDLED:
 		break;
@@ -851,48 +806,23 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
 	struct blk_mq_timeout_data *data = priv;
-	unsigned long gstate, deadline;
-	int start;
-
-	might_sleep();
-
-	if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
-		return;
-
-	/* read coherent snapshots of @rq->state_gen and @rq->deadline */
-	while (true) {
-		start = read_seqcount_begin(&rq->gstate_seq);
-		gstate = READ_ONCE(rq->gstate);
-		deadline = blk_rq_deadline(rq);
-		if (!read_seqcount_retry(&rq->gstate_seq, start))
-			break;
-		cond_resched();
-	}
+	unsigned long deadline;
+	bool timed_out = false;
 
-	/* if in-flight && overdue, mark for abortion */
-	if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
+	spin_lock_irq(&rq->state_lock);
+	deadline = blk_rq_deadline(rq);
+	if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT &&
 	    time_after_eq(jiffies, deadline)) {
-		blk_mq_rq_update_aborted_gstate(rq, gstate);
-		data->nr_expired++;
+		blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+		timed_out = true;
 		hctx->nr_expired++;
 	} else if (!data->next_set || time_after(data->next, deadline)) {
 		data->next = deadline;
 		data->next_set = 1;
 	}
-}
+	spin_unlock_irq(&rq->state_lock);
 
-static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
-		struct request *rq, void *priv, bool reserved)
-{
-	/*
-	 * We marked @rq->aborted_gstate and waited for RCU.  If there were
-	 * completions that we lost to, they would have finished and
-	 * updated @rq->gstate by now; otherwise, the completion path is
-	 * now guaranteed to see @rq->aborted_gstate and yield.  If
-	 * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
-	 */
-	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
-	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
+	if (timed_out)
 		blk_mq_rq_timed_out(rq, reserved);
 }
 
@@ -900,11 +830,7 @@ static void blk_mq_timeout_work(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, timeout_work);
-	struct blk_mq_timeout_data data = {
-		.next		= 0,
-		.next_set	= 0,
-		.nr_expired	= 0,
-	};
+	struct blk_mq_timeout_data data = { };
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
@@ -927,33 +853,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	/* scan for the expired ones and set their ->aborted_gstate */
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
-	if (data.nr_expired) {
-		bool has_rcu = false;
-
-		/*
-		 * Wait till everyone sees ->aborted_gstate.  The
-		 * sequential waits for SRCUs aren't ideal.  If this ever
-		 * becomes a problem, we can add per-hw_ctx rcu_head and
-		 * wait in parallel.
-		 */
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->nr_expired)
-				continue;
-
-			if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-				has_rcu = true;
-			else
-				synchronize_srcu(hctx->srcu);
-
-			hctx->nr_expired = 0;
-		}
-		if (has_rcu)
-			synchronize_rcu();
-
-		/* terminate the ones we won */
-		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
-	}
-
 	if (data.next_set) {
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
 		mod_timer(&q->timeout, data.next);
@@ -2073,8 +1972,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
+	spin_lock_init(&rq->state_lock);
 	return 0;
 }
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 88c558f71819..d4d72f95d5a9 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,10 +27,7 @@ struct blk_mq_ctx {
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
-/*
- * Bits for request->gstate.  The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
+/* Lowest two bits of request->__deadline. */
 enum mq_rq_state {
 	MQ_RQ_IDLE		= 0,
 	MQ_RQ_IN_FLIGHT		= 1,
@@ -38,7 +35,6 @@ enum mq_rq_state {
 
 	MQ_RQ_STATE_BITS	= 2,
 	MQ_RQ_STATE_MASK	= (1 << MQ_RQ_STATE_BITS) - 1,
-	MQ_RQ_GEN_INC		= 1 << MQ_RQ_STATE_BITS,
 };
 
 void blk_mq_freeze_queue(struct request_queue *q);
@@ -104,9 +100,9 @@ void blk_mq_release(struct request_queue *q);
  * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
  * @rq: target request.
  */
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
 {
-	return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
+	return rq->__deadline & MQ_RQ_STATE_MASK;
 }
 
 /**
@@ -115,22 +111,15 @@ static inline int blk_mq_rq_state(struct request *rq)
  * @state: new state to set.
  *
  * Set @rq's state to @state.  The caller is responsible for ensuring that
- * there are no other updaters.  A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
+ * there are no other updaters.
  */
 static inline void blk_mq_rq_update_state(struct request *rq,
 					  enum mq_rq_state state)
 {
-	u64 old_val = READ_ONCE(rq->gstate);
-	u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
-	if (state == MQ_RQ_IN_FLIGHT) {
-		WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
-		new_val += MQ_RQ_GEN_INC;
-	}
+	unsigned long d = rq->__deadline;
 
-	/* avoid exposing interim values */
-	WRITE_ONCE(rq->gstate, new_val);
+	d &= ~(unsigned long)MQ_RQ_STATE_MASK;
+	rq->__deadline = d | state;
 }
 
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index a05e3676d24a..81bf1ff1b4d8 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -216,7 +216,6 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
 
 	/*
 	 * Only the non-mq case needs to add the request to a protected list.
diff --git a/block/blk.h b/block/blk.h
index 46db5dc83dcb..56391f201d5e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -245,12 +245,12 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
  */
 static inline void blk_rq_set_deadline(struct request *rq, unsigned long time)
 {
-	rq->__deadline = time & ~0x1UL;
+	rq->__deadline = (time & ~0x3UL) | (rq->__deadline & 3UL);
 }
 
 static inline unsigned long blk_rq_deadline(struct request *rq)
 {
-	return rq->__deadline & ~0x1UL;
+	return rq->__deadline & ~0x3UL;
 }
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4f3df807cf8f..61438dd09598 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,7 +27,6 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/seqlock.h>
 #include <linux/u64_stats_sync.h>
 
 struct module;
@@ -125,8 +124,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
 /* The per-zone write lock is held for this request */
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
-/* timeout is expired */
-#define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
 /* already slept for hybrid poll */
 #define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
 
@@ -141,6 +138,7 @@ typedef __u32 __bitwise req_flags_t;
  * especially blk_mq_rq_ctx_init() to take care of the added fields.
  */
 struct request {
+	spinlock_t state_lock;		/* protects __deadline for blk-mq */
 	struct request_queue *q;
 	struct blk_mq_ctx *mq_ctx;
 
@@ -226,27 +224,11 @@ struct request {
 	unsigned int extra_len;	/* length of alignment and padding */
 
 	/*
-	 * On blk-mq, the lower bits of ->gstate (generation number and
-	 * state) carry the MQ_RQ_* state value and the upper bits the
-	 * generation number which is monotonically incremented and used to
-	 * distinguish the reuse instances.
-	 *
-	 * ->gstate_seq allows updates to ->gstate and other fields
-	 * (currently ->deadline) during request start to be read
-	 * atomically from the timeout path, so that it can operate on a
-	 * coherent set of information.
+	 * access through blk_rq_set_deadline(), blk_rq_deadline() and
+	 * blk_mark_rq_complete(), blk_clear_rq_complete() and
+	 * blk_rq_is_complete() for legacy queues or blk_mq_rq_state() for
+	 * blk-mq queues.
 	 */
-	seqcount_t gstate_seq;
-	u64 gstate;
-
-	/*
-	 * ->aborted_gstate is used by the timeout to claim a specific
-	 * recycle instance of this request.  See blk_mq_timeout_work().
-	 */
-	struct u64_stats_sync aborted_gstate_sync;
-	u64 aborted_gstate;
-
-	/* access through blk_rq_set_deadline, blk_rq_deadline */
 	unsigned long __deadline;
 
 	struct list_head timeout_list;
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07  1:11 [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling Bart Van Assche
@ 2018-02-07 17:06 ` Tejun Heo
  2018-02-07 17:27   ` Bart Van Assche
  2018-02-07 19:03   ` Bart Van Assche
  0 siblings, 2 replies; 28+ messages in thread
From: Tejun Heo @ 2018-02-07 17:06 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: Jens Axboe, linux-block, Christoph Hellwig

Hello, Bart.

On Tue, Feb 06, 2018 at 05:11:33PM -0800, Bart Van Assche wrote:
> The following race can occur between the code that resets the timer
> and completion handling:
> - The code that handles BLK_EH_RESET_TIMER resets aborted_gstate.
> - A completion occurs and blk_mq_complete_request() calls
>   __blk_mq_complete_request().
> - The timeout code calls blk_add_timer() and that function sets the
>   request deadline and adjusts the timer.
> - __blk_mq_complete_request() frees the request tag.
> - The timer fires and the timeout handler gets called for a freed
>   request.

Can you see whether by any chance the following patch fixes the issue?
If not, can you share the repro case?

Thanks.

diff --git a/block/blk-mq.c b/block/blk-mq.c
index df93102..651d18c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -836,8 +836,8 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		 * ->aborted_gstate is set, this may lead to ignored
 		 * completions and further spurious timeouts.
 		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
 		blk_add_timer(req);
+		blk_mq_rq_update_aborted_gstate(req, 0);
 		break;
 	case BLK_EH_NOT_HANDLED:
 		break;

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 17:06 ` Tejun Heo
@ 2018-02-07 17:27   ` Bart Van Assche
  2018-02-07 17:35     ` tj
  2018-02-07 19:03   ` Bart Van Assche
  1 sibling, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-07 17:27 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTA3IGF0IDA5OjA2IC0wODAwLCBUZWp1biBIZW8gd3JvdGU6DQo+IE9u
IFR1ZSwgRmViIDA2LCAyMDE4IGF0IDA1OjExOjMzUE0gLTA4MDAsIEJhcnQgVmFuIEFzc2NoZSB3
cm90ZToNCj4gPiBUaGUgZm9sbG93aW5nIHJhY2UgY2FuIG9jY3VyIGJldHdlZW4gdGhlIGNvZGUg
dGhhdCByZXNldHMgdGhlIHRpbWVyDQo+ID4gYW5kIGNvbXBsZXRpb24gaGFuZGxpbmc6DQo+ID4g
LSBUaGUgY29kZSB0aGF0IGhhbmRsZXMgQkxLX0VIX1JFU0VUX1RJTUVSIHJlc2V0cyBhYm9ydGVk
X2dzdGF0ZS4NCj4gPiAtIEEgY29tcGxldGlvbiBvY2N1cnMgYW5kIGJsa19tcV9jb21wbGV0ZV9y
ZXF1ZXN0KCkgY2FsbHMNCj4gPiAgIF9fYmxrX21xX2NvbXBsZXRlX3JlcXVlc3QoKS4NCj4gPiAt
IFRoZSB0aW1lb3V0IGNvZGUgY2FsbHMgYmxrX2FkZF90aW1lcigpIGFuZCB0aGF0IGZ1bmN0aW9u
IHNldHMgdGhlDQo+ID4gICByZXF1ZXN0IGRlYWRsaW5lIGFuZCBhZGp1c3RzIHRoZSB0aW1lci4N
Cj4gPiAtIF9fYmxrX21xX2NvbXBsZXRlX3JlcXVlc3QoKSBmcmVlcyB0aGUgcmVxdWVzdCB0YWcu
DQo+ID4gLSBUaGUgdGltZXIgZmlyZXMgYW5kIHRoZSB0aW1lb3V0IGhhbmRsZXIgZ2V0cyBjYWxs
ZWQgZm9yIGEgZnJlZWQNCj4gPiAgIHJlcXVlc3QuDQo+IA0KPiBDYW4geW91IHNlZSB3aGV0aGVy
IGJ5IGFueSBjaGFuY2UgdGhlIGZvbGxvd2luZyBwYXRjaCBmaXhlcyB0aGUgaXNzdWU/DQo+IElm
IG5vdCwgY2FuIHlvdSBzaGFyZSB0aGUgcmVwcm8gY2FzZT8NCj4gDQo+IFRoYW5rcy4NCj4gDQo+
IGRpZmYgLS1naXQgYS9ibG9jay9ibGstbXEuYyBiL2Jsb2NrL2Jsay1tcS5jDQo+IGluZGV4IGRm
OTMxMDIuLjY1MWQxOGMgMTAwNjQ0DQo+IC0tLSBhL2Jsb2NrL2Jsay1tcS5jDQo+ICsrKyBiL2Js
b2NrL2Jsay1tcS5jDQo+IEBAIC04MzYsOCArODM2LDggQEAgc3RhdGljIHZvaWQgYmxrX21xX3Jx
X3RpbWVkX291dChzdHJ1Y3QgcmVxdWVzdCAqcmVxLCBib29sIHJlc2VydmVkKQ0KPiAgCQkgKiAt
PmFib3J0ZWRfZ3N0YXRlIGlzIHNldCwgdGhpcyBtYXkgbGVhZCB0byBpZ25vcmVkDQo+ICAJCSAq
IGNvbXBsZXRpb25zIGFuZCBmdXJ0aGVyIHNwdXJpb3VzIHRpbWVvdXRzLg0KPiAgCQkgKi8NCj4g
LQkJYmxrX21xX3JxX3VwZGF0ZV9hYm9ydGVkX2dzdGF0ZShyZXEsIDApOw0KPiAgCQlibGtfYWRk
X3RpbWVyKHJlcSk7DQo+ICsJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocmVxLCAw
KTsNCj4gIAkJYnJlYWs7DQo+ICAJY2FzZSBCTEtfRUhfTk9UX0hBTkRMRUQ6DQo+ICAJCWJyZWFr
Ow0KDQpIZWxsbyBUZWp1biwNCg0KRXZlbiB3aXRoIHRoZSBhYm92ZSBjaGFuZ2UgSSB0aGluayB0
aGF0IHRoZXJlIGlzIHN0aWxsIGEgcmFjZSBiZXR3ZWVuIHRoZQ0KY29kZSB0aGF0IGhhbmRsZXMg
dGltZXIgcmVzZXRzIGFuZCB0aGUgY29tcGxldGlvbiBoYW5kbGVyLiBBbnl3YXksIHRoZSB0ZXN0
DQp3aXRoIHdoaWNoIEkgdHJpZ2dlcmVkIHRoZXNlIHJhY2VzIGlzIGFzIGZvbGxvd3M6DQotIFN0
YXJ0IGZyb20gd2hhdCB3aWxsIGJlY29tZSBrZXJuZWwgdjQuMTYtcmMxIGFuZCBhcHBseSB0aGUg
cGF0Y2ggdGhhdCBhZGRzDQogIFNSUCBvdmVyIFJvQ0Ugc3VwcG9ydCB0byB0aGUgaWJfc3JwdCBk
cml2ZXIuIFNlZSBhbHNvIHRoZSAiW1BBVENIIHYyIDAwLzE0XQ0KICBJQi9zcnB0OiBBZGQgUkRN
QS9DTSBzdXBwb3J0IiBwYXRjaCBzZXJpZXMNCiAgKGh0dHBzOi8vd3d3LnNwaW5pY3MubmV0L2xp
c3RzL2xpbnV4LXJkbWEvbXNnNTk1ODkuaHRtbCkuDQotIEFwcGx5IG15IHBhdGNoIHNlcmllcyB0
aGF0IGZpeGVzIGEgcmFjZSBiZXR3ZWVuIHRoZSBTQ1NJIGVycm9yIGhhbmRsZXIgYW5kDQogIFND
U0kgdHJhbnNwb3J0IHJlY292ZXJ5Lg0KLSBBcHBseSBteSBwYXRjaCBzZXJpZXMgdGhhdCBpbXBy
b3ZlcyB0aGUgc3RhYmlsaXR5IG9mIHRoZSBTQ1NJIHRhcmdldCBjb3JlDQogIChMSU8pLg0KLSBC
dWlsZCBhbmQgaW5zdGFsbCB0aGF0IGtlcm5lbC4NCi0gQ2xvbmUgdGhlIGZvbGxvd2luZyByZXBv
c2l0b3J5OiBodHRwczovL2dpdGh1Yi5jb20vYnZhbmFzc2NoZS9zcnAtdGVzdC4NCi0gUnVuIHRo
ZSBmb2xsb3dpbmcgdGVzdDoNCiAgd2hpbGUgdHJ1ZTsgZG8gc3JwLXRlc3QvcnVuX3Rlc3RzIC1j
IC10IDAyLW1xOyBkb25lDQotIFdoaWxlIHRoZSB0ZXN0IGlzIHJ1bm5pbmcsIGNoZWNrIHdoZXRo
ZXIgb3Igbm90IHNvbWV0aGluZyB3ZWlyZCBoYXBwZW5zLg0KICBTb21ldGltZXMgSSBzZWUgdGhh
dCBzY3NpX3RpbWVzX291dCgpIGNyYXNoZXMuIFNvbWV0aW1lcyBJIHNlZSB3aGlsZSBydW5uaW5n
DQogIHRoaXMgdGVzdCB0aGF0IGEgc29mdCBsb2NrdXAgaXMgcmVwb3J0ZWQgaW5zaWRlIGJsa19t
cV9kb19kaXNwYXRjaF9jdHgoKS4NCg0KSWYgeW91IHdhbnQgSSBjYW4gc2hhcmUgdGhlIHRyZWUg
b24gZ2l0aHViIHRoYXQgSSB1c2UgbXlzZWxmIGZvciBteSB0ZXN0cy4NCg0KVGhhbmtzLA0KDQpC
YXJ0Lg==

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 17:27   ` Bart Van Assche
@ 2018-02-07 17:35     ` tj
  2018-02-07 18:14       ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-07 17:35 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

On Wed, Feb 07, 2018 at 05:27:10PM +0000, Bart Van Assche wrote:
> Even with the above change I think that there is still a race between the
> code that handles timer resets and the completion handler. Anyway, the test

Can you elaborate the scenario a bit further?  If you're referring to
lost completions, we've always had that and while we can try to close
that hole too, I don't think it's a critical issue.

> with which I triggered these races is as follows:
> - Start from what will become kernel v4.16-rc1 and apply the patch that adds
>   SRP over RoCE support to the ib_srpt driver. See also the "[PATCH v2 00/14]
>   IB/srpt: Add RDMA/CM support" patch series
>   (https://www.spinics.net/lists/linux-rdma/msg59589.html).
> - Apply my patch series that fixes a race between the SCSI error handler and
>   SCSI transport recovery.
> - Apply my patch series that improves the stability of the SCSI target core
>   (LIO).
> - Build and install that kernel.
> - Clone the following repository: https://github.com/bvanassche/srp-test.
> - Run the following test:
>   while true; do srp-test/run_tests -c -t 02-mq; done
> - While the test is running, check whether or not something weird happens.
>   Sometimes I see that scsi_times_out() crashes. Sometimes I see while running
>   this test that a soft lockup is reported inside blk_mq_do_dispatch_ctx().
> 
> If you want I can share the tree on github that I use myself for my tests.

Heh, that's quite a bit.  I'll take up on that git tree later but for
now I'd really appreciate if you can test the patch.

Thank you very much.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 17:35     ` tj
@ 2018-02-07 18:14       ` Bart Van Assche
  2018-02-07 20:07         ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-07 18:14 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTA3IGF0IDA5OjM1IC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBPbiBXZWQsIEZlYiAwNywgMjAxOCBhdCAwNToyNzoxMFBNICswMDAwLCBCYXJ0IFZhbiBBc3Nj
aGUgd3JvdGU6DQo+ID4gRXZlbiB3aXRoIHRoZSBhYm92ZSBjaGFuZ2UgSSB0aGluayB0aGF0IHRo
ZXJlIGlzIHN0aWxsIGEgcmFjZSBiZXR3ZWVuIHRoZQ0KPiA+IGNvZGUgdGhhdCBoYW5kbGVzIHRp
bWVyIHJlc2V0cyBhbmQgdGhlIGNvbXBsZXRpb24gaGFuZGxlci4NCj4gDQo+IENhbiB5b3UgZWxh
Ym9yYXRlIHRoZSBzY2VuYXJpbyBhIGJpdCBmdXJ0aGVyPyAgSWYgeW91J3JlIHJlZmVycmluZyB0
bw0KPiBsb3N0IGNvbXBsZXRpb25zLCB3ZSd2ZSBhbHdheXMgaGFkIHRoYXQgYW5kIHdoaWxlIHdl
IGNhbiB0cnkgdG8gY2xvc2UNCj4gdGhhdCBob2xlIHRvbywgSSBkb24ndCB0aGluayBpdCdzIGEg
Y3JpdGljYWwgaXNzdWUuDQoNCkhlbGxvIFRlanVuLA0KDQpXaGVuIEkgd3JvdGUgbXkgY29tbWVu
dCBJIHdhcyBub3Qgc3VyZSB3aGV0aGVyIG9yIG5vdCBub24tcmVlbnRyYW5jeSBpcw0KZ3VhcmFu
dGVlZCBmb3Igd29yayBxdWV1ZSBpdGVtcy4gSG93ZXZlciwgYWNjb3JkaW5nIHRvIHdoYXQgSSBm
b3VuZCBpbiB0aGUNCndvcmtxdWV1ZSBpbXBsZW1lbnRhdGlvbiBJIHRoaW5rIHRoYXQgaXMgZ3Vh
cmFudGVlZC4gU28gaXQgc2hvdWxkbid0IGJlDQpwb3NzaWJsZSB0aGF0IHRoZSB0aW1lciBhY3Rp
dmF0ZWQgYnkgYmxrX2FkZF90aW1lcigpIGdldHMgaGFuZGxlZCBiZWZvcmUNCmFib3J0ZWRfZ3N0
YXRlIGlzIHJlc2V0LiBCdXQgc2luY2UgdGhlIHRpbWVvdXQgaGFuZGxlciBhbmQgY29tcGxldGlv
bg0KaGFuZGxlcnMgY2FuIGJlIGV4ZWN1dGVkIGJ5IGRpZmZlcmVudCBDUFVzLCBzaG91bGRuJ3Qg
YSBtZW1vcnkgYmFycmllciBiZQ0KaW5zZXJ0ZWQgYmV0d2VlbiB0aGUgYmxrX2FkZF90aW1lcigp
IGNhbGwgYW5kIHJlc2V0dGluZyBhYm9ydGVkX2dzeW5jIHRvDQpndWFyYW50ZWUgdGhhdCBhIGNv
bXBsZXRpb24gY2Fubm90IG9jY3VyIGJlZm9yZSBibGtfYWRkX3RpbWVyKCkgaGFzIHJlc2V0DQpS
UUZfTVFfVElNRU9VVF9FWFBJUkVEPw0KDQpUaGFua3MsDQoNCkJhcnQuDQoNCg0K

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 17:06 ` Tejun Heo
  2018-02-07 17:27   ` Bart Van Assche
@ 2018-02-07 19:03   ` Bart Van Assche
  2018-02-07 20:09     ` tj
  1 sibling, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-07 19:03 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTA3IGF0IDA5OjA2IC0wODAwLCBUZWp1biBIZW8gd3JvdGU6DQo+IENh
biB5b3Ugc2VlIHdoZXRoZXIgYnkgYW55IGNoYW5jZSB0aGUgZm9sbG93aW5nIHBhdGNoIGZpeGVz
IHRoZSBpc3N1ZT8NCj4gSWYgbm90LCBjYW4geW91IHNoYXJlIHRoZSByZXBybyBjYXNlPw0KPiAN
Cj4gVGhhbmtzLg0KPiANCj4gZGlmZiAtLWdpdCBhL2Jsb2NrL2Jsay1tcS5jIGIvYmxvY2svYmxr
LW1xLmMNCj4gaW5kZXggZGY5MzEwMi4uNjUxZDE4YyAxMDA2NDQNCj4gLS0tIGEvYmxvY2svYmxr
LW1xLmMNCj4gKysrIGIvYmxvY2svYmxrLW1xLmMNCj4gQEAgLTgzNiw4ICs4MzYsOCBAQCBzdGF0
aWMgdm9pZCBibGtfbXFfcnFfdGltZWRfb3V0KHN0cnVjdCByZXF1ZXN0ICpyZXEsIGJvb2wgcmVz
ZXJ2ZWQpDQo+ICAJCSAqIC0+YWJvcnRlZF9nc3RhdGUgaXMgc2V0LCB0aGlzIG1heSBsZWFkIHRv
IGlnbm9yZWQNCj4gIAkJICogY29tcGxldGlvbnMgYW5kIGZ1cnRoZXIgc3B1cmlvdXMgdGltZW91
dHMuDQo+ICAJCSAqLw0KPiAtCQlibGtfbXFfcnFfdXBkYXRlX2Fib3J0ZWRfZ3N0YXRlKHJlcSwg
MCk7DQo+ICAJCWJsa19hZGRfdGltZXIocmVxKTsNCj4gKwkJYmxrX21xX3JxX3VwZGF0ZV9hYm9y
dGVkX2dzdGF0ZShyZXEsIDApOw0KPiAgCQlicmVhazsNCj4gIAljYXNlIEJMS19FSF9OT1RfSEFO
RExFRDoNCj4gIAkJYnJlYWs7DQoNCkhlbGxvIFRlanVuLA0KDQpJIHRyaWVkIHRoZSBhYm92ZSBw
YXRjaCBidXQgYWxyZWFkeSBkdXJpbmcgdGhlIGZpcnN0IGl0ZXJhdGlvbiBvZiB0aGUgdGVzdCBJ
DQpub3RpY2VkIHRoYXQgdGhlIHRlc3QgaHVuZywgcHJvYmFibHkgZHVlIHRvIHRoZSBmb2xsb3dp
bmcgcmVxdWVzdCB0aGF0IGdvdCBzdHVjazoNCg0KJCAoY2QgL3N5cy9rZXJuZWwvZGVidWcvYmxv
Y2sgJiYgZ3JlcCAtYUggLiAqLyovKi9ycV9saXN0KQ0KMDAwMDAwMDBhOThjZmY2MCB7Lm9wPVND
U0lfSU4sIC5jbWRfZmxhZ3M9LCAucnFfZmxhZ3M9TVFfSU5GTElHSFR8UFJFRU1QVHxRVUlFVHxJ
T19TVEFUfFBNLA0KIC5zdGF0ZT1pZGxlLCAudGFnPTIyLCAuaW50ZXJuYWxfdGFnPS0xLCAuY21k
PVN5bmNocm9uaXplIENhY2hlKDEwKSAzNSAwMCAwMCAwMCAwMCAwMCwgLnJldHJpZXM9MCwNCiAu
cmVzdWx0ID0gMHgwLCAuZmxhZ3M9VEFHR0VELCAudGltZW91dD02MC4wMDAsIGFsbG9jYXRlZCA4
NzIuNjkwIHMgYWdvfQ0KDQpUaGFua3MsDQoNCkJhcnQuDQoNCg0K

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 18:14       ` Bart Van Assche
@ 2018-02-07 20:07         ` tj
  2018-02-07 23:48           ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-07 20:07 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

On Wed, Feb 07, 2018 at 06:14:13PM +0000, Bart Van Assche wrote:
> When I wrote my comment I was not sure whether or not non-reentrancy is
> guaranteed for work queue items. However, according to what I found in the
> workqueue implementation I think that is guaranteed. So it shouldn't be
> possible that the timer activated by blk_add_timer() gets handled before
> aborted_gstate is reset. But since the timeout handler and completion

Yeah, we're basically single threaded in the timeout path.

> handlers can be executed by different CPUs, shouldn't a memory barrier be
> inserted between the blk_add_timer() call and resetting aborted_gsync to
> guarantee that a completion cannot occur before blk_add_timer() has reset
> RQF_MQ_TIMEOUT_EXPIRED?

Ah, you're right.  u64_stat_sync doesn't imply barriers, so we want
something like the following.

diff --git a/block/blk-mq.c b/block/blk-mq.c
index df93102..d6edf3b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -593,7 +593,7 @@ static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
 	 */
 	local_irq_save(flags);
 	u64_stats_update_begin(&rq->aborted_gstate_sync);
-	rq->aborted_gstate = gstate;
+	smp_store_release(&rq->aborted_gstate, gstate);
 	u64_stats_update_end(&rq->aborted_gstate_sync);
 	local_irq_restore(flags);
 }
@@ -605,7 +605,7 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
 
 	do {
 		start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
-		aborted_gstate = rq->aborted_gstate;
+		aborted_gstate = smp_load_acquire(&rq->aborted_gstate);
 	} while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
 
 	return aborted_gstate;
@@ -836,8 +836,8 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		 * ->aborted_gstate is set, this may lead to ignored
 		 * completions and further spurious timeouts.
 		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
 		blk_add_timer(req);
+		blk_mq_rq_update_aborted_gstate(req, 0);
 		break;
 	case BLK_EH_NOT_HANDLED:
 		break;

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 19:03   ` Bart Van Assche
@ 2018-02-07 20:09     ` tj
  2018-02-07 21:02       ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-07 20:09 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello,

On Wed, Feb 07, 2018 at 07:03:56PM +0000, Bart Van Assche wrote:
> I tried the above patch but already during the first iteration of the test I
> noticed that the test hung, probably due to the following request that got stuck:
> 
> $ (cd /sys/kernel/debug/block && grep -aH . */*/*/rq_list)
> 00000000a98cff60 {.op=SCSI_IN, .cmd_flags=, .rq_flags=MQ_INFLIGHT|PREEMPT|QUIET|IO_STAT|PM,
>  .state=idle, .tag=22, .internal_tag=-1, .cmd=Synchronize Cache(10) 35 00 00 00 00 00, .retries=0,
>  .result = 0x0, .flags=TAGGED, .timeout=60.000, allocated 872.690 s ago}

I'm wonder how this happened, so we can lose a completion when it
races against BLK_EH_RESET_TIMER; however, the command should timeout
later cuz the timer is running again now.  Maybe we actually had the
memory barrier race that you pointed out in the other message?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 20:09     ` tj
@ 2018-02-07 21:02       ` Bart Van Assche
  2018-02-07 21:40         ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-07 21:02 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTA3IGF0IDEyOjA5IC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBIZWxsbywNCj4gDQo+IE9uIFdlZCwgRmViIDA3LCAyMDE4IGF0IDA3OjAzOjU2UE0gKzAwMDAs
IEJhcnQgVmFuIEFzc2NoZSB3cm90ZToNCj4gPiBJIHRyaWVkIHRoZSBhYm92ZSBwYXRjaCBidXQg
YWxyZWFkeSBkdXJpbmcgdGhlIGZpcnN0IGl0ZXJhdGlvbiBvZiB0aGUgdGVzdCBJDQo+ID4gbm90
aWNlZCB0aGF0IHRoZSB0ZXN0IGh1bmcsIHByb2JhYmx5IGR1ZSB0byB0aGUgZm9sbG93aW5nIHJl
cXVlc3QgdGhhdCBnb3Qgc3R1Y2s6DQo+ID4gDQo+ID4gJCAoY2QgL3N5cy9rZXJuZWwvZGVidWcv
YmxvY2sgJiYgZ3JlcCAtYUggLiAqLyovKi9ycV9saXN0KQ0KPiA+IDAwMDAwMDAwYTk4Y2ZmNjAg
ey5vcD1TQ1NJX0lOLCAuY21kX2ZsYWdzPSwgLnJxX2ZsYWdzPU1RX0lORkxJR0hUfFBSRUVNUFR8
UVVJRVR8SU9fU1RBVHxQTSwNCj4gPiAgLnN0YXRlPWlkbGUsIC50YWc9MjIsIC5pbnRlcm5hbF90
YWc9LTEsIC5jbWQ9U3luY2hyb25pemUgQ2FjaGUoMTApIDM1IDAwIDAwIDAwIDAwIDAwLCAucmV0
cmllcz0wLA0KPiA+ICAucmVzdWx0ID0gMHgwLCAuZmxhZ3M9VEFHR0VELCAudGltZW91dD02MC4w
MDAsIGFsbG9jYXRlZCA4NzIuNjkwIHMgYWdvfQ0KPiANCj4gSSdtIHdvbmRlciBob3cgdGhpcyBo
YXBwZW5lZCwgc28gd2UgY2FuIGxvc2UgYSBjb21wbGV0aW9uIHdoZW4gaXQNCj4gcmFjZXMgYWdh
aW5zdCBCTEtfRUhfUkVTRVRfVElNRVI7IGhvd2V2ZXIsIHRoZSBjb21tYW5kIHNob3VsZCB0aW1l
b3V0DQo+IGxhdGVyIGN1eiB0aGUgdGltZXIgaXMgcnVubmluZyBhZ2FpbiBub3cuICBNYXliZSB3
ZSBhY3R1YWxseSBoYWQgdGhlDQo+IG1lbW9yeSBiYXJyaWVyIHJhY2UgdGhhdCB5b3UgcG9pbnRl
ZCBvdXQgaW4gdGhlIG90aGVyIG1lc3NhZ2U/DQoNCkhlbGxvIFRlanVuLA0KDQpUaGUgcGF0Y2gg
dGhhdCBJIHVzZWQgaW4gbXkgdGVzdCBoYWQgYW4gc21wX3dtYigpIGNhbGwgKHNlZSBhbHNvIGJl
bG93KS4gQW55d2F5LA0KSSB3aWxsIHNlZSB3aGV0aGVyIEkgY2FuIGV4dHJhY3QgbW9yZSBzdGF0
ZSBpbmZvcm1hdGlvbiB0aHJvdWdoIGRlYnVnZnMuDQoNCmRpZmYgLS1naXQgYS9ibG9jay9ibGst
bXEuYyBiL2Jsb2NrL2Jsay1tcS5jDQppbmRleCBlZjRmNmRmMGYxZGYuLjhlYjIxMDVkODJiNyAx
MDA2NDQNCi0tLSBhL2Jsb2NrL2Jsay1tcS5jDQorKysgYi9ibG9jay9ibGstbXEuYw0KQEAgLTgy
NywxMyArODI3LDkgQEAgc3RhdGljIHZvaWQgYmxrX21xX3JxX3RpbWVkX291dChzdHJ1Y3QgcmVx
dWVzdCAqcmVxLCBib29sIHJlc2VydmVkKQ0KIAkJX19ibGtfbXFfY29tcGxldGVfcmVxdWVzdChy
ZXEpOw0KIAkJYnJlYWs7DQogCWNhc2UgQkxLX0VIX1JFU0VUX1RJTUVSOg0KLQkJLyoNCi0JCSAq
IEFzIG5vdGhpbmcgcHJldmVudHMgZnJvbSBjb21wbGV0aW9uIGhhcHBlbmluZyB3aGlsZQ0KLQkJ
ICogLT5hYm9ydGVkX2dzdGF0ZSBpcyBzZXQsIHRoaXMgbWF5IGxlYWQgdG8gaWdub3JlZA0KLQkJ
ICogY29tcGxldGlvbnMgYW5kIGZ1cnRoZXIgc3B1cmlvdXMgdGltZW91dHMuDQotCQkgKi8NCi0J
CWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocmVxLCAwKTsNCiAJCWJsa19hZGRfdGlt
ZXIocmVxKTsNCisJCXNtcF93bWIoKTsNCisJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3Rh
dGUocmVxLCAwKTsNCiAJCWJyZWFrOw0KIAljYXNlIEJMS19FSF9OT1RfSEFORExFRDoNCiAJCWJy
ZWFrOw0KDQoNCg0K

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 21:02       ` Bart Van Assche
@ 2018-02-07 21:40         ` tj
  0 siblings, 0 replies; 28+ messages in thread
From: tj @ 2018-02-07 21:40 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello,

On Wed, Feb 07, 2018 at 09:02:21PM +0000, Bart Van Assche wrote:
> The patch that I used in my test had an smp_wmb() call (see also below). Anyway,
> I will see whether I can extract more state information through debugfs.
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index ef4f6df0f1df..8eb2105d82b7 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -827,13 +827,9 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
>  		__blk_mq_complete_request(req);
>  		break;
>  	case BLK_EH_RESET_TIMER:
> -		/*
> -		 * As nothing prevents from completion happening while
> -		 * ->aborted_gstate is set, this may lead to ignored
> -		 * completions and further spurious timeouts.
> -		 */
> -		blk_mq_rq_update_aborted_gstate(req, 0);
>  		blk_add_timer(req);
> +		smp_wmb();
> +		blk_mq_rq_update_aborted_gstate(req, 0);

Without the matching rmb, just adding rmb won't do much but given the
default strong ordering on x86 and other operations around, what you
were seeing is probably not caused by lack of barriers.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 20:07         ` tj
@ 2018-02-07 23:48           ` Bart Van Assche
  2018-02-08  1:09             ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-07 23:48 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTA3IGF0IDEyOjA3IC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBBaCwgeW91J3JlIHJpZ2h0LiAgdTY0X3N0YXRfc3luYyBkb2Vzbid0IGltcGx5IGJhcnJpZXJz
LCBzbyB3ZSB3YW50DQo+IHNvbWV0aGluZyBsaWtlIHRoZSBmb2xsb3dpbmcuDQo+IA0KPiBkaWZm
IC0tZ2l0IGEvYmxvY2svYmxrLW1xLmMgYi9ibG9jay9ibGstbXEuYw0KPiBpbmRleCBkZjkzMTAy
Li5kNmVkZjNiIDEwMDY0NA0KPiAtLS0gYS9ibG9jay9ibGstbXEuYw0KPiArKysgYi9ibG9jay9i
bGstbXEuYw0KPiBAQCAtNTkzLDcgKzU5Myw3IEBAIHN0YXRpYyB2b2lkIGJsa19tcV9ycV91cGRh
dGVfYWJvcnRlZF9nc3RhdGUoc3RydWN0IHJlcXVlc3QgKnJxLCB1NjQgZ3N0YXRlKQ0KPiAgCSAq
Lw0KPiAgCWxvY2FsX2lycV9zYXZlKGZsYWdzKTsNCj4gIAl1NjRfc3RhdHNfdXBkYXRlX2JlZ2lu
KCZycS0+YWJvcnRlZF9nc3RhdGVfc3luYyk7DQo+IC0JcnEtPmFib3J0ZWRfZ3N0YXRlID0gZ3N0
YXRlOw0KPiArCXNtcF9zdG9yZV9yZWxlYXNlKCZycS0+YWJvcnRlZF9nc3RhdGUsIGdzdGF0ZSk7
DQo+ICAJdTY0X3N0YXRzX3VwZGF0ZV9lbmQoJnJxLT5hYm9ydGVkX2dzdGF0ZV9zeW5jKTsNCj4g
IAlsb2NhbF9pcnFfcmVzdG9yZShmbGFncyk7DQo+ICB9DQo+IEBAIC02MDUsNyArNjA1LDcgQEAg
c3RhdGljIHU2NCBibGtfbXFfcnFfYWJvcnRlZF9nc3RhdGUoc3RydWN0IHJlcXVlc3QgKnJxKQ0K
PiAgDQo+ICAJZG8gew0KPiAgCQlzdGFydCA9IHU2NF9zdGF0c19mZXRjaF9iZWdpbigmcnEtPmFi
b3J0ZWRfZ3N0YXRlX3N5bmMpOw0KPiAtCQlhYm9ydGVkX2dzdGF0ZSA9IHJxLT5hYm9ydGVkX2dz
dGF0ZTsNCj4gKwkJYWJvcnRlZF9nc3RhdGUgPSBzbXBfbG9hZF9hY3F1aXJlKCZycS0+YWJvcnRl
ZF9nc3RhdGUpOw0KPiAgCX0gd2hpbGUgKHU2NF9zdGF0c19mZXRjaF9yZXRyeSgmcnEtPmFib3J0
ZWRfZ3N0YXRlX3N5bmMsIHN0YXJ0KSk7DQo+ICANCj4gIAlyZXR1cm4gYWJvcnRlZF9nc3RhdGU7
DQo+IEBAIC04MzYsOCArODM2LDggQEAgc3RhdGljIHZvaWQgYmxrX21xX3JxX3RpbWVkX291dChz
dHJ1Y3QgcmVxdWVzdCAqcmVxLCBib29sIHJlc2VydmVkKQ0KPiAgCQkgKiAtPmFib3J0ZWRfZ3N0
YXRlIGlzIHNldCwgdGhpcyBtYXkgbGVhZCB0byBpZ25vcmVkDQo+ICAJCSAqIGNvbXBsZXRpb25z
IGFuZCBmdXJ0aGVyIHNwdXJpb3VzIHRpbWVvdXRzLg0KPiAgCQkgKi8NCj4gLQkJYmxrX21xX3Jx
X3VwZGF0ZV9hYm9ydGVkX2dzdGF0ZShyZXEsIDApOw0KPiAgCQlibGtfYWRkX3RpbWVyKHJlcSk7
DQo+ICsJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocmVxLCAwKTsNCj4gIAkJYnJl
YWs7DQo+ICAJY2FzZSBCTEtfRUhfTk9UX0hBTkRMRUQ6DQo+ICAJCWJyZWFrOw0KDQpIZWxsbyBU
ZWp1biwNCg0KV2l0aCB0aGlzIHBhdGNoIGFwcGxpZWQgSSBzZWUgcmVxdWVzdHMgZm9yIHdoaWNo
IGl0IHNlZW1zIGxpa2UgdGhlIHRpbWVvdXQgaGFuZGxlcg0KZGlkIG5vdCBnZXQgaW52b2tlZDoN
Cg0Kc2RjL2hjdHgwL2J1c3k6MDAwMDAwMDA5NWUwNGI3YyB7Lm9wPVdSSVRFLCAuY21kX2ZsYWdz
PUZBSUxGQVNUX1RSQU5TUE9SVHxTWU5DfE4NCk9NRVJHRXxJRExFLCAucnFfZmxhZ3M9TVFfSU5G
TElHSFR8RE9OVFBSRVB8SU9fU1RBVHxNUV9USU1FT1VUX0VYUElSRUQsIC5zdGF0ZT1pDQpuX2Zs
aWdodCwgLmdzdGF0ZT0weGVkLzB4ZWQsIC50YWc9MjYsIC5pbnRlcm5hbF90YWc9LTEsIC5jbWQ9
V3JpdGUoMTApIDJhIDAwIDAwICANCjAwIDYwIGJhIDAwIDAwIDA4IDAwLCAucmV0cmllcz0wLCAu
cmVzdWx0ID0gMHg1MDAwMCwgLmZsYWdzPVRBR0dFRHxJTklUSUFMSVpFRCwgIA0KLnRpbWVvdXQ9
MS4wMDAsIGFsbG9jYXRlZCAxMDkzLjE4MCBzIGFnb30NCg0Kc2RjL2hjdHgwL2J1c3k6MDAwMDAw
MDA2NWE2NGU5YiB7Lm9wPVdSSVRFLCAuY21kX2ZsYWdzPUZBSUxGQVNUX1RSQU5TUE9SVHxTWU5D
fE4NCk9NRVJHRXxJRExFLCAucnFfZmxhZ3M9TVFfSU5GTElHSFR8RE9OVFBSRVB8SU9fU1RBVHxN
UV9USU1FT1VUX0VYUElSRUQsIC5zdGF0ZT1pDQpuX2ZsaWdodCwgLmdzdGF0ZT0weDUvMHg1LCAu
dGFnPTI3LCAuaW50ZXJuYWxfdGFnPS0xLCAuY21kPVdyaXRlKDEwKSAyYSAwMCAwMCAwMA0KIDYy
IGQyIDAwIDAwIDA4IDAwLCAucmV0cmllcz0wLCAucmVzdWx0ID0gMHg1MDAwMCwgLmZsYWdzPVRB
R0dFRHxJTklUSUFMSVpFRCwgLnQNCmltZW91dD0xLjAwMCwgYWxsb2NhdGVkIDEwOTMuMTgwIHMg
YWdvfQ0KDQpbIC4uLiBdDQoNCnNkYy9oY3R4My9idXN5OjAwMDAwMDAwNDc5Y2MyYTkgey5vcD1X
UklURSwgLmNtZF9mbGFncz1GQUlMRkFTVF9UUkFOU1BPUlR8U1lOQ3xODQpPTUVSR0V8SURMRSwg
LnJxX2ZsYWdzPU1RX0lORkxJR0hUfERPTlRQUkVQfElPX1NUQVQsIC5zdGF0ZT1pbl9mbGlnaHQs
IC5nc3RhdGU9MA0KeDExLzB4MTEsIC50YWc9NTcsIC5pbnRlcm5hbF90YWc9LTEsIC5jbWQ9V3Jp
dGUoMTApIDJhIDAwIDAwIDAwIDYxIGQyIDAwIDAwIDA4IDANCjAsIC5yZXRyaWVzPTAsIC5yZXN1
bHQgPSAweDAsIC5mbGFncz1UQUdHRUR8SU5JVElBTElaRUQsIC50aW1lb3V0PTEuMDAwLCBhbGxv
Y2F0DQplZCAxMDkzLjE1MCBzIGFnb30NCg0Kc2RjL2hjdHgzL2J1c3k6MDAwMDAwMDA4ZmQxMzBk
NSB7Lm9wPVdSSVRFLCAuY21kX2ZsYWdzPUZBSUxGQVNUX1RSQU5TUE9SVHxTWU5DfE4NCk9NRVJH
RXxJRExFLCAucnFfZmxhZ3M9TVFfSU5GTElHSFR8RE9OVFBSRVB8SU9fU1RBVCwgLnN0YXRlPWlu
X2ZsaWdodCwgLmdzdGF0ZT0wDQp4ZC8weGQsIC50YWc9NjEsIC5pbnRlcm5hbF90YWc9LTEsIC5j
bWQ9V3JpdGUoMTApIDJhIDAwIDAwIDAwIGMzIDk0IDAwIDAwIDA4IDAwLA0KIC5yZXRyaWVzPTAs
IC5yZXN1bHQgPSAweDAsIC5mbGFncz1UQUdHRUR8SU5JVElBTElaRUQsIC50aW1lb3V0PTEuMDAw
LCBhbGxvY2F0ZWQNCiAxMDkzLjE0MCBzIGFnb30NCg0KQXMgb25lIGNhbiBzZWUgZm9yIHNvbWUg
cmVxdWVzdHMgTVFfVElNRU9VVF9FWFBJUkVEIGlzIHNldCBhbmQgLnJlc3VsdCA9IDB4NTAwMDAu
DQpUaGUgdmFsdWUgb2YgLnJlc3VsdCBtZWFucyB0aGF0IHRoZSBTQ1NJIGVycm9yIGhhbmRsZXIg
aGFzIHN1Ym1pdHRlZCBhbiBhYm9ydCAoc2VlDQphbHNvIHNjbW5kLT5yZXN1bHQgPSBESURfQUJP
UlQgPDwgMTYgaW4gZHJpdmVycy9pbmZpbmliYW5kL3VscC9zcnAvaWJfc3JwLmMpLiBGb3INCnRo
ZSBsYXN0IHR3byByZXF1ZXN0cyBzaG93biBhYm92ZSBob3dldmVyIE1RX1RJTUVPVVRfRVhQSVJF
RCBpcyBub3Qgc2V0IGFuZCB0aGUNClNDU0kgcmVzdWx0IGhhcyB2YWx1ZSAwLiBJIHRoaW5rIHRo
YXQgbWVhbnMgdGhhdCBpdCBjYW4gaGFwcGVuIHRoYXQgYSByZXF1ZXN0DQp0aW1lcyBvdXQgYnV0
IHRoYXQgdGhlIHRpbWVvdXQgaGFuZGxlciBkb2VzIG5vdCBnZXQgaW52b2tlZCAuLi4NCg0KVGhh
bmtzLA0KDQpCYXJ0Lg0KDQoNCg0K

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-07 23:48           ` Bart Van Assche
@ 2018-02-08  1:09             ` Bart Van Assche
  2018-02-08 15:39               ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-08  1:09 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTA3IGF0IDIzOjQ4ICswMDAwLCBCYXJ0IFZhbiBBc3NjaGUgd3JvdGU6
DQo+IFdpdGggdGhpcyBwYXRjaCBhcHBsaWVkIEkgc2VlIHJlcXVlc3RzIGZvciB3aGljaCBpdCBz
ZWVtcyBsaWtlIHRoZSB0aW1lb3V0IGhhbmRsZXINCj4gZGlkIG5vdCBnZXQgaW52b2tlZDogWyAu
Li4gXQ0KDQpJIGp1c3Qgbm90aWNlZCB0aGUgZm9sbG93aW5nIGluIHRoZSBzeXN0ZW0gbG9nLCB3
aGljaCBpcyBwcm9iYWJseSB0aGUgcmVhc29uIHdoeSBzb21lDQpyZXF1ZXN0cyBnb3Qgc3R1Y2s6
DQoNCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBCVUc6IHVuYWJsZSB0byBoYW5k
bGUga2VybmVsIE5VTEwgcG9pbnRlciBkZXJlZmVyZW5jZSBhdCAgICAgICAgICAgKG51bGwpDQpG
ZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogSVA6IHNjc2lfdGltZXNfb3V0KzB4MTcv
MHgyYzAgW3Njc2lfbW9kXQ0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IFBHRCAw
IFA0RCAwICANCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBPb3BzOiAwMDAwIFsj
MV0gUFJFRU1QVCBTTVANCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBNb2R1bGVz
IGxpbmtlZCBpbjogZG1fc2VydmljZV90aW1lIGliX3NycCBsaWJjcmMzMmMgY3JjMzJjX2dlbmVy
aWMgc2NzaV90cmFuc3BvcnRfc3JwIHRhcmdldF9jb3JlX3BzY3NpIHRhcmdldF9jb3JlX2ZpbGUg
aWJfc3JwdCB0YXJnZXRfY29yZV9pYmxvY2sgdGFyZ2V0X2NvcmVfbW9kDQpyZG1hX3J4ZSBpcDZf
dWRwX3R1bm5lbCB1ZHBfdHVubmVsIGliX3VtYWQgaWJfdXZlcmJzIHNjc2lfZGVidWcgYnJkIG1x
X2RlYWRsaW5lIGt5YmVyX2lvc2NoZWQgZGVhZGxpbmVfaW9zY2hlZCBjZnFfaW9zY2hlZCBiZnEg
Y3JjdDEwZGlmX3BjbG11bCBjcmMzMl9wY2xtdWwgYWZfcGFja2V0IGdoYXNoX2NsbXVsbmlfaW50
ZWwgcGNiYw0KYWVzbmlfaW50ZWwgYWVzX3g4Nl82NCBjcnlwdG9fc2ltZCBjcnlwdGQgZ2x1ZV9o
ZWxwZXIgc2VyaW9fcmF3IHZpcnRpb19jb25zb2xlIHZpcnRpb19iYWxsb29uIHNnIGJ1dHRvbiBp
MmNfcGlpeDQgZG1fbXVsdGlwYXRoIGRtX21vZCBkYXggc2NzaV9kaF9yZGFjIHNjc2lfZGhfZW1j
IHNjc2lfZGhfYWx1YSBpYl9pc2VyIHJkbWFfY20gaXdfY20NCmliX2NtIGliX2NvcmUgY29uZmln
ZnMgaXNjc2lfdGNwIGxpYmlzY3NpX3RjcCBsaWJpc2NzaSBzY3NpX3RyYW5zcG9ydF9pc2NzaSBp
cF90YWJsZXMgeF90YWJsZXMgaXB2NiBhdXRvZnM0IGV4dDQgY3JjMTYgbWJjYWNoZSBqYmQyIHNk
X21vZCB2aXJ0aW9fbmV0IHZpcnRpb19ibGsgdmlydGlvX3Njc2kgc3JfbW9kIGNkcm9tIGF0YV9n
ZW5lcmljDQpwYXRhX2FjcGkgcHNtb3VzZSBjcmMzMmNfaW50ZWwgaTJjX2NvcmUgYXRrYmQNCkZl
YiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiB1aGNpX2hjZCBlaGNpX2hjZCBpbnRlbF9h
Z3AgYXRhX3BpaXggaW50ZWxfZ3R0IGFncGdhcnQgbGliYXRhIHZpcnRpb19wY2kgdXNiY29yZSB2
aXJ0aW9fcmluZyB2aXJ0aW8gc2NzaV9tb2QgdXNiX2NvbW1vbiB1bml4DQpGZWIgIDcgMTU6MTY6
MjYgdWJ1bnR1LXZtIGtlcm5lbDogQ1BVOiAwIFBJRDogMTQ2IENvbW06IGt3b3JrZXIvMDoxSCBO
b3QgdGFpbnRlZCA0LjE1LjAtZGJnKyAjMQ0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJu
ZWw6IEhhcmR3YXJlIG5hbWU6IFFFTVUgU3RhbmRhcmQgUEMgKGk0NDBGWCArIFBJSVgsIDE5OTYp
LCBCSU9TIDEuMC4wLXByZWJ1aWx0LnFlbXUtcHJvamVjdC5vcmcgMDQvMDEvMjAxNA0KRmViICA3
IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IFdvcmtxdWV1ZToga2Jsb2NrZCBibGtfbXFfdGlt
ZW91dF93b3JrDQpGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogUklQOiAwMDEwOnNj
c2lfdGltZXNfb3V0KzB4MTcvMHgyYzAgW3Njc2lfbW9kXQ0KRmViICA3IDE1OjE2OjI2IHVidW50
dS12bSBrZXJuZWw6IFJTUDogMDAxODpmZmZmOThmMGMwMmFiZDU4IEVGTEFHUzogMDAwMTAyNDYN
CkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBSQVg6IDAwMDAwMDAwMDAwMDAwMDAg
UkJYOiBmZmZmOTY1ZGUyYjNhMmMwIFJDWDogMDAwMDAwMDAwMDAwMDAwMA0KRmViICA3IDE1OjE2
OjI2IHVidW50dS12bSBrZXJuZWw6IFJEWDogZmZmZmZmZmZjMDA5NDc0MCBSU0k6IDAwMDAwMDAw
MDAwMDAwMDAgUkRJOiBmZmZmOTY1ZGUyYjNhMmMwDQpGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZt
IGtlcm5lbDogUkJQOiBmZmZmOTY1ZGUyYjNhNDM4IFIwODogZmZmZmZmZmZmZmZmZmZmYyBSMDk6
IDAwMDAwMDAwMDAwMDAwMDcNCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBSMTA6
IDAwMDAwMDAwMDAwMDAwMDAgUjExOiAwMDAwMDAwMDAwMDAwMDAwIFIxMjogMDAwMDAwMDAwMDAw
MDAwMg0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IFIxMzogMDAwMDAwMDAwMDAw
MDAwMCBSMTQ6IGZmZmY5NjVkZTJhNDQyMTggUjE1OiBmZmZmOTY1ZGUyYTQ4NzI4DQpGZWIgIDcg
MTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogRlM6ICAwMDAwMDAwMDAwMDAwMDAwKDAwMDApIEdT
OmZmZmY5NjVkZmZjMDAwMDAoMDAwMCkga25sR1M6MDAwMDAwMDAwMDAwMDAwMA0KRmViICA3IDE1
OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IENTOiAgMDAxMCBEUzogMDAwMCBFUzogMDAwMCBDUjA6
IDAwMDAwMDAwODAwNTAwMzMNCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBDUjI6
IDAwMDAwMDAwMDAwMDAwMDAgQ1IzOiAwMDAwMDAwMDNjZTBmMDAzIENSNDogMDAwMDAwMDAwMDM2
MDZmMA0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IERSMDogMDAwMDAwMDAwMDAw
MDAwMCBEUjE6IDAwMDAwMDAwMDAwMDAwMDAgRFIyOiAwMDAwMDAwMDAwMDAwMDAwDQpGZWIgIDcg
MTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogRFIzOiAwMDAwMDAwMDAwMDAwMDAwIERSNjogMDAw
MDAwMDBmZmZlMGZmMCBEUjc6IDAwMDAwMDAwMDAwMDA0MDANCkZlYiAgNyAxNToxNjoyNiB1YnVu
dHUtdm0ga2VybmVsOiBDYWxsIFRyYWNlOg0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJu
ZWw6IGJsa19tcV90ZXJtaW5hdGVfZXhwaXJlZCsweDQyLzB4ODANCkZlYiAgNyAxNToxNjoyNiB1
YnVudHUtdm0ga2VybmVsOiBidF9pdGVyKzB4M2QvMHg1MA0KRmViICA3IDE1OjE2OjI2IHVidW50
dS12bSBrZXJuZWw6IGJsa19tcV9xdWV1ZV90YWdfYnVzeV9pdGVyKzB4ZTkvMHgyMDANCkZlYiAg
NyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBibGtfbXFfdGltZW91dF93b3JrKzB4MTgxLzB4
MmUwDQpGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogcHJvY2Vzc19vbmVfd29yaysw
eDIxYy8weDZkMA0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IHdvcmtlcl90aHJl
YWQrMHgzNS8weDM4MA0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IGt0aHJlYWQr
MHgxMTcvMHgxMzANCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiByZXRfZnJvbV9m
b3JrKzB4MjQvMHgzMA0KRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IENvZGU6IGZm
IGZmIDBmIGZmIGU5IGZkIGZlIGZmIGZmIDkwIDY2IDJlIDBmIDFmIDg0IDAwIDAwIDAwIDAwIDAw
IDQxIDU1IDQxIDU0IDU1IDQ4IDhkIGFmIDc4IDAxIDAwIDAwIDUzIDQ4IDhiIDg3IGIwIDAxIDAw
IDAwIDQ4IDg5IGZiIDw0Yz4gOGIgMjggMGYgMWYgNDQgMDAgMDAgNjUNCjhiIDA1IDZhIGI1IGY4
IDNmIDgzIGY4IDBmIDBmIDg3IGVkICANCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVs
OiBSSVA6IHNjc2lfdGltZXNfb3V0KzB4MTcvMHgyYzAgW3Njc2lfbW9kXSBSU1A6IGZmZmY5OGYw
YzAyYWJkNTgNCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBDUjI6IDAwMDAwMDAw
MDAwMDAwMDANCkZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiAtLS1bIGVuZCB0cmFj
ZSBjZTZjMjBkOTViYWI0NTBlIF0tLS0NCg0KQmFydC4NCg0KDQoNCg0KDQoNCg==

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08  1:09             ` Bart Van Assche
@ 2018-02-08 15:39               ` tj
  2018-02-08 15:40                 ` tj
  2018-02-08 16:31                 ` Bart Van Assche
  0 siblings, 2 replies; 28+ messages in thread
From: tj @ 2018-02-08 15:39 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 08, 2018 at 01:09:57AM +0000, Bart Van Assche wrote:
> On Wed, 2018-02-07 at 23:48 +0000, Bart Van Assche wrote:
> > With this patch applied I see requests for which it seems like the timeout handler
> > did not get invoked: [ ... ]
> 
> I just noticed the following in the system log, which is probably the reason why some
> requests got stuck:
> 
> Feb  7 15:16:26 ubuntu-vm kernel: BUG: unable to handle kernel NULL pointer dereference at           (null)
> Feb  7 15:16:26 ubuntu-vm kernel: IP: scsi_times_out+0x17/0x2c0 [scsi_mod]
> Feb  7 15:16:26 ubuntu-vm kernel: PGD 0 P4D 0  
> Feb  7 15:16:26 ubuntu-vm kernel: Oops: 0000 [#1] PREEMPT SMP
> Feb  7 15:16:26 ubuntu-vm kernel: Modules linked in: dm_service_time ib_srp libcrc32c crc32c_generic scsi_transport_srp target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod
> rdma_rxe ip6_udp_tunnel udp_tunnel ib_umad ib_uverbs scsi_debug brd mq_deadline kyber_iosched deadline_iosched cfq_iosched bfq crct10dif_pclmul crc32_pclmul af_packet ghash_clmulni_intel pcbc
> aesni_intel aes_x86_64 crypto_simd cryptd glue_helper serio_raw virtio_console virtio_balloon sg button i2c_piix4 dm_multipath dm_mod dax scsi_dh_rdac scsi_dh_emc scsi_dh_alua ib_iser rdma_cm iw_cm
> ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables ipv6 autofs4 ext4 crc16 mbcache jbd2 sd_mod virtio_net virtio_blk virtio_scsi sr_mod cdrom ata_generic
> pata_acpi psmouse crc32c_intel i2c_core atkbd
> Feb  7 15:16:26 ubuntu-vm kernel: uhci_hcd ehci_hcd intel_agp ata_piix intel_gtt agpgart libata virtio_pci usbcore virtio_ring virtio scsi_mod usb_common unix
> Feb  7 15:16:26 ubuntu-vm kernel: CPU: 0 PID: 146 Comm: kworker/0:1H Not tainted 4.15.0-dbg+ #1
> Feb  7 15:16:26 ubuntu-vm kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
> Feb  7 15:16:26 ubuntu-vm kernel: Workqueue: kblockd blk_mq_timeout_work
> Feb  7 15:16:26 ubuntu-vm kernel: RIP: 0010:scsi_times_out+0x17/0x2c0 [scsi_mod]
> Feb  7 15:16:26 ubuntu-vm kernel: RSP: 0018:ffff98f0c02abd58 EFLAGS: 00010246
> Feb  7 15:16:26 ubuntu-vm kernel: RAX: 0000000000000000 RBX: ffff965de2b3a2c0 RCX: 0000000000000000
> Feb  7 15:16:26 ubuntu-vm kernel: RDX: ffffffffc0094740 RSI: 0000000000000000 RDI: ffff965de2b3a2c0
> Feb  7 15:16:26 ubuntu-vm kernel: RBP: ffff965de2b3a438 R08: fffffffffffffffc R09: 0000000000000007
> Feb  7 15:16:26 ubuntu-vm kernel: R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
> Feb  7 15:16:26 ubuntu-vm kernel: R13: 0000000000000000 R14: ffff965de2a44218 R15: ffff965de2a48728
> Feb  7 15:16:26 ubuntu-vm kernel: FS:  0000000000000000(0000) GS:ffff965dffc00000(0000) knlGS:0000000000000000
> Feb  7 15:16:26 ubuntu-vm kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> Feb  7 15:16:26 ubuntu-vm kernel: CR2: 0000000000000000 CR3: 000000003ce0f003 CR4: 00000000003606f0
> Feb  7 15:16:26 ubuntu-vm kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> Feb  7 15:16:26 ubuntu-vm kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Feb  7 15:16:26 ubuntu-vm kernel: Call Trace:
> Feb  7 15:16:26 ubuntu-vm kernel: blk_mq_terminate_expired+0x42/0x80
> Feb  7 15:16:26 ubuntu-vm kernel: bt_iter+0x3d/0x50
> Feb  7 15:16:26 ubuntu-vm kernel: blk_mq_queue_tag_busy_iter+0xe9/0x200
> Feb  7 15:16:26 ubuntu-vm kernel: blk_mq_timeout_work+0x181/0x2e0
> Feb  7 15:16:26 ubuntu-vm kernel: process_one_work+0x21c/0x6d0
> Feb  7 15:16:26 ubuntu-vm kernel: worker_thread+0x35/0x380
> Feb  7 15:16:26 ubuntu-vm kernel: kthread+0x117/0x130
> Feb  7 15:16:26 ubuntu-vm kernel: ret_from_fork+0x24/0x30
> Feb  7 15:16:26 ubuntu-vm kernel: Code: ff ff 0f ff e9 fd fe ff ff 90 66 2e 0f 1f 84 00 00 00 00 00 41 55 41 54 55 48 8d af 78 01 00 00 53 48 8b 87 b0 01 00 00 48 89 fb <4c> 8b 28 0f 1f 44 00 00 65
> 8b 05 6a b5 f8 3f 83 f8 0f 0f 87 ed  
> Feb  7 15:16:26 ubuntu-vm kernel: RIP: scsi_times_out+0x17/0x2c0 [scsi_mod] RSP: ffff98f0c02abd58
> Feb  7 15:16:26 ubuntu-vm kernel: CR2: 0000000000000000
> Feb  7 15:16:26 ubuntu-vm kernel: ---[ end trace ce6c20d95bab450e ]---

So, that's dereferencing %rax which is NULL.  That gotta be the ->host
deref in the following.

  enum blk_eh_timer_return scsi_times_out(struct request *req)
  {
	  struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
	  enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
	  struct Scsi_Host *host = scmd->device->host;
  ...

That sounds more like a scsi hotplug but than an issue in the timeout
code unless we messed up @req pointer to begin with.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 15:39               ` tj
@ 2018-02-08 15:40                 ` tj
  2018-02-08 16:31                 ` Bart Van Assche
  1 sibling, 0 replies; 28+ messages in thread
From: tj @ 2018-02-08 15:40 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 08, 2018 at 07:39:40AM -0800, tj@kernel.org wrote:
> That sounds more like a scsi hotplug but than an issue in the timeout
                                       ^bug
> code unless we messed up @req pointer to begin with.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 15:39               ` tj
  2018-02-08 15:40                 ` tj
@ 2018-02-08 16:31                 ` Bart Van Assche
  2018-02-08 17:00                   ` tj
  2018-02-13 21:20                   ` tj
  1 sibling, 2 replies; 28+ messages in thread
From: Bart Van Assche @ 2018-02-08 16:31 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gVGh1LCAyMDE4LTAyLTA4IGF0IDA3OjM5IC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBPbiBUaHUsIEZlYiAwOCwgMjAxOCBhdCAwMTowOTo1N0FNICswMDAwLCBCYXJ0IFZhbiBBc3Nj
aGUgd3JvdGU6DQo+ID4gT24gV2VkLCAyMDE4LTAyLTA3IGF0IDIzOjQ4ICswMDAwLCBCYXJ0IFZh
biBBc3NjaGUgd3JvdGU6DQo+ID4gPiBXaXRoIHRoaXMgcGF0Y2ggYXBwbGllZCBJIHNlZSByZXF1
ZXN0cyBmb3Igd2hpY2ggaXQgc2VlbXMgbGlrZSB0aGUgdGltZW91dCBoYW5kbGVyDQo+ID4gPiBk
aWQgbm90IGdldCBpbnZva2VkOiBbIC4uLiBdDQo+ID4gDQo+ID4gSSBqdXN0IG5vdGljZWQgdGhl
IGZvbGxvd2luZyBpbiB0aGUgc3lzdGVtIGxvZywgd2hpY2ggaXMgcHJvYmFibHkgdGhlIHJlYXNv
biB3aHkgc29tZQ0KPiA+IHJlcXVlc3RzIGdvdCBzdHVjazoNCj4gPiANCj4gPiBGZWIgIDcgMTU6
MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogQlVHOiB1bmFibGUgdG8gaGFuZGxlIGtlcm5lbCBOVUxM
IHBvaW50ZXIgZGVyZWZlcmVuY2UgYXQgICAgICAgICAgIChudWxsKQ0KPiA+IEZlYiAgNyAxNTox
NjoyNiB1YnVudHUtdm0ga2VybmVsOiBJUDogc2NzaV90aW1lc19vdXQrMHgxNy8weDJjMCBbc2Nz
aV9tb2RdDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IFBHRCAwIFA0RCAw
ICANCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogT29wczogMDAwMCBbIzFd
IFBSRUVNUFQgU01QDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IE1vZHVs
ZXMgbGlua2VkIGluOiBkbV9zZXJ2aWNlX3RpbWUgaWJfc3JwIGxpYmNyYzMyYyBjcmMzMmNfZ2Vu
ZXJpYyBzY3NpX3RyYW5zcG9ydF9zcnAgdGFyZ2V0X2NvcmVfcHNjc2kgdGFyZ2V0X2NvcmVfZmls
ZSBpYl9zcnB0IHRhcmdldF9jb3JlX2libG9jaw0KPiA+IHRhcmdldF9jb3JlX21vZA0KPiA+IHJk
bWFfcnhlIGlwNl91ZHBfdHVubmVsIHVkcF90dW5uZWwgaWJfdW1hZCBpYl91dmVyYnMgc2NzaV9k
ZWJ1ZyBicmQgbXFfZGVhZGxpbmUga3liZXJfaW9zY2hlZCBkZWFkbGluZV9pb3NjaGVkIGNmcV9p
b3NjaGVkIGJmcSBjcmN0MTBkaWZfcGNsbXVsIGNyYzMyX3BjbG11bCBhZl9wYWNrZXQgZ2hhc2hf
Y2xtdWxuaV9pbnRlbCBwY2JjDQo+ID4gYWVzbmlfaW50ZWwgYWVzX3g4Nl82NCBjcnlwdG9fc2lt
ZCBjcnlwdGQgZ2x1ZV9oZWxwZXIgc2VyaW9fcmF3IHZpcnRpb19jb25zb2xlIHZpcnRpb19iYWxs
b29uIHNnIGJ1dHRvbiBpMmNfcGlpeDQgZG1fbXVsdGlwYXRoIGRtX21vZCBkYXggc2NzaV9kaF9y
ZGFjIHNjc2lfZGhfZW1jIHNjc2lfZGhfYWx1YSBpYl9pc2VyIHJkbWFfY20NCj4gPiBpd19jbQ0K
PiA+IGliX2NtIGliX2NvcmUgY29uZmlnZnMgaXNjc2lfdGNwIGxpYmlzY3NpX3RjcCBsaWJpc2Nz
aSBzY3NpX3RyYW5zcG9ydF9pc2NzaSBpcF90YWJsZXMgeF90YWJsZXMgaXB2NiBhdXRvZnM0IGV4
dDQgY3JjMTYgbWJjYWNoZSBqYmQyIHNkX21vZCB2aXJ0aW9fbmV0IHZpcnRpb19ibGsgdmlydGlv
X3Njc2kgc3JfbW9kIGNkcm9tDQo+ID4gYXRhX2dlbmVyaWMNCj4gPiBwYXRhX2FjcGkgcHNtb3Vz
ZSBjcmMzMmNfaW50ZWwgaTJjX2NvcmUgYXRrYmQNCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1
LXZtIGtlcm5lbDogdWhjaV9oY2QgZWhjaV9oY2QgaW50ZWxfYWdwIGF0YV9waWl4IGludGVsX2d0
dCBhZ3BnYXJ0IGxpYmF0YSB2aXJ0aW9fcGNpIHVzYmNvcmUgdmlydGlvX3JpbmcgdmlydGlvIHNj
c2lfbW9kIHVzYl9jb21tb24gdW5peA0KPiA+IEZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2Vy
bmVsOiBDUFU6IDAgUElEOiAxNDYgQ29tbToga3dvcmtlci8wOjFIIE5vdCB0YWludGVkIDQuMTUu
MC1kYmcrICMxDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IEhhcmR3YXJl
IG5hbWU6IFFFTVUgU3RhbmRhcmQgUEMgKGk0NDBGWCArIFBJSVgsIDE5OTYpLCBCSU9TIDEuMC4w
LXByZWJ1aWx0LnFlbXUtcHJvamVjdC5vcmcgMDQvMDEvMjAxNA0KPiA+IEZlYiAgNyAxNToxNjoy
NiB1YnVudHUtdm0ga2VybmVsOiBXb3JrcXVldWU6IGtibG9ja2QgYmxrX21xX3RpbWVvdXRfd29y
aw0KPiA+IEZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBSSVA6IDAwMTA6c2NzaV90
aW1lc19vdXQrMHgxNy8weDJjMCBbc2NzaV9tb2RdDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50
dS12bSBrZXJuZWw6IFJTUDogMDAxODpmZmZmOThmMGMwMmFiZDU4IEVGTEFHUzogMDAwMTAyNDYN
Cj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogUkFYOiAwMDAwMDAwMDAwMDAw
MDAwIFJCWDogZmZmZjk2NWRlMmIzYTJjMCBSQ1g6IDAwMDAwMDAwMDAwMDAwMDANCj4gPiBGZWIg
IDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogUkRYOiBmZmZmZmZmZmMwMDk0NzQwIFJTSTog
MDAwMDAwMDAwMDAwMDAwMCBSREk6IGZmZmY5NjVkZTJiM2EyYzANCj4gPiBGZWIgIDcgMTU6MTY6
MjYgdWJ1bnR1LXZtIGtlcm5lbDogUkJQOiBmZmZmOTY1ZGUyYjNhNDM4IFIwODogZmZmZmZmZmZm
ZmZmZmZmYyBSMDk6IDAwMDAwMDAwMDAwMDAwMDcNCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1
LXZtIGtlcm5lbDogUjEwOiAwMDAwMDAwMDAwMDAwMDAwIFIxMTogMDAwMDAwMDAwMDAwMDAwMCBS
MTI6IDAwMDAwMDAwMDAwMDAwMDINCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5l
bDogUjEzOiAwMDAwMDAwMDAwMDAwMDAwIFIxNDogZmZmZjk2NWRlMmE0NDIxOCBSMTU6IGZmZmY5
NjVkZTJhNDg3MjgNCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogRlM6ICAw
MDAwMDAwMDAwMDAwMDAwKDAwMDApIEdTOmZmZmY5NjVkZmZjMDAwMDAoMDAwMCkga25sR1M6MDAw
MDAwMDAwMDAwMDAwMA0KPiA+IEZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBDUzog
IDAwMTAgRFM6IDAwMDAgRVM6IDAwMDAgQ1IwOiAwMDAwMDAwMDgwMDUwMDMzDQo+ID4gRmViICA3
IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IENSMjogMDAwMDAwMDAwMDAwMDAwMCBDUjM6IDAw
MDAwMDAwM2NlMGYwMDMgQ1I0OiAwMDAwMDAwMDAwMzYwNmYwDQo+ID4gRmViICA3IDE1OjE2OjI2
IHVidW50dS12bSBrZXJuZWw6IERSMDogMDAwMDAwMDAwMDAwMDAwMCBEUjE6IDAwMDAwMDAwMDAw
MDAwMDAgRFIyOiAwMDAwMDAwMDAwMDAwMDAwDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12
bSBrZXJuZWw6IERSMzogMDAwMDAwMDAwMDAwMDAwMCBEUjY6IDAwMDAwMDAwZmZmZTBmZjAgRFI3
OiAwMDAwMDAwMDAwMDAwNDAwDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6
IENhbGwgVHJhY2U6DQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IGJsa19t
cV90ZXJtaW5hdGVfZXhwaXJlZCsweDQyLzB4ODANCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1
LXZtIGtlcm5lbDogYnRfaXRlcisweDNkLzB4NTANCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1
LXZtIGtlcm5lbDogYmxrX21xX3F1ZXVlX3RhZ19idXN5X2l0ZXIrMHhlOS8weDIwMA0KPiA+IEZl
YiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBibGtfbXFfdGltZW91dF93b3JrKzB4MTgx
LzB4MmUwDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IHByb2Nlc3Nfb25l
X3dvcmsrMHgyMWMvMHg2ZDANCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDog
d29ya2VyX3RocmVhZCsweDM1LzB4MzgwDQo+ID4gRmViICA3IDE1OjE2OjI2IHVidW50dS12bSBr
ZXJuZWw6IGt0aHJlYWQrMHgxMTcvMHgxMzANCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1bnR1LXZt
IGtlcm5lbDogcmV0X2Zyb21fZm9yaysweDI0LzB4MzANCj4gPiBGZWIgIDcgMTU6MTY6MjYgdWJ1
bnR1LXZtIGtlcm5lbDogQ29kZTogZmYgZmYgMGYgZmYgZTkgZmQgZmUgZmYgZmYgOTAgNjYgMmUg
MGYgMWYgODQgMDAgMDAgMDAgMDAgMDAgNDEgNTUgNDEgNTQgNTUgNDggOGQgYWYgNzggMDEgMDAg
MDAgNTMgNDggOGIgODcgYjAgMDEgMDAgMDAgNDggODkgZmIgPDRjPiA4YiAyOCAwZiAxZiA0NCAw
MCAwMA0KPiA+IDY1DQo+ID4gOGIgMDUgNmEgYjUgZjggM2YgODMgZjggMGYgMGYgODcgZWQgIA0K
PiA+IEZlYiAgNyAxNToxNjoyNiB1YnVudHUtdm0ga2VybmVsOiBSSVA6IHNjc2lfdGltZXNfb3V0
KzB4MTcvMHgyYzAgW3Njc2lfbW9kXSBSU1A6IGZmZmY5OGYwYzAyYWJkNTgNCj4gPiBGZWIgIDcg
MTU6MTY6MjYgdWJ1bnR1LXZtIGtlcm5lbDogQ1IyOiAwMDAwMDAwMDAwMDAwMDAwDQo+ID4gRmVi
ICA3IDE1OjE2OjI2IHVidW50dS12bSBrZXJuZWw6IC0tLVsgZW5kIHRyYWNlIGNlNmMyMGQ5NWJh
YjQ1MGUgXS0tLQ0KPiANCj4gU28sIHRoYXQncyBkZXJlZmVyZW5jaW5nICVyYXggd2hpY2ggaXMg
TlVMTC4gIFRoYXQgZ290dGEgYmUgdGhlIC0+aG9zdA0KPiBkZXJlZiBpbiB0aGUgZm9sbG93aW5n
Lg0KPiANCj4gICBlbnVtIGJsa19laF90aW1lcl9yZXR1cm4gc2NzaV90aW1lc19vdXQoc3RydWN0
IHJlcXVlc3QgKnJlcSkNCj4gICB7DQo+IAkgIHN0cnVjdCBzY3NpX2NtbmQgKnNjbWQgPSBibGtf
bXFfcnFfdG9fcGR1KHJlcSk7DQo+IAkgIGVudW0gYmxrX2VoX3RpbWVyX3JldHVybiBydG4gPSBC
TEtfRUhfTk9UX0hBTkRMRUQ7DQo+IAkgIHN0cnVjdCBTY3NpX0hvc3QgKmhvc3QgPSBzY21kLT5k
ZXZpY2UtPmhvc3Q7DQo+ICAgLi4uDQo+IA0KPiBUaGF0IHNvdW5kcyBtb3JlIGxpa2UgYSBzY3Np
IGhvdHBsdWcgYnVnIHRoYW4gYW4gaXNzdWUgaW4gdGhlIHRpbWVvdXQNCj4gY29kZSB1bmxlc3Mg
d2UgbWVzc2VkIHVwIEByZXEgcG9pbnRlciB0byBiZWdpbiB3aXRoLg0KDQpJIGRvbid0IHRoaW5r
IHRoYXQgdGhpcyBpcyByZWxhdGVkIHRvIFNDU0kgaG90cGx1Z2dpbmc6IHRoaXMgY3Jhc2ggZG9l
cyBub3QNCm9jY3VyIHdpdGggdGhlIHY0LjE1IGJsb2NrIGxheWVyIGNvcmUgYW5kIGl0IGRvZXMg
bm90IG9jY3VyIHdpdGggbXkgdGltZW91dA0KaGFuZGxlciByZXdvcmsgcGF0Y2ggYXBwbGllZCBl
aXRoZXIuIEkgdGhpbmsgdGhhdCBtZWFucyB0aGF0IHdlIGNhbm5vdA0KZXhjbHVkZSB0aGUgYmxv
Y2sgbGF5ZXIgY29yZSB0aW1lb3V0IGhhbmRsZXIgcmV3b3JrIGFzIGEgcG9zc2libGUgY2F1c2Uu
DQoNClRoZSBkaXNhc3NlbWJsZXIgb3V0cHV0IGlzIGFzIGZvbGxvd3M6DQoNCihnZGIpIGRpc2Fz
IC9zIHNjc2lfdGltZXNfb3V0DQpEdW1wIG9mIGFzc2VtYmxlciBjb2RlIGZvciBmdW5jdGlvbiBz
Y3NpX3RpbWVzX291dDoNCmRyaXZlcnMvc2NzaS9zY3NpX2Vycm9yLmM6DQoyODIgICAgIHsNCiAg
IDB4MDAwMDAwMDAwMDAwNWJkMCA8KzA+OiAgICAgcHVzaCAgICVyMTMNCiAgIDB4MDAwMDAwMDAw
MDAwNWJkMiA8KzI+OiAgICAgcHVzaCAgICVyMTINCiAgIDB4MDAwMDAwMDAwMDAwNWJkNCA8KzQ+
OiAgICAgcHVzaCAgICVyYnANCi4vaW5jbHVkZS9saW51eC9ibGstbXEuaDoNCjMwMCAgICAgICAg
ICAgICByZXR1cm4gcnEgKyAxOw0KICAgMHgwMDAwMDAwMDAwMDA1YmQ1IDwrNT46ICAgICBsZWEg
ICAgMHgxNzgoJXJkaSksJXJicA0KZHJpdmVycy9zY3NpL3Njc2lfZXJyb3IuYzoNCjI4MiAgICAg
ew0KICAgMHgwMDAwMDAwMDAwMDA1YmRjIDwrMTI+OiAgICBwdXNoICAgJXJieA0KMjgzICAgICAg
ICAgICAgIHN0cnVjdCBzY3NpX2NtbmQgKnNjbWQgPSBibGtfbXFfcnFfdG9fcGR1KHJlcSk7DQoy
ODQgICAgICAgICAgICAgZW51bSBibGtfZWhfdGltZXJfcmV0dXJuIHJ0biA9IEJMS19FSF9OT1Rf
SEFORExFRDsNCjI4NSAgICAgICAgICAgICBzdHJ1Y3QgU2NzaV9Ib3N0ICpob3N0ID0gc2NtZC0+
ZGV2aWNlLT5ob3N0Ow0KICAgMHgwMDAwMDAwMDAwMDA1YmRkIDwrMTM+OiAgICBtb3YgICAgMHgx
YjAoJXJkaSksJXJheA0KMjgyICAgICB7DQogICAweDAwMDAwMDAwMDAwMDViZTQgPCsyMD46ICAg
IG1vdiAgICAlcmRpLCVyYngNCjI4MyAgICAgICAgICAgICBzdHJ1Y3Qgc2NzaV9jbW5kICpzY21k
ID0gYmxrX21xX3JxX3RvX3BkdShyZXEpOw0KMjg0ICAgICAgICAgICAgIGVudW0gYmxrX2VoX3Rp
bWVyX3JldHVybiBydG4gPSBCTEtfRUhfTk9UX0hBTkRMRUQ7DQoyODUgICAgICAgICAgICAgc3Ry
dWN0IFNjc2lfSG9zdCAqaG9zdCA9IHNjbWQtPmRldmljZS0+aG9zdDsNCiAgIDB4MDAwMDAwMDAw
MDAwNWJlNyA8KzIzPjogICAgbW92ICAgICglcmF4KSwlcjEzDQogICAweDAwMDAwMDAwMDAwMDVi
ZWEgPCsyNj46ICAgIG5vcGwgICAweDAoJXJheCwlcmF4LDEpDQpbIC4uLiBdDQooZ2RiKSBwcmlu
dCAveCBzaXplb2Yoc3RydWN0IHJlcXVlc3QpDQokMiA9IDB4MTc4DQooZ2RiKSBwcmludCAmKCgo
c3RydWN0IHNjc2lfY21uZCopMCktPmRldmljZSkNCiQ0ID0gKHN0cnVjdCBzY3NpX2RldmljZSAq
KikgMHgzOCA8c2NzaV9jbWRfZ2V0X3NlcmlhbCs4Pg0KKGdkYikgcHJpbnQgJigoKHN0cnVjdCBz
Y3NpX2RldmljZSopMCktPmhvc3QpICAgICAgIA0KJDUgPSAoc3RydWN0IFNjc2lfSG9zdCAqKikg
MHgwDQoNClRoZSBjcmFzaCBpcyByZXBvcnRlZCBhdCBhZGRyZXNzIHNjc2lfdGltZXNfb3V0KzB4
MTcgPT0gc2NzaV90aW1lc19vdXQrMjMuIFRoZQ0KaW5zdHJ1Y3Rpb24gYXQgdGhhdCBhZGRyZXNz
IHRyaWVzIHRvIGRlcmVmZXJlbmNlIHNjc2lfY21uZC5kZXZpY2UgKCVyYXgpLiBUaGUNCnJlZ2lz
dGVyIGR1bXAgc2hvd3MgdGhhdCB0aGF0IHBvaW50ZXIgaGFzIHRoZSB2YWx1ZSBOVUxMLiBUaGUg
b25seSBmdW5jdGlvbiBJDQprbm93IG9mIHRoYXQgY2xlYXJzIHRoZSBzY3NpX2NtbmQuZGV2aWNl
IHBvaW50ZXIgaXMgc2NzaV9yZXFfaW5pdCgpLiBUaGUgb25seQ0KY2FsbGVyIG9mIHRoYXQgZnVu
Y3Rpb24gaW4gdGhlIFNDU0kgY29yZSBpcyBzY3NpX2luaXRpYWxpemVfcnEoKS4gVGhhdCBmdW5j
dGlvbg0KaGFzIHR3byBjYWxsZXJzLCBuYW1lbHkgc2NzaV9pbml0X2NvbW1hbmQoKSBhbmQgYmxr
X2dldF9yZXF1ZXN0KCkuIEhvd2V2ZXIsDQp0aGUgc2NzaV9jbW5kLmRldmljZSBwb2ludGVyIGlz
IG5vdCBjbGVhcmVkIHdoZW4gYSByZXF1ZXN0IGZpbmlzaGVzLiBUaGlzIGlzDQp3aHkgSSB0aGlu
ayB0aGF0IHRoZSBhYm92ZSBjcmFzaCByZXBvcnQgaW5kaWNhdGVzIHRoYXQgc2NzaV90aW1lc19v
dXQoKSB3YXMNCmNhbGxlZCBmb3IgYSByZXF1ZXN0IHRoYXQgd2FzIGJlaW5nIHJlaW5pdGlhbGl6
ZWQgYW5kIG5vdCBieSBkZXZpY2UgaG90cGx1Z2dpbmcuDQoNClRoYW5rcywNCg0KQmFydC4NCg0K
DQo=

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 16:31                 ` Bart Van Assche
@ 2018-02-08 17:00                   ` tj
  2018-02-08 17:10                     ` Bart Van Assche
  2018-02-13 21:20                   ` tj
  1 sibling, 1 reply; 28+ messages in thread
From: tj @ 2018-02-08 17:00 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

On Thu, Feb 08, 2018 at 04:31:43PM +0000, Bart Van Assche wrote:
> > That sounds more like a scsi hotplug bug than an issue in the timeout
> > code unless we messed up @req pointer to begin with.
> 
> I don't think that this is related to SCSI hotplugging: this crash does not
> occur with the v4.15 block layer core and it does not occur with my timeout
> handler rework patch applied either. I think that means that we cannot
> exclude the block layer core timeout handler rework as a possible cause.
> 
> The disassembler output is as follows:
> 
> (gdb) disas /s scsi_times_out
> Dump of assembler code for function scsi_times_out:
> drivers/scsi/scsi_error.c:
> 282     {
>    0x0000000000005bd0 <+0>:     push   %r13
>    0x0000000000005bd2 <+2>:     push   %r12
>    0x0000000000005bd4 <+4>:     push   %rbp
> ./include/linux/blk-mq.h:
> 300             return rq + 1;
>    0x0000000000005bd5 <+5>:     lea    0x178(%rdi),%rbp
> drivers/scsi/scsi_error.c:
> 282     {
>    0x0000000000005bdc <+12>:    push   %rbx
> 283             struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
> 284             enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
> 285             struct Scsi_Host *host = scmd->device->host;
>    0x0000000000005bdd <+13>:    mov    0x1b0(%rdi),%rax
> 282     {
>    0x0000000000005be4 <+20>:    mov    %rdi,%rbx
> 283             struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
> 284             enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
> 285             struct Scsi_Host *host = scmd->device->host;
>    0x0000000000005be7 <+23>:    mov    (%rax),%r13
>    0x0000000000005bea <+26>:    nopl   0x0(%rax,%rax,1)
> [ ... ]
> (gdb) print /x sizeof(struct request)
> $2 = 0x178
> (gdb) print &(((struct scsi_cmnd*)0)->device)
> $4 = (struct scsi_device **) 0x38 <scsi_cmd_get_serial+8>
> (gdb) print &(((struct scsi_device*)0)->host)       
> $5 = (struct Scsi_Host **) 0x0
> 
> The crash is reported at address scsi_times_out+0x17 == scsi_times_out+23. The
> instruction at that address tries to dereference scsi_cmnd.device (%rax). The
> register dump shows that that pointer has the value NULL. The only function I
> know of that clears the scsi_cmnd.device pointer is scsi_req_init(). The only
> caller of that function in the SCSI core is scsi_initialize_rq(). That function
> has two callers, namely scsi_init_command() and blk_get_request(). However,
> the scsi_cmnd.device pointer is not cleared when a request finishes. This is
> why I think that the above crash report indicates that scsi_times_out() was
> called for a request that was being reinitialized and not by device hotplugging.

I could be misreading it but scsi_cmnd->device dereference should be
the following.

    0x0000000000005bdd <+13>:    mov    0x1b0(%rdi),%rax

%rdi is @req, 0x1b0(%rdi) seems to be the combined arithmetic of
blk_mq_rq_to_pdu() and ->device dereference - 0x178 + 0x38.  The
faulting access is (%rax), which is deref'ing host from device.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 17:00                   ` tj
@ 2018-02-08 17:10                     ` Bart Van Assche
  2018-02-08 17:19                       ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-08 17:10 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gVGh1LCAyMDE4LTAyLTA4IGF0IDA5OjAwIC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBPbiBUaHUsIEZlYiAwOCwgMjAxOCBhdCAwNDozMTo0M1BNICswMDAwLCBCYXJ0IFZhbiBBc3Nj
aGUgd3JvdGU6DQo+ID4gVGhlIGNyYXNoIGlzIHJlcG9ydGVkIGF0IGFkZHJlc3Mgc2NzaV90aW1l
c19vdXQrMHgxNyA9PSBzY3NpX3RpbWVzX291dCsyMy4gVGhlDQo+ID4gaW5zdHJ1Y3Rpb24gYXQg
dGhhdCBhZGRyZXNzIHRyaWVzIHRvIGRlcmVmZXJlbmNlIHNjc2lfY21uZC5kZXZpY2UgKCVyYXgp
LiBUaGUNCj4gPiByZWdpc3RlciBkdW1wIHNob3dzIHRoYXQgdGhhdCBwb2ludGVyIGhhcyB0aGUg
dmFsdWUgTlVMTC4gVGhlIG9ubHkgZnVuY3Rpb24gSQ0KPiA+IGtub3cgb2YgdGhhdCBjbGVhcnMg
dGhlIHNjc2lfY21uZC5kZXZpY2UgcG9pbnRlciBpcyBzY3NpX3JlcV9pbml0KCkuIFRoZSBvbmx5
DQo+ID4gY2FsbGVyIG9mIHRoYXQgZnVuY3Rpb24gaW4gdGhlIFNDU0kgY29yZSBpcyBzY3NpX2lu
aXRpYWxpemVfcnEoKS4gVGhhdCBmdW5jdGlvbg0KPiA+IGhhcyB0d28gY2FsbGVycywgbmFtZWx5
IHNjc2lfaW5pdF9jb21tYW5kKCkgYW5kIGJsa19nZXRfcmVxdWVzdCgpLiBIb3dldmVyLA0KPiA+
IHRoZSBzY3NpX2NtbmQuZGV2aWNlIHBvaW50ZXIgaXMgbm90IGNsZWFyZWQgd2hlbiBhIHJlcXVl
c3QgZmluaXNoZXMuIFRoaXMgaXMNCj4gPiB3aHkgSSB0aGluayB0aGF0IHRoZSBhYm92ZSBjcmFz
aCByZXBvcnQgaW5kaWNhdGVzIHRoYXQgc2NzaV90aW1lc19vdXQoKSB3YXMNCj4gPiBjYWxsZWQg
Zm9yIGEgcmVxdWVzdCB0aGF0IHdhcyBiZWluZyByZWluaXRpYWxpemVkIGFuZCBub3QgYnkgZGV2
aWNlIGhvdHBsdWdnaW5nLg0KPiANCj4gSSBjb3VsZCBiZSBtaXNyZWFkaW5nIGl0IGJ1dCBzY3Np
X2NtbmQtPmRldmljZSBkZXJlZmVyZW5jZSBzaG91bGQgYmUNCj4gdGhlIGZvbGxvd2luZy4NCj4g
DQo+ICAgICAweDAwMDAwMDAwMDAwMDViZGQgPCsxMz46ICAgIG1vdiAgICAweDFiMCglcmRpKSwl
cmF4DQo+IA0KPiAlcmRpIGlzIEByZXEsIDB4MWIwKCVyZGkpIHNlZW1zIHRvIGJlIHRoZSBjb21i
aW5lZCBhcml0aG1ldGljIG9mDQo+IGJsa19tcV9ycV90b19wZHUoKSBhbmQgLT5kZXZpY2UgZGVy
ZWZlcmVuY2UgLSAweDE3OCArIDB4MzguICBUaGUNCj4gZmF1bHRpbmcgYWNjZXNzIGlzICglcmF4
KSwgd2hpY2ggaXMgZGVyZWYnaW5nIGhvc3QgZnJvbSBkZXZpY2UuDQoNCkhlbGxvIFRlanVuLA0K
DQpJIHRoaW5rICJkZXJlZmVyZW5jaW5nIGEgcG9pbnRlciIgbWVhbnMgcmVhZGluZyB0aGUgbWVt
b3J5IGxvY2F0aW9uIHRoYXQgcG9pbnRlciBwb2ludHMNCmF0PyBBbnl3YXksIEkgdGhpbmsgd2Ug
Ym90aCBpbnRlcnByZXQgdGhlIGNyYXNoIHJlcG9ydCBpbiB0aGUgc2FtZSB3YXksIG5hbWVseSB0
aGF0IGl0DQptZWFucyB0aGF0IHNjbWQtPmRldmljZSA9PSBOVUxMLg0KDQpUaGFua3MsDQoNCkJh
cnQuDQoNCg0KDQo=

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 17:10                     ` Bart Van Assche
@ 2018-02-08 17:19                       ` tj
  2018-02-08 17:37                         ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-08 17:19 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

On Thu, Feb 08, 2018 at 05:10:45PM +0000, Bart Van Assche wrote:
> I think "dereferencing a pointer" means reading the memory location that pointer points
> at? Anyway, I think we both interpret the crash report in the same way, namely that it
> means that scmd->device == NULL.

No, what I'm trying to say is that it's is the device->host reference
not the scmd->device reference.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 17:19                       ` tj
@ 2018-02-08 17:37                         ` Bart Van Assche
  2018-02-08 17:40                           ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-08 17:37 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gVGh1LCAyMDE4LTAyLTA4IGF0IDA5OjE5IC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBIZWxsbywgQmFydC4NCj4gDQo+IE9uIFRodSwgRmViIDA4LCAyMDE4IGF0IDA1OjEwOjQ1UE0g
KzAwMDAsIEJhcnQgVmFuIEFzc2NoZSB3cm90ZToNCj4gPiBJIHRoaW5rICJkZXJlZmVyZW5jaW5n
IGEgcG9pbnRlciIgbWVhbnMgcmVhZGluZyB0aGUgbWVtb3J5IGxvY2F0aW9uIHRoYXQgcG9pbnRl
ciBwb2ludHMNCj4gPiBhdD8gQW55d2F5LCBJIHRoaW5rIHdlIGJvdGggaW50ZXJwcmV0IHRoZSBj
cmFzaCByZXBvcnQgaW4gdGhlIHNhbWUgd2F5LCBuYW1lbHkgdGhhdCBpdA0KPiA+IG1lYW5zIHRo
YXQgc2NtZC0+ZGV2aWNlID09IE5VTEwuDQo+IA0KPiBObywgd2hhdCBJJ20gdHJ5aW5nIHRvIHNh
eSBpcyB0aGF0IGl0J3MgaXMgdGhlIGRldmljZS0+aG9zdCByZWZlcmVuY2UNCj4gbm90IHRoZSBz
Y21kLT5kZXZpY2UgcmVmZXJlbmNlLg0KDQpBZ2FpbiwgSSB0aGluayB0aGF0IG1lYW5zIHRoYXQg
d2UgYWdyZWUgYWJvdXQgdGhlIGludGVycHJldGF0aW9uIG9mIHRoZSBjcmFzaA0KcmVwb3J0LiBU
aGUgYmlnIHF1ZXN0aW9uIGhlcmUgaXMgd2hhdCB0aGUgbmV4dCBzdGVwIHNob3VsZCBiZSB0byBh
bmFseXplIHRoaXMNCmZ1cnRoZXIgYW5kL29yIHRvIGF2b2lkIHRoYXQgdGhpcyBjcmFzaCBjYW4g
b2NjdXI/DQoNClRoYW5rcywNCg0KQmFydC4NCg0KDQoNCg0KDQo=

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 17:37                         ` Bart Van Assche
@ 2018-02-08 17:40                           ` tj
  2018-02-08 17:48                             ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-08 17:40 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 08, 2018 at 05:37:46PM +0000, Bart Van Assche wrote:
> On Thu, 2018-02-08 at 09:19 -0800, tj@kernel.org wrote:
> > Hello, Bart.
> > 
> > On Thu, Feb 08, 2018 at 05:10:45PM +0000, Bart Van Assche wrote:
> > > I think "dereferencing a pointer" means reading the memory location that pointer points
> > > at? Anyway, I think we both interpret the crash report in the same way, namely that it
> > > means that scmd->device == NULL.
> > 
> > No, what I'm trying to say is that it's is the device->host reference
> > not the scmd->device reference.
> 
> Again, I think that means that we agree about the interpretation of the crash
> report. The big question here is what the next step should be to analyze this
> further and/or to avoid that this crash can occur?

Heh, sorry about not being clear.  What I'm trying to say is that
scmd->device != NULL && device->host == NULL.  Or was this what you
were saying all along?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 17:40                           ` tj
@ 2018-02-08 17:48                             ` Bart Van Assche
  2018-02-08 17:54                               ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-08 17:48 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gVGh1LCAyMDE4LTAyLTA4IGF0IDA5OjQwIC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBIZWgsIHNvcnJ5IGFib3V0IG5vdCBiZWluZyBjbGVhci4gIFdoYXQgSSdtIHRyeWluZyB0byBz
YXkgaXMgdGhhdA0KPiBzY21kLT5kZXZpY2UgIT0gTlVMTCAmJiBkZXZpY2UtPmhvc3QgPT0gTlVM
TC4gIE9yIHdhcyB0aGlzIHdoYXQgeW91DQo+IHdlcmUgc2F5aW5nIGFsbCBhbG9uZz8NCg0KV2hh
dCBJIGFncmVlIHdpdGggaXMgdGhhdCB0aGUgcmVxdWVzdCBwb2ludGVyIChyZXEgYXJndW1lbnQp
IGlzIHN0b3JlZCBpbiAlcmRpDQphbmQgdGhhdCBtb3YgMHgxYjAoJXJkaSksJXJheCBsb2FkcyBz
Y21kLT5kZXZpY2UgaW50byAlcmF4LiBTaW5jZSBSSVAgPT0NCnNjc2lfdGltZXNfb3V0KzB4MTcs
IHNpbmNlIHRoZSBpbnN0cnVjdGlvbiBhdCB0aGF0IGFkZHJlc3MgdHJpZXMgdG8gZGVyZWZlcmVu
Y2UNCiVyYXggYW5kIHNpbmNlIHRoZSByZWdpc3RlciBkdW1wIHNob3dzIHRoYXQgJXJheCA9PSBO
VUxMIEkgdGhpbmsgdGhhdCBtZWFucyB0aGF0DQpzY21kLT5kZXZpY2UgPT0gTlVMTC4NCg0KVGhh
bmtzLA0KDQpCYXJ0Lg0KDQoNCg0KDQo=

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 17:48                             ` Bart Van Assche
@ 2018-02-08 17:54                               ` tj
  0 siblings, 0 replies; 28+ messages in thread
From: tj @ 2018-02-08 17:54 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 08, 2018 at 05:48:32PM +0000, Bart Van Assche wrote:
> On Thu, 2018-02-08 at 09:40 -0800, tj@kernel.org wrote:
> > Heh, sorry about not being clear.  What I'm trying to say is that
> > scmd->device != NULL && device->host == NULL.  Or was this what you
> > were saying all along?
> 
> What I agree with is that the request pointer (req argument) is stored in %rdi
> and that mov 0x1b0(%rdi),%rax loads scmd->device into %rax. Since RIP ==
> scsi_times_out+0x17, since the instruction at that address tries to dereference
> %rax and since the register dump shows that %rax == NULL I think that means that
> scmd->device == NULL.

Ah, I was completely confused about which one had to be NULL.  You're
absolutely right.  Let's continue earlier in the thread.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-08 16:31                 ` Bart Van Assche
  2018-02-08 17:00                   ` tj
@ 2018-02-13 21:20                   ` tj
  2018-02-14 16:58                     ` Bart Van Assche
  1 sibling, 1 reply; 28+ messages in thread
From: tj @ 2018-02-13 21:20 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

Sorry about the delay.

On Thu, Feb 08, 2018 at 04:31:43PM +0000, Bart Van Assche wrote:
> The crash is reported at address scsi_times_out+0x17 == scsi_times_out+23. The
> instruction at that address tries to dereference scsi_cmnd.device (%rax). The
> register dump shows that that pointer has the value NULL. The only function I
> know of that clears the scsi_cmnd.device pointer is scsi_req_init(). The only
> caller of that function in the SCSI core is scsi_initialize_rq(). That function
> has two callers, namely scsi_init_command() and blk_get_request(). However,
> the scsi_cmnd.device pointer is not cleared when a request finishes. This is
> why I think that the above crash report indicates that scsi_times_out() was
> called for a request that was being reinitialized and not by device hotplugging.

Can you please give the following patch a shot?  While timeout path is
synchornizing against the completion path (and the following re-init)
while taking back control of a timed-out request, it wasn't doing that
while giving it back, so the timer registration could race against
completion and re-issue.  I'm still not quite sure how that can lead
to the oops tho.  Anyways, we need something like this one way or the
other.

This isn't the final patch.  We should add batching-up of rcu
synchronize calls similar to the abort path.

Thanks.

diff --git a/block/blk-mq.c b/block/blk-mq.c
index df93102..b66aec3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -816,7 +816,8 @@ struct blk_mq_timeout_data {
 	unsigned int nr_expired;
 };
 
-static void blk_mq_rq_timed_out(struct request *req, bool reserved)
+static void blk_mq_rq_timed_out(struct blk_mq_hw_ctx *hctx, struct request *req,
+				bool reserved)
 {
 	const struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
@@ -836,8 +837,12 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		 * ->aborted_gstate is set, this may lead to ignored
 		 * completions and further spurious timeouts.
 		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
 		blk_add_timer(req);
+		if (!(hctx->flags & BLK_MQ_F_BLOCKING))
+			synchronize_rcu();
+		else
+			synchronize_srcu(hctx->srcu);
+		blk_mq_rq_update_aborted_gstate(req, 0);
 		break;
 	case BLK_EH_NOT_HANDLED:
 		break;
@@ -893,7 +898,7 @@ static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
 	 */
 	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
 	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
-		blk_mq_rq_timed_out(rq, reserved);
+		blk_mq_rq_timed_out(hctx, rq, reserved);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-13 21:20                   ` tj
@ 2018-02-14 16:58                     ` Bart Van Assche
  2018-02-18 13:11                       ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-14 16:58 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gVHVlLCAyMDE4LTAyLTEzIGF0IDEzOjIwIC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBPbiBUaHUsIEZlYiAwOCwgMjAxOCBhdCAwNDozMTo0M1BNICswMDAwLCBCYXJ0IFZhbiBBc3Nj
aGUgd3JvdGU6DQo+ID4gVGhlIGNyYXNoIGlzIHJlcG9ydGVkIGF0IGFkZHJlc3Mgc2NzaV90aW1l
c19vdXQrMHgxNyA9PSBzY3NpX3RpbWVzX291dCsyMy4gVGhlDQo+ID4gaW5zdHJ1Y3Rpb24gYXQg
dGhhdCBhZGRyZXNzIHRyaWVzIHRvIGRlcmVmZXJlbmNlIHNjc2lfY21uZC5kZXZpY2UgKCVyYXgp
LiBUaGUNCj4gPiByZWdpc3RlciBkdW1wIHNob3dzIHRoYXQgdGhhdCBwb2ludGVyIGhhcyB0aGUg
dmFsdWUgTlVMTC4gVGhlIG9ubHkgZnVuY3Rpb24gSQ0KPiA+IGtub3cgb2YgdGhhdCBjbGVhcnMg
dGhlIHNjc2lfY21uZC5kZXZpY2UgcG9pbnRlciBpcyBzY3NpX3JlcV9pbml0KCkuIFRoZSBvbmx5
DQo+ID4gY2FsbGVyIG9mIHRoYXQgZnVuY3Rpb24gaW4gdGhlIFNDU0kgY29yZSBpcyBzY3NpX2lu
aXRpYWxpemVfcnEoKS4gVGhhdCBmdW5jdGlvbg0KPiA+IGhhcyB0d28gY2FsbGVycywgbmFtZWx5
IHNjc2lfaW5pdF9jb21tYW5kKCkgYW5kIGJsa19nZXRfcmVxdWVzdCgpLiBIb3dldmVyLA0KPiA+
IHRoZSBzY3NpX2NtbmQuZGV2aWNlIHBvaW50ZXIgaXMgbm90IGNsZWFyZWQgd2hlbiBhIHJlcXVl
c3QgZmluaXNoZXMuIFRoaXMgaXMNCj4gPiB3aHkgSSB0aGluayB0aGF0IHRoZSBhYm92ZSBjcmFz
aCByZXBvcnQgaW5kaWNhdGVzIHRoYXQgc2NzaV90aW1lc19vdXQoKSB3YXMNCj4gPiBjYWxsZWQg
Zm9yIGEgcmVxdWVzdCB0aGF0IHdhcyBiZWluZyByZWluaXRpYWxpemVkIGFuZCBub3QgYnkgZGV2
aWNlIGhvdHBsdWdnaW5nLg0KPiANCj4gQ2FuIHlvdSBwbGVhc2UgZ2l2ZSB0aGUgZm9sbG93aW5n
IHBhdGNoIGEgc2hvdD8gIFdoaWxlIHRpbWVvdXQgcGF0aCBpcw0KPiBzeW5jaG9ybml6aW5nIGFn
YWluc3QgdGhlIGNvbXBsZXRpb24gcGF0aCAoYW5kIHRoZSBmb2xsb3dpbmcgcmUtaW5pdCkNCj4g
d2hpbGUgdGFraW5nIGJhY2sgY29udHJvbCBvZiBhIHRpbWVkLW91dCByZXF1ZXN0LCBpdCB3YXNu
J3QgZG9pbmcgdGhhdA0KPiB3aGlsZSBnaXZpbmcgaXQgYmFjaywgc28gdGhlIHRpbWVyIHJlZ2lz
dHJhdGlvbiBjb3VsZCByYWNlIGFnYWluc3QNCj4gY29tcGxldGlvbiBhbmQgcmUtaXNzdWUuICBJ
J20gc3RpbGwgbm90IHF1aXRlIHN1cmUgaG93IHRoYXQgY2FuIGxlYWQNCj4gdG8gdGhlIG9vcHMg
dGhvLiAgQW55d2F5cywgd2UgbmVlZCBzb21ldGhpbmcgbGlrZSB0aGlzIG9uZSB3YXkgb3IgdGhl
DQo+IG90aGVyLg0KPiANCj4gVGhpcyBpc24ndCB0aGUgZmluYWwgcGF0Y2guICBXZSBzaG91bGQg
YWRkIGJhdGNoaW5nLXVwIG9mIHJjdQ0KPiBzeW5jaHJvbml6ZSBjYWxscyBzaW1pbGFyIHRvIHRo
ZSBhYm9ydCBwYXRoLg0KPiANCj4gVGhhbmtzLg0KPiANCj4gZGlmZiAtLWdpdCBhL2Jsb2NrL2Js
ay1tcS5jIGIvYmxvY2svYmxrLW1xLmMNCj4gaW5kZXggZGY5MzEwMi4uYjY2YWVjMyAxMDA2NDQN
Cj4gLS0tIGEvYmxvY2svYmxrLW1xLmMNCj4gKysrIGIvYmxvY2svYmxrLW1xLmMNCj4gQEAgLTgx
Niw3ICs4MTYsOCBAQCBzdHJ1Y3QgYmxrX21xX3RpbWVvdXRfZGF0YSB7DQo+ICAJdW5zaWduZWQg
aW50IG5yX2V4cGlyZWQ7DQo+ICB9Ow0KPiAgDQo+IC1zdGF0aWMgdm9pZCBibGtfbXFfcnFfdGlt
ZWRfb3V0KHN0cnVjdCByZXF1ZXN0ICpyZXEsIGJvb2wgcmVzZXJ2ZWQpDQo+ICtzdGF0aWMgdm9p
ZCBibGtfbXFfcnFfdGltZWRfb3V0KHN0cnVjdCBibGtfbXFfaHdfY3R4ICpoY3R4LCBzdHJ1Y3Qg
cmVxdWVzdCAqcmVxLA0KPiArCQkJCWJvb2wgcmVzZXJ2ZWQpDQo+ICB7DQo+ICAJY29uc3Qgc3Ry
dWN0IGJsa19tcV9vcHMgKm9wcyA9IHJlcS0+cS0+bXFfb3BzOw0KPiAgCWVudW0gYmxrX2VoX3Rp
bWVyX3JldHVybiByZXQgPSBCTEtfRUhfUkVTRVRfVElNRVI7DQo+IEBAIC04MzYsOCArODM3LDEy
IEBAIHN0YXRpYyB2b2lkIGJsa19tcV9ycV90aW1lZF9vdXQoc3RydWN0IHJlcXVlc3QgKnJlcSwg
Ym9vbCByZXNlcnZlZCkNCj4gIAkJICogLT5hYm9ydGVkX2dzdGF0ZSBpcyBzZXQsIHRoaXMgbWF5
IGxlYWQgdG8gaWdub3JlZA0KPiAgCQkgKiBjb21wbGV0aW9ucyBhbmQgZnVydGhlciBzcHVyaW91
cyB0aW1lb3V0cy4NCj4gIAkJICovDQo+IC0JCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3Rh
dGUocmVxLCAwKTsNCj4gIAkJYmxrX2FkZF90aW1lcihyZXEpOw0KPiArCQlpZiAoIShoY3R4LT5m
bGFncyAmIEJMS19NUV9GX0JMT0NLSU5HKSkNCj4gKwkJCXN5bmNocm9uaXplX3JjdSgpOw0KPiAr
CQllbHNlDQo+ICsJCQlzeW5jaHJvbml6ZV9zcmN1KGhjdHgtPnNyY3UpOw0KPiArCQlibGtfbXFf
cnFfdXBkYXRlX2Fib3J0ZWRfZ3N0YXRlKHJlcSwgMCk7DQo+ICAJCWJyZWFrOw0KPiAgCWNhc2Ug
QkxLX0VIX05PVF9IQU5ETEVEOg0KPiAgCQlicmVhazsNCj4gQEAgLTg5Myw3ICs4OTgsNyBAQCBz
dGF0aWMgdm9pZCBibGtfbXFfdGVybWluYXRlX2V4cGlyZWQoc3RydWN0IGJsa19tcV9od19jdHgg
KmhjdHgsDQo+ICAJICovDQo+ICAJaWYgKCEocnEtPnJxX2ZsYWdzICYgUlFGX01RX1RJTUVPVVRf
RVhQSVJFRCkgJiYNCj4gIAkgICAgUkVBRF9PTkNFKHJxLT5nc3RhdGUpID09IHJxLT5hYm9ydGVk
X2dzdGF0ZSkNCj4gLQkJYmxrX21xX3JxX3RpbWVkX291dChycSwgcmVzZXJ2ZWQpOw0KPiArCQli
bGtfbXFfcnFfdGltZWRfb3V0KGhjdHgsIHJxLCByZXNlcnZlZCk7DQo+ICB9DQo+ICANCj4gIHN0
YXRpYyB2b2lkIGJsa19tcV90aW1lb3V0X3dvcmsoc3RydWN0IHdvcmtfc3RydWN0ICp3b3JrKQ0K
DQpIZWxsbyBUZWp1biwNCg0KV2l0aCB0aGlzIHBhdGNoIGFwcGxpZWQgdGhlIHRlc3RzIEkgcmFu
IHNvIGZhciBwYXNzLg0KDQpUaGFua3MsDQoNCkJhcnQuDQoNCg0KDQo=

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-14 16:58                     ` Bart Van Assche
@ 2018-02-18 13:11                       ` tj
  2018-02-21 18:53                         ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-18 13:11 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

On Wed, Feb 14, 2018 at 04:58:56PM +0000, Bart Van Assche wrote:
> With this patch applied the tests I ran so far pass.

Ah, great to hear.  Thanks a lot for testing.  Can you please verify
the following?  It's the same approach but with RCU sync batching.

Thanks.

Index: work/block/blk-mq.c
===================================================================
--- work.orig/block/blk-mq.c
+++ work/block/blk-mq.c
@@ -816,7 +816,8 @@ struct blk_mq_timeout_data {
 	unsigned int nr_expired;
 };
 
-static void blk_mq_rq_timed_out(struct request *req, bool reserved)
+static void blk_mq_rq_timed_out(struct blk_mq_hw_ctx *hctx, struct request *req,
+				int *nr_resets, bool reserved)
 {
 	const struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
@@ -831,13 +832,10 @@ static void blk_mq_rq_timed_out(struct r
 		__blk_mq_complete_request(req);
 		break;
 	case BLK_EH_RESET_TIMER:
-		/*
-		 * As nothing prevents from completion happening while
-		 * ->aborted_gstate is set, this may lead to ignored
-		 * completions and further spurious timeouts.
-		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
 		blk_add_timer(req);
+		req->rq_flags |= RQF_MQ_TIMEOUT_RESET;
+		(*nr_resets)++;
+		hctx->need_sync_rcu = true;
 		break;
 	case BLK_EH_NOT_HANDLED:
 		break;
@@ -874,13 +872,34 @@ static void blk_mq_check_expired(struct
 	    time_after_eq(jiffies, deadline)) {
 		blk_mq_rq_update_aborted_gstate(rq, gstate);
 		data->nr_expired++;
-		hctx->nr_expired++;
+		hctx->need_sync_rcu = true;
 	} else if (!data->next_set || time_after(data->next, deadline)) {
 		data->next = deadline;
 		data->next_set = 1;
 	}
 }
 
+static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	bool has_rcu = false;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (!hctx->need_sync_rcu)
+			continue;
+
+		if (!(hctx->flags & BLK_MQ_F_BLOCKING))
+			has_rcu = true;
+		else
+			synchronize_srcu(hctx->srcu);
+
+		hctx->need_sync_rcu = false;
+	}
+	if (has_rcu)
+		synchronize_rcu();
+}
+
 static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
@@ -893,7 +912,25 @@ static void blk_mq_terminate_expired(str
 	 */
 	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
 	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
-		blk_mq_rq_timed_out(rq, reserved);
+		blk_mq_rq_timed_out(hctx, rq, priv, reserved);
+}
+
+static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+		struct request *rq, void *priv, bool reserved)
+{
+	/*
+	 * @rq's timer reset has gone through rcu synchronization and is
+	 * visible now.  Allow normal completions again by resetting
+	 * ->aborted_gstate.  Don't clear RQF_MQ_TIMEOUT_RESET here as
+	 * there's no memory barrier around ->aborted_gstate.  Let
+	 * blk_add_timer() clear it later.
+	 *
+	 * As nothing prevents from completion happening while
+	 * ->aborted_gstate is set, this may lead to ignored completions
+	 * and further spurious timeouts.
+	 */
+	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
+		blk_mq_rq_update_aborted_gstate(rq, 0);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)
@@ -928,7 +965,7 @@ static void blk_mq_timeout_work(struct w
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
 	if (data.nr_expired) {
-		bool has_rcu = false;
+		int nr_resets = 0;
 
 		/*
 		 * Wait till everyone sees ->aborted_gstate.  The
@@ -936,22 +973,22 @@ static void blk_mq_timeout_work(struct w
 		 * becomes a problem, we can add per-hw_ctx rcu_head and
 		 * wait in parallel.
 		 */
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->nr_expired)
-				continue;
-
-			if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-				has_rcu = true;
-			else
-				synchronize_srcu(hctx->srcu);
-
-			hctx->nr_expired = 0;
-		}
-		if (has_rcu)
-			synchronize_rcu();
+		blk_mq_timeout_sync_rcu(q);
 
 		/* terminate the ones we won */
-		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
+		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
+					   &nr_resets);
+
+		/*
+		 * For BLK_EH_RESET_TIMER, release the requests after
+		 * blk_add_timer() from above is visible to avoid timer
+		 * reset racing against recycling.
+		 */
+		if (nr_resets) {
+			blk_mq_timeout_sync_rcu(q);
+			blk_mq_queue_tag_busy_iter(q,
+					blk_mq_finish_timeout_reset, NULL);
+		}
 	}
 
 	if (data.next_set) {
Index: work/include/linux/blk-mq.h
===================================================================
--- work.orig/include/linux/blk-mq.h
+++ work/include/linux/blk-mq.h
@@ -51,7 +51,7 @@ struct blk_mq_hw_ctx {
 	unsigned int		queue_num;
 
 	atomic_t		nr_active;
-	unsigned int		nr_expired;
+	bool			need_sync_rcu;
 
 	struct hlist_node	cpuhp_dead;
 	struct kobject		kobj;
Index: work/block/blk-timeout.c
===================================================================
--- work.orig/block/blk-timeout.c
+++ work/block/blk-timeout.c
@@ -216,7 +216,7 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
+	req->rq_flags &= ~(RQF_MQ_TIMEOUT_EXPIRED | RQF_MQ_TIMEOUT_RESET);
 
 	/*
 	 * Only the non-mq case needs to add the request to a protected list.
Index: work/include/linux/blkdev.h
===================================================================
--- work.orig/include/linux/blkdev.h
+++ work/include/linux/blkdev.h
@@ -127,8 +127,10 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
 /* timeout is expired */
 #define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
+/* timeout is expired */
+#define RQF_MQ_TIMEOUT_RESET	((__force req_flags_t)(1 << 21))
 /* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
+#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 22))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-18 13:11                       ` tj
@ 2018-02-21 18:53                         ` Bart Van Assche
  2018-02-21 19:21                           ` tj
  0 siblings, 1 reply; 28+ messages in thread
From: Bart Van Assche @ 2018-02-21 18:53 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gU3VuLCAyMDE4LTAyLTE4IGF0IDA1OjExIC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBPbiBXZWQsIEZlYiAxNCwgMjAxOCBhdCAwNDo1ODo1NlBNICswMDAwLCBCYXJ0IFZhbiBBc3Nj
aGUgd3JvdGU6DQo+ID4gV2l0aCB0aGlzIHBhdGNoIGFwcGxpZWQgdGhlIHRlc3RzIEkgcmFuIHNv
IGZhciBwYXNzLg0KPiANCj4gQWgsIGdyZWF0IHRvIGhlYXIuICBUaGFua3MgYSBsb3QgZm9yIHRl
c3RpbmcuICBDYW4geW91IHBsZWFzZSB2ZXJpZnkNCj4gdGhlIGZvbGxvd2luZz8gIEl0J3MgdGhl
IHNhbWUgYXBwcm9hY2ggYnV0IHdpdGggUkNVIHN5bmMgYmF0Y2hpbmcuDQoNCkhlbGxvIFRlanVu
LA0KDQpBZnRlciBoYXZpbmcgbWVyZ2VkIGtlcm5lbCB2NC4xNi1yYzIgaW50byBteSBrZXJuZWwg
dHJlZSBhbmQgYWZ0ZXIgaGF2aW5nDQphcHBsaWVkIHBhdGNoICJBdm9pZCB0aGF0IEFUQSBlcnJv
ciBoYW5kbGluZyBoYW5ncyINCihodHRwczovL3d3dy5tYWlsLWFyY2hpdmUuY29tL2xpbnV4LXNj
c2lAdmdlci5rZXJuZWwub3JnL21zZzcxMTQ1Lmh0bWwpIEkNCmhhdmUgbm90IGJlZW4gYWJsZSB0
byByZXByb2R1Y2UgdGhlIHJlcG9ydGVkIGNyYXNoIC0gbmVpdGhlciB3aXRoIHRoZSBwYXRjaA0K
YXBwbGllZCB0aGF0IHdhcyBwb3N0ZWQgb24gRmVicnVhcnkgMTN0aCBub3Igd2l0aG91dCB0aGF0
IHBhdGNoLiBUaGlzIG1ha2VzDQptZSB3b25kZXIgd2hldGhlciB3ZSBzaG91bGQgZHJvcCB0aGUg
ZGlzY3Vzc2VkIHBhdGNoZXMgdW5sZXNzIHNvbWVvbmUgY29tZXMNCnVwIHdpdGggYSByZXByb2R1
Y2libGUgdGVzdCBjYXNlPw0KDQpUaGFua3MsDQoNCkJhcnQuDQoNCg0K

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-21 18:53                         ` Bart Van Assche
@ 2018-02-21 19:21                           ` tj
  2018-02-21 22:55                             ` Bart Van Assche
  0 siblings, 1 reply; 28+ messages in thread
From: tj @ 2018-02-21 19:21 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

Hello, Bart.

On Wed, Feb 21, 2018 at 06:53:05PM +0000, Bart Van Assche wrote:
> On Sun, 2018-02-18 at 05:11 -0800, tj@kernel.org wrote:
> > On Wed, Feb 14, 2018 at 04:58:56PM +0000, Bart Van Assche wrote:
> > > With this patch applied the tests I ran so far pass.
> > 
> > Ah, great to hear.  Thanks a lot for testing.  Can you please verify
> > the following?  It's the same approach but with RCU sync batching.
> 
> Hello Tejun,
> 
> After having merged kernel v4.16-rc2 into my kernel tree and after having
> applied patch "Avoid that ATA error handling hangs"
> (https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg71145.html) I
> have not been able to reproduce the reported crash - neither with the patch
> applied that was posted on February 13th nor without that patch. This makes
> me wonder whether we should drop the discussed patches unless someone comes
> up with a reproducible test case?

It is an actual bug in that we actually can override the timer setting
of the next request instance.  Given that the race window isn't that
large, it makes sense that the reproducibility is affected by
butterflies.  I think it makes sense to fix the bug.  Any chance you
can test the new patch on top of the reproducible setup?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling
  2018-02-21 19:21                           ` tj
@ 2018-02-21 22:55                             ` Bart Van Assche
  0 siblings, 0 replies; 28+ messages in thread
From: Bart Van Assche @ 2018-02-21 22:55 UTC (permalink / raw)
  To: tj; +Cc: hch, linux-block, axboe

T24gV2VkLCAyMDE4LTAyLTIxIGF0IDExOjIxIC0wODAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBIZWxsbywgQmFydC4NCj4gDQo+IE9uIFdlZCwgRmViIDIxLCAyMDE4IGF0IDA2OjUzOjA1UE0g
KzAwMDAsIEJhcnQgVmFuIEFzc2NoZSB3cm90ZToNCj4gPiBPbiBTdW4sIDIwMTgtMDItMTggYXQg
MDU6MTEgLTA4MDAsIHRqQGtlcm5lbC5vcmcgd3JvdGU6DQo+ID4gPiBPbiBXZWQsIEZlYiAxNCwg
MjAxOCBhdCAwNDo1ODo1NlBNICswMDAwLCBCYXJ0IFZhbiBBc3NjaGUgd3JvdGU6DQo+ID4gPiA+
IFdpdGggdGhpcyBwYXRjaCBhcHBsaWVkIHRoZSB0ZXN0cyBJIHJhbiBzbyBmYXIgcGFzcy4NCj4g
PiA+IA0KPiA+ID4gQWgsIGdyZWF0IHRvIGhlYXIuICBUaGFua3MgYSBsb3QgZm9yIHRlc3Rpbmcu
ICBDYW4geW91IHBsZWFzZSB2ZXJpZnkNCj4gPiA+IHRoZSBmb2xsb3dpbmc/ICBJdCdzIHRoZSBz
YW1lIGFwcHJvYWNoIGJ1dCB3aXRoIFJDVSBzeW5jIGJhdGNoaW5nLg0KPiA+IA0KPiA+IEhlbGxv
IFRlanVuLA0KPiA+IA0KPiA+IEFmdGVyIGhhdmluZyBtZXJnZWQga2VybmVsIHY0LjE2LXJjMiBp
bnRvIG15IGtlcm5lbCB0cmVlIGFuZCBhZnRlciBoYXZpbmcNCj4gPiBhcHBsaWVkIHBhdGNoICJB
dm9pZCB0aGF0IEFUQSBlcnJvciBoYW5kbGluZyBoYW5ncyINCj4gPiAoaHR0cHM6Ly93d3cubWFp
bC1hcmNoaXZlLmNvbS9saW51eC1zY3NpQHZnZXIua2VybmVsLm9yZy9tc2c3MTE0NS5odG1sKSBJ
DQo+ID4gaGF2ZSBub3QgYmVlbiBhYmxlIHRvIHJlcHJvZHVjZSB0aGUgcmVwb3J0ZWQgY3Jhc2gg
LSBuZWl0aGVyIHdpdGggdGhlIHBhdGNoDQo+ID4gYXBwbGllZCB0aGF0IHdhcyBwb3N0ZWQgb24g
RmVicnVhcnkgMTN0aCBub3Igd2l0aG91dCB0aGF0IHBhdGNoLiBUaGlzIG1ha2VzDQo+ID4gbWUg
d29uZGVyIHdoZXRoZXIgd2Ugc2hvdWxkIGRyb3AgdGhlIGRpc2N1c3NlZCBwYXRjaGVzIHVubGVz
cyBzb21lb25lIGNvbWVzDQo+ID4gdXAgd2l0aCBhIHJlcHJvZHVjaWJsZSB0ZXN0IGNhc2U/DQo+
IA0KPiBJdCBpcyBhbiBhY3R1YWwgYnVnIGluIHRoYXQgd2UgYWN0dWFsbHkgY2FuIG92ZXJyaWRl
IHRoZSB0aW1lciBzZXR0aW5nDQo+IG9mIHRoZSBuZXh0IHJlcXVlc3QgaW5zdGFuY2UuICBHaXZl
biB0aGF0IHRoZSByYWNlIHdpbmRvdyBpc24ndCB0aGF0DQo+IGxhcmdlLCBpdCBtYWtlcyBzZW5z
ZSB0aGF0IHRoZSByZXByb2R1Y2liaWxpdHkgaXMgYWZmZWN0ZWQgYnkNCj4gYnV0dGVyZmxpZXMu
ICBJIHRoaW5rIGl0IG1ha2VzIHNlbnNlIHRvIGZpeCB0aGUgYnVnLiAgQW55IGNoYW5jZSB5b3UN
Cj4gY2FuIHRlc3QgdGhlIG5ldyBwYXRjaCBvbiB0b3Agb2YgdGhlIHJlcHJvZHVjaWJsZSBzZXR1
cD8NCg0KSGVsbG8gVGVqdW4sDQoNClNpbmNlIEkgaGFkIG5vdCBzYXZlZCBhbnkgb2YgdGhlIHRy
ZWVzIHRoYXQgSSBoYWQgdXNlZCBkdXJpbmcgbXkgdGVzdHMgSSBwaWNrZWQNCnNldmVyYWwgdHJl
ZXMgZnJvbSB0aGUgImdpdCByZWZsb2ciIG91dHB1dCBhbmQgdHJpZWQgdG8gcmVwcm9kdWNlIHRo
ZSBjcmFzaCB0aGF0DQpJIGhhZCByZXBvcnRlZCB3aXRoIHRoZXNlIHRyZWVzLiBVbmZvcnR1bmF0
ZWx5IHNvIGZhciB3aXRob3V0IHN1Y2Nlc3MuDQoNCkJhcnQuDQoNCg0K

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2018-02-21 22:55 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-07  1:11 [PATCH v2] blk-mq: Fix race between resetting the timer and completion handling Bart Van Assche
2018-02-07 17:06 ` Tejun Heo
2018-02-07 17:27   ` Bart Van Assche
2018-02-07 17:35     ` tj
2018-02-07 18:14       ` Bart Van Assche
2018-02-07 20:07         ` tj
2018-02-07 23:48           ` Bart Van Assche
2018-02-08  1:09             ` Bart Van Assche
2018-02-08 15:39               ` tj
2018-02-08 15:40                 ` tj
2018-02-08 16:31                 ` Bart Van Assche
2018-02-08 17:00                   ` tj
2018-02-08 17:10                     ` Bart Van Assche
2018-02-08 17:19                       ` tj
2018-02-08 17:37                         ` Bart Van Assche
2018-02-08 17:40                           ` tj
2018-02-08 17:48                             ` Bart Van Assche
2018-02-08 17:54                               ` tj
2018-02-13 21:20                   ` tj
2018-02-14 16:58                     ` Bart Van Assche
2018-02-18 13:11                       ` tj
2018-02-21 18:53                         ` Bart Van Assche
2018-02-21 19:21                           ` tj
2018-02-21 22:55                             ` Bart Van Assche
2018-02-07 19:03   ` Bart Van Assche
2018-02-07 20:09     ` tj
2018-02-07 21:02       ` Bart Van Assche
2018-02-07 21:40         ` tj

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).