All of lore.kernel.org
 help / color / mirror / Atom feed
* split scsi passthrough fields out of struct request V2
@ 2017-01-25 17:25 Christoph Hellwig
  2017-01-25 17:25 ` [PATCH 01/18] block: add a op_is_flush helper Christoph Hellwig
                   ` (20 more replies)
  0 siblings, 21 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

Hi all,

this series splits the support for SCSI passthrough commands from the
main struct request used all over the block layer into a separate
scsi_request structure that drivers that want to support SCSI passthrough
need to embed as the first thing into their request-private data,
similar to how we handle NVMe passthrough commands.

To support this I've added support for private data after the request
structure to the legacy request path as well, so that it can
be treated the same way as the blk-mq path.  Compared to the current
scsi_cmnd allocator this actually is a major simplification.

Changes since V1:
 - fix handling of a NULL sense pointer in __scsi_execute
 - clean up handling of the flush flags in the block layer and MD
 - additional small cleanup in dm-rq

^ permalink raw reply	[flat|nested] 172+ messages in thread

* [PATCH 01/18] block: add a op_is_flush helper
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  2:58   ` Martin K. Petersen
  2017-01-26 22:38   ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
                   ` (19 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

This centralizes the checks for bios that need to go into the flush
state machine.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c             |  8 ++++----
 block/blk-mq-sched.c         |  5 ++---
 block/blk-mq.c               |  4 ++--
 drivers/md/bcache/request.c  |  2 +-
 drivers/md/dm-cache-target.c | 13 +++----------
 drivers/md/dm-thin.c         | 13 +++++--------
 include/linux/blk_types.h    |  9 +++++++++
 7 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a61f140..b830e14 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1035,7 +1035,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
 	 * Flush requests do not use the elevator so skip initialization.
 	 * This allows a request to share the flush and elevator data.
 	 */
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA))
+	if (op_is_flush(bio->bi_opf))
 		return false;
 
 	return true;
@@ -1641,7 +1641,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) {
+	if (op_is_flush(bio->bi_opf)) {
 		spin_lock_irq(q->queue_lock);
 		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
@@ -2145,7 +2145,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	 */
 	BUG_ON(blk_queued_rq(rq));
 
-	if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
+	if (op_is_flush(rq->cmd_flags))
 		where = ELEVATOR_INSERT_FLUSH;
 
 	add_acct_request(q, rq, where);
@@ -3256,7 +3256,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		/*
 		 * rq is already accounted, so use raw insert
 		 */
-		if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
+		if (op_is_flush(rq->cmd_flags))
 			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
 		else
 			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index d05061f..3bd66e5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -111,7 +111,6 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct request *rq;
-	const bool is_flush = op & (REQ_PREFLUSH | REQ_FUA);
 
 	blk_queue_enter_live(q);
 	ctx = blk_mq_get_ctx(q);
@@ -126,7 +125,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 		 * Flush requests are special and go directly to the
 		 * dispatch list.
 		 */
-		if (!is_flush && e->type->ops.mq.get_request) {
+		if (!op_is_flush(op) && e->type->ops.mq.get_request) {
 			rq = e->type->ops.mq.get_request(q, op, data);
 			if (rq)
 				rq->rq_flags |= RQF_QUEUED;
@@ -138,7 +137,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	}
 
 	if (rq) {
-		if (!is_flush) {
+		if (!op_is_flush(op)) {
 			rq->elv.icq = NULL;
 			if (e && e->type->icq_cache)
 				blk_mq_sched_assign_ioc(q, rq, bio);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ee69e5e..e229f8a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1378,7 +1378,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
-	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_mq_alloc_data data;
 	struct request *rq;
 	unsigned int request_count = 0, srcu_idx;
@@ -1498,7 +1498,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
-	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
 	struct blk_mq_alloc_data data;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 76d2087..01035e7 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio,
 	s->iop.write_prio	= 0;
 	s->iop.error		= 0;
 	s->iop.flags		= 0;
-	s->iop.flush_journal	= (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0;
+	s->iop.flush_journal	= op_is_flush(bio->bi_opf);
 	s->iop.wq		= bcache_wq;
 
 	return s;
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index e04c61e..5b9cf56 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
 	spin_lock_irqsave(&cache->lock, flags);
-	if (cache->need_tick_bio &&
-	    !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
+	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
 	    bio_op(bio) != REQ_OP_DISCARD) {
 		pb->tick = true;
 		cache->need_tick_bio = false;
@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 	return to_oblock(block_nr);
 }
 
-static int bio_triggers_commit(struct cache *cache, struct bio *bio)
-{
-	return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
-}
-
 /*
  * You must increment the deferred set whilst the prison cell is held.  To
  * encourage this, we ask for 'cell' to be passed in.
@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio)
 {
 	unsigned long flags;
 
-	if (!bio_triggers_commit(cache, bio)) {
+	if (!op_is_flush(bio->bi_opf)) {
 		accounted_request(cache, bio);
 		return;
 	}
@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache)
 
 static bool discard_or_flush(struct bio *bio)
 {
-	return bio_op(bio) == REQ_OP_DISCARD ||
-	       bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
+	return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
 }
 
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index d1c05c1..110982d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
 
 static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
 {
-	return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
+	return op_is_flush(bio->bi_opf) &&
 		dm_thin_changed_this_transaction(tc->td);
 }
 
@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context,
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&cell->bios))) {
-		if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-		    bio_op(bio) == REQ_OP_DISCARD)
+		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
 			bio_list_add(&info->defer_bios, bio);
 		else {
 			inc_all_io_entry(info->tc->pool, bio);
@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context,
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&cell->bios))) {
-		if ((bio_data_dir(bio) == WRITE) ||
-		    (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-		     bio_op(bio) == REQ_OP_DISCARD))
+		if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
+		    bio_op(bio) == REQ_OP_DISCARD)
 			bio_list_add(&info->defer_bios, bio);
 		else {
 			struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_SUBMITTED;
 	}
 
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-	    bio_op(bio) == REQ_OP_DISCARD) {
+	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
 		thin_defer_bio_with_throttle(tc, bio);
 		return DM_MAPIO_SUBMITTED;
 	}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0e5b1cd..37c9a43 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -221,6 +221,15 @@ static inline bool op_is_write(unsigned int op)
 }
 
 /*
+ * Check if the bio or request is one that needs special treatment in the
+ * flush state machine.
+ */
+static inline bool op_is_flush(unsigned int op)
+{
+	return op & (REQ_FUA | REQ_PREFLUSH);
+}
+
+/*
  * Reads are always treated as synchronous, as are requests with the FUA or
  * PREFLUSH flag.  Other operations may be marked as synchronous using the
  * REQ_SYNC flag.
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
  2017-01-25 17:25 ` [PATCH 01/18] block: add a op_is_flush helper Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  2:59   ` Martin K. Petersen
  2017-01-26 23:18     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 03/18] block: fix elevator init check Christoph Hellwig
                   ` (18 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

No need for the local variables, the bio is still live and we can just
assign the bits we want directly.  Makes me wonder why we can't assign
all the bio flags to start with.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/md/raid1.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7b0f647..67b0365 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1170,10 +1170,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	int i, disks;
 	struct bitmap *bitmap = mddev->bitmap;
 	unsigned long flags;
-	const int op = bio_op(bio);
-	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
-	const unsigned long do_flush_fua = (bio->bi_opf &
-						(REQ_PREFLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
 	struct blk_plug_cb *cb;
 	struct raid1_plug_cb *plug = NULL;
@@ -1389,7 +1385,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 				   conf->mirrors[i].rdev->data_offset);
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io	= raid1_end_write_request;
-		bio_set_op_attrs(mbio, op, do_flush_fua | do_sync);
+		mbio->bi_opf = bio_op(bio) |
+			(bio->bi_opf & (REQ_SYNC | REQ_PREFLUSH | REQ_FUA));
 		if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) &&
 		    !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) &&
 		    conf->raid_disks - mddev->degraded > 1)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 03/18] block: fix elevator init check
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
  2017-01-25 17:25 ` [PATCH 01/18] block: add a op_is_flush helper Christoph Hellwig
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:01   ` Martin K. Petersen
  2017-01-26 23:21     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 04/18] block: simplify blk_init_allocated_queue Christoph Hellwig
                   ` (17 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

We can't initialize the elevator fields for flushes as flushes share space
in struct request with the elevator data.  But currently we can't
communicate that a request is a flush through blk_get_request as we
can only pass READ or WRITE, and the low-level code looks at the
possible NULL bio to check for a flush.

Fix this by allowing to pass any block op and flags, and by checking for
the flush flags in __get_request.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-core.c | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index b830e14..a84c1b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1022,25 +1022,6 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 	return 0;
 }
 
-/*
- * Determine if elevator data should be initialized when allocating the
- * request associated with @bio.
- */
-static bool blk_rq_should_init_elevator(struct bio *bio)
-{
-	if (!bio)
-		return true;
-
-	/*
-	 * Flush requests do not use the elevator so skip initialization.
-	 * This allows a request to share the flush and elevator data.
-	 */
-	if (op_is_flush(bio->bi_opf))
-		return false;
-
-	return true;
-}
-
 /**
  * __get_request - get a free request
  * @rl: request list to allocate from
@@ -1119,10 +1100,13 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
 	 * request is freed.  This guarantees icq's won't be destroyed and
 	 * makes creating new ones safe.
 	 *
+	 * Flush requests do not use the elevator so skip initialization.
+	 * This allows a request to share the flush and elevator data.
+	 *
 	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
 	 * it will be created after releasing queue_lock.
 	 */
-	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
+	if (!op_is_flush(op) && !blk_queue_bypass(q)) {
 		rq_flags |= RQF_ELVPRIV;
 		q->nr_rqs_elvpriv++;
 		if (et->icq_cache && ioc)
@@ -1276,8 +1260,6 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 {
 	struct request *rq;
 
-	BUG_ON(rw != READ && rw != WRITE);
-
 	/* create ioc upfront */
 	create_io_context(gfp_mask, q->node);
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 04/18] block: simplify blk_init_allocated_queue
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 03/18] block: fix elevator init check Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:02   ` Martin K. Petersen
  2017-01-26 23:27     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 05/18] block: allow specifying size for extra command data Christoph Hellwig
                   ` (16 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

Return an errno value instead of the passed in queue so that the callers
don't have to keep track of two queues, and move the assignment of the
request_fn and lock to the caller as passing them as arguments doesn't
simplify anything.  While we're at it also remove two pointless NULL
assignments, given that the request structure is zeroed on allocation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-core.c       | 38 +++++++++++++++-----------------------
 drivers/md/dm-rq.c     |  3 ++-
 include/linux/blkdev.h |  3 +--
 3 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a84c1b9..54b5512 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -823,15 +823,19 @@ EXPORT_SYMBOL(blk_init_queue);
 struct request_queue *
 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 {
-	struct request_queue *uninit_q, *q;
+	struct request_queue *q;
 
-	uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
-	if (!uninit_q)
+	q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+	if (!q)
 		return NULL;
 
-	q = blk_init_allocated_queue(uninit_q, rfn, lock);
-	if (!q)
-		blk_cleanup_queue(uninit_q);
+	q->request_fn = rfn;
+	if (lock)
+		q->queue_lock = lock;
+	if (blk_init_allocated_queue(q) < 0) {
+		blk_cleanup_queue(q);
+		return NULL;
+	}
 
 	return q;
 }
@@ -839,30 +843,19 @@ EXPORT_SYMBOL(blk_init_queue_node);
 
 static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
-struct request_queue *
-blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
-			 spinlock_t *lock)
-{
-	if (!q)
-		return NULL;
 
+int blk_init_allocated_queue(struct request_queue *q)
+{
 	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
 	if (!q->fq)
-		return NULL;
+		return -ENOMEM;
 
 	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
 		goto fail;
 
 	INIT_WORK(&q->timeout_work, blk_timeout_work);
-	q->request_fn		= rfn;
-	q->prep_rq_fn		= NULL;
-	q->unprep_rq_fn		= NULL;
 	q->queue_flags		|= QUEUE_FLAG_DEFAULT;
 
-	/* Override internal queue lock with supplied lock pointer */
-	if (lock)
-		q->queue_lock		= lock;
-
 	/*
 	 * This also sets hw/phys segments, boundary and size
 	 */
@@ -880,13 +873,12 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 	}
 
 	mutex_unlock(&q->sysfs_lock);
-
-	return q;
+	return 0;
 
 fail:
 	blk_free_flush_queue(q->fq);
 	wbt_exit(q);
-	return NULL;
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
 
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 9d7275f..93f6e9f 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -823,7 +823,8 @@ static void dm_old_request_fn(struct request_queue *q)
 int dm_old_init_request_queue(struct mapped_device *md)
 {
 	/* Fully initialize the queue */
-	if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
+	md->queue->request_fn = dm_old_request_fn;
+	if (blk_init_allocated_queue(md->queue) < 0)
 		return -EINVAL;
 
 	/* disable dm_old_request_fn's merge heuristic by default */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8e0b57e..a036c4a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1131,8 +1131,7 @@ extern void blk_unprep_request(struct request *);
 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
 					spinlock_t *lock, int node_id);
 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
-extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
-						      request_fn_proc *, spinlock_t *);
+extern int blk_init_allocated_queue(struct request_queue *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 04/18] block: simplify blk_init_allocated_queue Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:15   ` Martin K. Petersen
  2017-01-25 17:25 ` [PATCH 06/18] dm: remove incomplete BLOCK_PC support Christoph Hellwig
                   ` (15 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

This mirrors the blk-mq capabilities to allocate extra driver-specific
data behind struct request by setting a cmd_size field, as well as having
a constructor / destructor for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-core.c       | 59 ++++++++++++++++++++++++++++++++++++++++----------
 block/blk-flush.c      |  5 ++---
 block/blk-sysfs.c      |  7 ++++--
 include/linux/blkdev.h |  7 ++++++
 4 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 54b5512..7de7164 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -606,17 +606,41 @@ void blk_cleanup_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_cleanup_queue);
 
 /* Allocate memory local to the request queue */
-static void *alloc_request_struct(gfp_t gfp_mask, void *data)
+static void *alloc_request_simple(gfp_t gfp_mask, void *data)
 {
-	int nid = (int)(long)data;
-	return kmem_cache_alloc_node(request_cachep, gfp_mask, nid);
+	struct request_queue *q = data;
+
+	return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node);
 }
 
-static void free_request_struct(void *element, void *unused)
+static void free_request_simple(void *element, void *data)
 {
 	kmem_cache_free(request_cachep, element);
 }
 
+static void *alloc_request_size(gfp_t gfp_mask, void *data)
+{
+	struct request_queue *q = data;
+	struct request *rq;
+
+	rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask,
+			q->node);
+	if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) {
+		kfree(rq);
+		rq = NULL;
+	}
+	return rq;
+}
+
+static void free_request_size(void *element, void *data)
+{
+	struct request_queue *q = data;
+
+	if (q->exit_rq_fn)
+		q->exit_rq_fn(q, element);
+	kfree(element);
+}
+
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask)
 {
@@ -629,10 +653,15 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
 	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
 
-	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct,
-					  free_request_struct,
-					  (void *)(long)q->node, gfp_mask,
-					  q->node);
+	if (q->cmd_size) {
+		rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
+				alloc_request_size, free_request_size,
+				q, gfp_mask, q->node);
+	} else {
+		rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
+				alloc_request_simple, free_request_simple,
+				q, gfp_mask, q->node);
+	}
 	if (!rl->rq_pool)
 		return -ENOMEM;
 
@@ -846,12 +875,15 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
 
 int blk_init_allocated_queue(struct request_queue *q)
 {
-	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
+	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
 	if (!q->fq)
 		return -ENOMEM;
 
+	if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL))
+		goto out_free_flush_queue;
+
 	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
-		goto fail;
+		goto out_exit_flush_rq;
 
 	INIT_WORK(&q->timeout_work, blk_timeout_work);
 	q->queue_flags		|= QUEUE_FLAG_DEFAULT;
@@ -869,13 +901,16 @@ int blk_init_allocated_queue(struct request_queue *q)
 	/* init elevator */
 	if (elevator_init(q, NULL)) {
 		mutex_unlock(&q->sysfs_lock);
-		goto fail;
+		goto out_exit_flush_rq;
 	}
 
 	mutex_unlock(&q->sysfs_lock);
 	return 0;
 
-fail:
+out_exit_flush_rq:
+	if (q->exit_rq_fn)
+		q->exit_rq_fn(q, q->fq->flush_rq);
+out_free_flush_queue:
 	blk_free_flush_queue(q->fq);
 	wbt_exit(q);
 	return -ENOMEM;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index d7de34e..bf3ba3c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -547,11 +547,10 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
 	if (!fq)
 		goto fail;
 
-	if (q->mq_ops) {
+	if (q->mq_ops)
 		spin_lock_init(&fq->mq_flush_lock);
-		rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
-	}
 
+	rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
 	fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
 	if (!fq->flush_rq)
 		goto fail_rq;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1dbce05..894f773 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -814,10 +814,13 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	if (!q->mq_ops)
+	if (!q->mq_ops) {
+		if (q->exit_rq_fn)
+			q->exit_rq_fn(q, q->fq->flush_rq);
 		blk_free_flush_queue(q->fq);
-	else
+	} else {
 		blk_mq_release(q);
+	}
 
 	blk_trace_shutdown(q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a036c4a..648ecf5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -273,6 +273,8 @@ typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
 typedef int (bsg_job_fn) (struct bsg_job *);
+typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
+typedef void (exit_rq_fn)(struct request_queue *, struct request *);
 
 enum blk_eh_timer_return {
 	BLK_EH_NOT_HANDLED,
@@ -408,6 +410,8 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	init_rq_fn		*init_rq_fn;
+	exit_rq_fn		*exit_rq_fn;
 
 	const struct blk_mq_ops	*mq_ops;
 
@@ -572,6 +576,9 @@ struct request_queue {
 	struct bio_set		*bio_split;
 
 	bool			mq_sysfs_init_done;
+
+	size_t			cmd_size;
+	void			*rq_alloc_data;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 06/18] dm: remove incomplete BLOCK_PC support
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 05/18] block: allow specifying size for extra command data Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-27 17:32     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue Christoph Hellwig
                   ` (14 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

DM tries to copy a few fields around for BLOCK_PC requests, but given
that no dm-target ever wires up scsi_cmd_ioctl, BLOCK_PC can't actually
be sent to dm.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 drivers/md/dm-rq.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 93f6e9f..3f12916 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -270,19 +270,6 @@ static void dm_end_request(struct request *clone, int error)
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 
-	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		rq->errors = clone->errors;
-		rq->resid_len = clone->resid_len;
-
-		if (rq->sense)
-			/*
-			 * We are using the sense buffer of the original
-			 * request.
-			 * So setting the length of the sense data is enough.
-			 */
-			rq->sense_len = clone->sense_len;
-	}
-
 	free_rq_clone(clone);
 	rq_end_stats(md, rq);
 	if (!rq->q->mq_ops)
@@ -511,9 +498,6 @@ static int setup_clone(struct request *clone, struct request *rq,
 	if (r)
 		return r;
 
-	clone->cmd = rq->cmd;
-	clone->cmd_len = rq->cmd_len;
-	clone->sense = rq->sense;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 06/18] dm: remove incomplete BLOCK_PC support Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-27 16:34     ` Mike Snitzer
  2017-01-25 17:25 ` [PATCH 08/18] scsi_dh_rdac: switch to scsi_execute_req_flags() Christoph Hellwig
                   ` (13 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

DM already calls blk_mq_alloc_request on the request_queue of the
underlying device if it is a blk-mq device.  But now that we allow drivers
to allocate additional data and initialize it ahead of time we need to do
the same for all drivers.   Doing so and using the new cmd_size
infrastructure in the block layer greatly simplifies the dm-rq and mpath
code, and should also make arbitrary combinations of SQ and MQ devices
with SQ or MQ device mapper tables easily possible as a further step.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-core.h          |   1 -
 drivers/md/dm-mpath.c         | 132 ++++------------------
 drivers/md/dm-rq.c            | 251 ++++++++++--------------------------------
 drivers/md/dm-rq.h            |   2 +-
 drivers/md/dm-target.c        |   7 --
 drivers/md/dm.c               |  30 ++---
 drivers/md/dm.h               |   3 +-
 include/linux/device-mapper.h |   3 -
 8 files changed, 85 insertions(+), 344 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 40ceba1..136fda3 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -92,7 +92,6 @@ struct mapped_device {
 	 * io objects are allocated from here.
 	 */
 	mempool_t *io_pool;
-	mempool_t *rq_pool;
 
 	struct bio_set *bs;
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6400cff..784f237 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -92,12 +92,6 @@ struct multipath {
 
 	unsigned queue_mode;
 
-	/*
-	 * We must use a mempool of dm_mpath_io structs so that we
-	 * can resubmit bios on error.
-	 */
-	mempool_t *mpio_pool;
-
 	struct mutex work_mutex;
 	struct work_struct trigger_event;
 
@@ -115,8 +109,6 @@ struct dm_mpath_io {
 
 typedef int (*action_fn) (struct pgpath *pgpath);
 
-static struct kmem_cache *_mpio_cache;
-
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
 static void trigger_event(struct work_struct *work);
 static void activate_path(struct work_struct *work);
@@ -209,7 +201,6 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 		init_waitqueue_head(&m->pg_init_wait);
 		mutex_init(&m->work_mutex);
 
-		m->mpio_pool = NULL;
 		m->queue_mode = DM_TYPE_NONE;
 
 		m->ti = ti;
@@ -229,16 +220,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
 			m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
 		else
 			m->queue_mode = DM_TYPE_REQUEST_BASED;
-	}
-
-	if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
-		unsigned min_ios = dm_get_reserved_rq_based_ios();
-
-		m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
-		if (!m->mpio_pool)
-			return -ENOMEM;
-	}
-	else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+	} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
 		INIT_WORK(&m->process_queued_bios, process_queued_bios);
 		/*
 		 * bio-based doesn't support any direct scsi_dh management;
@@ -263,7 +245,6 @@ static void free_multipath(struct multipath *m)
 
 	kfree(m->hw_handler_name);
 	kfree(m->hw_handler_params);
-	mempool_destroy(m->mpio_pool);
 	kfree(m);
 }
 
@@ -272,38 +253,6 @@ static struct dm_mpath_io *get_mpio(union map_info *info)
 	return info->ptr;
 }
 
-static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
-{
-	struct dm_mpath_io *mpio;
-
-	if (!m->mpio_pool) {
-		/* Use blk-mq pdu memory requested via per_io_data_size */
-		mpio = get_mpio(info);
-		memset(mpio, 0, sizeof(*mpio));
-		return mpio;
-	}
-
-	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
-	if (!mpio)
-		return NULL;
-
-	memset(mpio, 0, sizeof(*mpio));
-	info->ptr = mpio;
-
-	return mpio;
-}
-
-static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
-{
-	/* Only needed for non blk-mq (.request_fn) multipath */
-	if (m->mpio_pool) {
-		struct dm_mpath_io *mpio = info->ptr;
-
-		info->ptr = NULL;
-		mempool_free(mpio, m->mpio_pool);
-	}
-}
-
 static size_t multipath_per_bio_data_size(void)
 {
 	return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
@@ -530,16 +479,17 @@ static bool must_push_back_bio(struct multipath *m)
 /*
  * Map cloned requests (request-based multipath)
  */
-static int __multipath_map(struct dm_target *ti, struct request *clone,
-			   union map_info *map_context,
-			   struct request *rq, struct request **__clone)
+static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
+				   union map_info *map_context,
+				   struct request **__clone)
 {
 	struct multipath *m = ti->private;
 	int r = DM_MAPIO_REQUEUE;
-	size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
+	size_t nr_bytes = blk_rq_bytes(rq);
 	struct pgpath *pgpath;
 	struct block_device *bdev;
-	struct dm_mpath_io *mpio;
+	struct dm_mpath_io *mpio = get_mpio(map_context);
+	struct request *clone;
 
 	/* Do we need to select a new pgpath? */
 	pgpath = lockless_dereference(m->current_pgpath);
@@ -556,42 +506,23 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 		return r;
 	}
 
-	mpio = set_mpio(m, map_context);
-	if (!mpio)
-		/* ENOMEM, requeue */
-		return r;
-
+	memset(mpio, 0, sizeof(*mpio));
 	mpio->pgpath = pgpath;
 	mpio->nr_bytes = nr_bytes;
 
 	bdev = pgpath->path.dev->bdev;
 
-	if (clone) {
-		/*
-		 * Old request-based interface: allocated clone is passed in.
-		 * Used by: .request_fn stacked on .request_fn path(s).
-		 */
-		clone->q = bdev_get_queue(bdev);
-		clone->rq_disk = bdev->bd_disk;
-		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-	} else {
-		/*
-		 * blk-mq request-based interface; used by both:
-		 * .request_fn stacked on blk-mq path(s) and
-		 * blk-mq stacked on blk-mq path(s).
-		 */
-		clone = blk_mq_alloc_request(bdev_get_queue(bdev),
-					     rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
-		if (IS_ERR(clone)) {
-			/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
-			clear_request_fn_mpio(m, map_context);
-			return r;
-		}
-		clone->bio = clone->biotail = NULL;
-		clone->rq_disk = bdev->bd_disk;
-		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-		*__clone = clone;
+	clone = blk_get_request(bdev_get_queue(bdev),
+			rq->cmd_flags | REQ_NOMERGE,
+			GFP_ATOMIC);
+	if (IS_ERR(clone)) {
+		/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
+		return r;
 	}
+	clone->bio = clone->biotail = NULL;
+	clone->rq_disk = bdev->bd_disk;
+	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	*__clone = clone;
 
 	if (pgpath->pg->ps.type->start_io)
 		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
@@ -600,22 +531,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 	return DM_MAPIO_REMAPPED;
 }
 
-static int multipath_map(struct dm_target *ti, struct request *clone,
-			 union map_info *map_context)
-{
-	return __multipath_map(ti, clone, map_context, NULL, NULL);
-}
-
-static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
-				   union map_info *map_context,
-				   struct request **clone)
-{
-	return __multipath_map(ti, NULL, map_context, rq, clone);
-}
-
 static void multipath_release_clone(struct request *clone)
 {
-	blk_mq_free_request(clone);
+	blk_put_request(clone);
 }
 
 /*
@@ -1187,7 +1105,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->num_write_same_bios = 1;
 	if (m->queue_mode == DM_TYPE_BIO_BASED)
 		ti->per_io_data_size = multipath_per_bio_data_size();
-	else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+	else
 		ti->per_io_data_size = sizeof(struct dm_mpath_io);
 
 	return 0;
@@ -1610,7 +1528,6 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 		if (ps->type->end_io)
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
-	clear_request_fn_mpio(m, map_context);
 
 	return r;
 }
@@ -2060,7 +1977,6 @@ static struct target_type multipath_target = {
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
-	.map_rq = multipath_map,
 	.clone_and_map_rq = multipath_clone_and_map,
 	.release_clone_rq = multipath_release_clone,
 	.rq_end_io = multipath_end_io,
@@ -2080,11 +1996,6 @@ static int __init dm_multipath_init(void)
 {
 	int r;
 
-	/* allocate a slab for the dm_mpath_ios */
-	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
-	if (!_mpio_cache)
-		return -ENOMEM;
-
 	r = dm_register_target(&multipath_target);
 	if (r < 0) {
 		DMERR("request-based register failed %d", r);
@@ -2120,8 +2031,6 @@ static int __init dm_multipath_init(void)
 bad_alloc_kmultipathd:
 	dm_unregister_target(&multipath_target);
 bad_register_target:
-	kmem_cache_destroy(_mpio_cache);
-
 	return r;
 }
 
@@ -2131,7 +2040,6 @@ static void __exit dm_multipath_exit(void)
 	destroy_workqueue(kmultipathd);
 
 	dm_unregister_target(&multipath_target);
-	kmem_cache_destroy(_mpio_cache);
 }
 
 module_init(dm_multipath_init);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 3f12916..8d06834 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -109,28 +109,6 @@ void dm_stop_queue(struct request_queue *q)
 		dm_mq_stop_queue(q);
 }
 
-static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
-						gfp_t gfp_mask)
-{
-	return mempool_alloc(md->io_pool, gfp_mask);
-}
-
-static void free_old_rq_tio(struct dm_rq_target_io *tio)
-{
-	mempool_free(tio, tio->md->io_pool);
-}
-
-static struct request *alloc_old_clone_request(struct mapped_device *md,
-					       gfp_t gfp_mask)
-{
-	return mempool_alloc(md->rq_pool, gfp_mask);
-}
-
-static void free_old_clone_request(struct mapped_device *md, struct request *rq)
-{
-	mempool_free(rq, md->rq_pool);
-}
-
 /*
  * Partial completion handling for request-based dm
  */
@@ -185,7 +163,7 @@ static void end_clone_bio(struct bio *clone)
 
 static struct dm_rq_target_io *tio_from_request(struct request *rq)
 {
-	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
+	return blk_mq_rq_to_pdu(rq);
 }
 
 static void rq_end_stats(struct mapped_device *md, struct request *orig)
@@ -233,31 +211,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	dm_put(md);
 }
 
-static void free_rq_clone(struct request *clone)
-{
-	struct dm_rq_target_io *tio = clone->end_io_data;
-	struct mapped_device *md = tio->md;
-
-	blk_rq_unprep_clone(clone);
-
-	/*
-	 * It is possible for a clone_old_rq() allocated clone to
-	 * get passed in -- it may not yet have a request_queue.
-	 * This is known to occur if the error target replaces
-	 * a multipath target that has a request_fn queue stacked
-	 * on blk-mq queue(s).
-	 */
-	if (clone->q && clone->q->mq_ops)
-		/* stacked on blk-mq queue(s) */
-		tio->ti->type->release_clone_rq(clone);
-	else if (!md->queue->mq_ops)
-		/* request_fn queue stacked on request_fn queue(s) */
-		free_old_clone_request(md, clone);
-
-	if (!md->queue->mq_ops)
-		free_old_rq_tio(tio);
-}
-
 /*
  * Complete the clone and the original request.
  * Must be called without clone's queue lock held,
@@ -270,7 +223,9 @@ static void dm_end_request(struct request *clone, int error)
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 
-	free_rq_clone(clone);
+	blk_rq_unprep_clone(clone);
+	tio->ti->type->release_clone_rq(clone);
+
 	rq_end_stats(md, rq);
 	if (!rq->q->mq_ops)
 		blk_end_request_all(rq, error);
@@ -279,22 +234,6 @@ static void dm_end_request(struct request *clone, int error)
 	rq_completed(md, rw, true);
 }
 
-static void dm_unprep_request(struct request *rq)
-{
-	struct dm_rq_target_io *tio = tio_from_request(rq);
-	struct request *clone = tio->clone;
-
-	if (!rq->q->mq_ops) {
-		rq->special = NULL;
-		rq->rq_flags &= ~RQF_DONTPREP;
-	}
-
-	if (clone)
-		free_rq_clone(clone);
-	else if (!tio->md->queue->mq_ops)
-		free_old_rq_tio(tio);
-}
-
 /*
  * Requeue the original request of a clone.
  */
@@ -333,7 +272,10 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
 	int rw = rq_data_dir(rq);
 
 	rq_end_stats(md, rq);
-	dm_unprep_request(rq);
+	if (tio->clone) {
+		blk_rq_unprep_clone(tio->clone);
+		tio->ti->type->release_clone_rq(tio->clone);
+	}
 
 	if (!rq->q->mq_ops)
 		dm_old_requeue_request(rq);
@@ -388,14 +330,11 @@ static void dm_softirq_done(struct request *rq)
 	if (!clone) {
 		rq_end_stats(tio->md, rq);
 		rw = rq_data_dir(rq);
-		if (!rq->q->mq_ops) {
+		if (!rq->q->mq_ops)
 			blk_end_request_all(rq, tio->error);
-			rq_completed(tio->md, rw, false);
-			free_old_rq_tio(tio);
-		} else {
+		else
 			blk_mq_end_request(rq, tio->error);
-			rq_completed(tio->md, rw, false);
-		}
+		rq_completed(tio->md, rw, false);
 		return;
 	}
 
@@ -439,16 +378,6 @@ static void end_clone_request(struct request *clone, int error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	if (!clone->q->mq_ops) {
-		/*
-		 * For just cleaning up the information of the queue in which
-		 * the clone was dispatched.
-		 * The clone is *NOT* freed actually here because it is alloced
-		 * from dm own mempool (RQF_ALLOCED isn't set).
-		 */
-		__blk_put_request(clone->q, clone);
-	}
-
 	/*
 	 * Actual request completion is done in a softirq context which doesn't
 	 * hold the clone's queue lock.  Otherwise, deadlock could occur because:
@@ -506,28 +435,6 @@ static int setup_clone(struct request *clone, struct request *rq,
 	return 0;
 }
 
-static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
-				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
-{
-	/*
-	 * Create clone for use with .request_fn request_queue
-	 */
-	struct request *clone;
-
-	clone = alloc_old_clone_request(md, gfp_mask);
-	if (!clone)
-		return NULL;
-
-	blk_rq_init(NULL, clone);
-	if (setup_clone(clone, rq, tio, gfp_mask)) {
-		/* -ENOMEM */
-		free_old_clone_request(md, clone);
-		return NULL;
-	}
-
-	return clone;
-}
-
 static void map_tio_request(struct kthread_work *work);
 
 static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
@@ -549,60 +456,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 		kthread_init_work(&tio->work, map_tio_request);
 }
 
-static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
-					       struct mapped_device *md,
-					       gfp_t gfp_mask)
-{
-	struct dm_rq_target_io *tio;
-	int srcu_idx;
-	struct dm_table *table;
-
-	tio = alloc_old_rq_tio(md, gfp_mask);
-	if (!tio)
-		return NULL;
-
-	init_tio(tio, rq, md);
-
-	table = dm_get_live_table(md, &srcu_idx);
-	/*
-	 * Must clone a request if this .request_fn DM device
-	 * is stacked on .request_fn device(s).
-	 */
-	if (!dm_table_all_blk_mq_devices(table)) {
-		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
-			dm_put_live_table(md, srcu_idx);
-			free_old_rq_tio(tio);
-			return NULL;
-		}
-	}
-	dm_put_live_table(md, srcu_idx);
-
-	return tio;
-}
-
-/*
- * Called with the queue lock held.
- */
-static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
-{
-	struct mapped_device *md = q->queuedata;
-	struct dm_rq_target_io *tio;
-
-	if (unlikely(rq->special)) {
-		DMWARN("Already has something in rq->special.");
-		return BLKPREP_KILL;
-	}
-
-	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
-	if (!tio)
-		return BLKPREP_DEFER;
-
-	rq->special = tio;
-	rq->rq_flags |= RQF_DONTPREP;
-
-	return BLKPREP_OK;
-}
-
 /*
  * Returns:
  * DM_MAPIO_*       : the request has been processed as indicated
@@ -617,31 +470,18 @@ static int map_request(struct dm_rq_target_io *tio)
 	struct request *rq = tio->orig;
 	struct request *clone = NULL;
 
-	if (tio->clone) {
-		clone = tio->clone;
-		r = ti->type->map_rq(ti, clone, &tio->info);
-		if (r == DM_MAPIO_DELAY_REQUEUE)
-			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
-	} else {
-		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
-		if (r < 0) {
-			/* The target wants to complete the I/O */
-			dm_kill_unmapped_request(rq, r);
-			return r;
-		}
-		if (r == DM_MAPIO_REMAPPED &&
-		    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
-			/* -ENOMEM */
-			ti->type->release_clone_rq(clone);
-			return DM_MAPIO_REQUEUE;
-		}
-	}
-
+	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
 		break;
 	case DM_MAPIO_REMAPPED:
+		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+			/* -ENOMEM */
+			ti->type->release_clone_rq(clone);
+			return DM_MAPIO_REQUEUE;
+		}
+
 		/* The target has remapped the I/O so dispatch it */
 		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
 				     blk_rq_pos(rq));
@@ -700,6 +540,29 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
 	dm_get(md);
 }
 
+static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
+{
+	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+
+	/*
+	 * Must initialize md member of tio, otherwise it won't
+	 * be available in dm_mq_queue_rq.
+	 */
+	tio->md = md;
+
+	if (md->init_tio_pdu) {
+		/* target-specific per-io data is immediately after the tio */
+		tio->info.ptr = tio + 1;
+	}
+
+	return 0;
+}
+
+static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+{
+	return __dm_rq_init_rq(q->rq_alloc_data, rq);
+}
+
 static void map_tio_request(struct kthread_work *work)
 {
 	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
@@ -794,6 +657,7 @@ static void dm_old_request_fn(struct request_queue *q)
 		dm_start_request(md, rq);
 
 		tio = tio_from_request(rq);
+		init_tio(tio, rq, md);
 		/* Establish tio->ti before queuing work (map_tio_request) */
 		tio->ti = ti;
 		kthread_queue_work(&md->kworker, &tio->work);
@@ -804,10 +668,22 @@ static void dm_old_request_fn(struct request_queue *q)
 /*
  * Fully initialize a .request_fn request-based queue.
  */
-int dm_old_init_request_queue(struct mapped_device *md)
+int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
 {
+	struct dm_target *immutable_tgt;
+
 	/* Fully initialize the queue */
+	md->queue->cmd_size = sizeof(struct dm_rq_target_io);
+	md->queue->rq_alloc_data = md;
 	md->queue->request_fn = dm_old_request_fn;
+	md->queue->init_rq_fn = dm_rq_init_rq;
+
+	immutable_tgt = dm_table_get_immutable_target(t);
+	if (immutable_tgt && immutable_tgt->per_io_data_size) {
+		/* any target-specific per-io data is immediately after the tio */
+		md->queue->cmd_size += immutable_tgt->per_io_data_size;
+		md->init_tio_pdu = true;
+	}
 	if (blk_init_allocated_queue(md->queue) < 0)
 		return -EINVAL;
 
@@ -816,7 +692,6 @@ int dm_old_init_request_queue(struct mapped_device *md)
 
 	dm_init_normal_md_queue(md);
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
-	blk_queue_prep_rq(md->queue, dm_old_prep_fn);
 
 	/* Initialize the request-based DM worker thread */
 	kthread_init_worker(&md->kworker);
@@ -837,21 +712,7 @@ static int dm_mq_init_request(void *data, struct request *rq,
 		       unsigned int hctx_idx, unsigned int request_idx,
 		       unsigned int numa_node)
 {
-	struct mapped_device *md = data;
-	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
-
-	/*
-	 * Must initialize md member of tio, otherwise it won't
-	 * be available in dm_mq_queue_rq.
-	 */
-	tio->md = md;
-
-	if (md->init_tio_pdu) {
-		/* target-specific per-io data is immediately after the tio */
-		tio->info.ptr = tio + 1;
-	}
-
-	return 0;
+	return __dm_rq_init_rq(data, rq);
 }
 
 static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index 4da06ca..f0020d2 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -48,7 +48,7 @@ struct dm_rq_clone_bio_info {
 bool dm_use_blk_mq_default(void);
 bool dm_use_blk_mq(struct mapped_device *md);
 
-int dm_old_init_request_queue(struct mapped_device *md);
+int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
 int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
 void dm_mq_cleanup_mapped_device(struct mapped_device *md);
 
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 710ae28..43d3445 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -131,12 +131,6 @@ static int io_err_map(struct dm_target *tt, struct bio *bio)
 	return -EIO;
 }
 
-static int io_err_map_rq(struct dm_target *ti, struct request *clone,
-			 union map_info *map_context)
-{
-	return -EIO;
-}
-
 static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
 				   union map_info *map_context,
 				   struct request **clone)
@@ -161,7 +155,6 @@ static struct target_type error_target = {
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
-	.map_rq = io_err_map_rq,
 	.clone_and_map_rq = io_err_clone_and_map_rq,
 	.release_clone_rq = io_err_release_clone_rq,
 	.direct_access = io_err_direct_access,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3086da5..ff4a29a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -91,7 +91,6 @@ static int dm_numa_node = DM_NUMA_NODE;
  */
 struct dm_md_mempools {
 	mempool_t *io_pool;
-	mempool_t *rq_pool;
 	struct bio_set *bs;
 };
 
@@ -1419,7 +1418,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
 	if (md->kworker_task)
 		kthread_stop(md->kworker_task);
 	mempool_destroy(md->io_pool);
-	mempool_destroy(md->rq_pool);
 	if (md->bs)
 		bioset_free(md->bs);
 
@@ -1595,12 +1593,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 		goto out;
 	}
 
-	BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
+	BUG_ON(!p || md->io_pool || md->bs);
 
 	md->io_pool = p->io_pool;
 	p->io_pool = NULL;
-	md->rq_pool = p->rq_pool;
-	p->rq_pool = NULL;
 	md->bs = p->bs;
 	p->bs = NULL;
 
@@ -1777,7 +1773,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 
 	switch (type) {
 	case DM_TYPE_REQUEST_BASED:
-		r = dm_old_init_request_queue(md);
+		r = dm_old_init_request_queue(md, t);
 		if (r) {
 			DMERR("Cannot initialize queue for request-based mapped device");
 			return r;
@@ -2493,7 +2489,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 					    unsigned integrity, unsigned per_io_data_size)
 {
 	struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
-	struct kmem_cache *cachep = NULL;
 	unsigned int pool_size = 0;
 	unsigned int front_pad;
 
@@ -2503,20 +2498,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 	switch (type) {
 	case DM_TYPE_BIO_BASED:
 	case DM_TYPE_DAX_BIO_BASED:
-		cachep = _io_cache;
 		pool_size = dm_get_reserved_bio_based_ios();
 		front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+	
+		pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+		if (!pools->io_pool)
+			goto out;
 		break;
 	case DM_TYPE_REQUEST_BASED:
-		cachep = _rq_tio_cache;
-		pool_size = dm_get_reserved_rq_based_ios();
-		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
-		if (!pools->rq_pool)
-			goto out;
-		/* fall through to setup remaining rq-based pools */
 	case DM_TYPE_MQ_REQUEST_BASED:
-		if (!pool_size)
-			pool_size = dm_get_reserved_rq_based_ios();
+		pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_io_data_size is used for blk-mq pdu at queue allocation */
 		break;
@@ -2524,12 +2515,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 		BUG();
 	}
 
-	if (cachep) {
-		pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
-		if (!pools->io_pool)
-			goto out;
-	}
-
 	pools->bs = bioset_create_nobvec(pool_size, front_pad);
 	if (!pools->bs)
 		goto out;
@@ -2551,7 +2536,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
 		return;
 
 	mempool_destroy(pools->io_pool);
-	mempool_destroy(pools->rq_pool);
 
 	if (pools->bs)
 		bioset_free(pools->bs);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index f0aad08..f298b01 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -95,8 +95,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
 /*
  * To check whether the target type is request-based or not (bio-based).
  */
-#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \
-				    ((t)->type->clone_and_map_rq != NULL))
+#define dm_target_request_based(t) ((t)->type->clone_and_map_rq != NULL)
 
 /*
  * To check whether the target type is a hybrid (capable of being
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ef7962e..a7e6903 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -55,8 +55,6 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
  * = 2: The target wants to push back the io
  */
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
-typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
-				  union map_info *map_context);
 typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
 					    struct request *rq,
 					    union map_info *map_context,
@@ -163,7 +161,6 @@ struct target_type {
 	dm_ctr_fn ctr;
 	dm_dtr_fn dtr;
 	dm_map_fn map;
-	dm_map_request_fn map_rq;
 	dm_clone_and_map_request_fn clone_and_map_rq;
 	dm_release_clone_request_fn release_clone_rq;
 	dm_endio_fn end_io;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 08/18] scsi_dh_rdac: switch to scsi_execute_req_flags()
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:18   ` Martin K. Petersen
  2017-01-25 17:25 ` [PATCH 09/18] scsi_dh_emc: " Christoph Hellwig
                   ` (12 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel, Hannes Reinecke, Hannes Reinecke

From: Hannes Reinecke <hare@suse.de>

Switch to scsi_execute_req_flags() and scsi_get_vpd_page() instead of
open-coding it.  Using scsi_execute_req_flags() will set REQ_QUIET and
REQ_PREEMPT, but this is okay as we're evaluating the errors anyway and
should be able to send the command even if the device is quiesced.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/device_handler/scsi_dh_rdac.c | 174 +++++++++--------------------
 1 file changed, 51 insertions(+), 123 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 00d9c32..b64eaae 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -205,7 +205,6 @@ struct rdac_dh_data {
 #define RDAC_NON_PREFERRED	1
 	char			preferred;
 
-	unsigned char		sense[SCSI_SENSE_BUFFERSIZE];
 	union			{
 		struct c2_inquiry c2;
 		struct c4_inquiry c4;
@@ -262,40 +261,12 @@ do { \
 		sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \
 } while (0);
 
-static struct request *get_rdac_req(struct scsi_device *sdev,
-			void *buffer, unsigned buflen, int rw)
+static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
+				      struct list_head *list,
+				      unsigned char *cdb)
 {
-	struct request *rq;
-	struct request_queue *q = sdev->request_queue;
-
-	rq = blk_get_request(q, rw, GFP_NOIO);
-
-	if (IS_ERR(rq)) {
-		sdev_printk(KERN_INFO, sdev,
-				"get_rdac_req: blk_get_request failed.\n");
-		return NULL;
-	}
-	blk_rq_set_block_pc(rq);
-
-	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
-		blk_put_request(rq);
-		sdev_printk(KERN_INFO, sdev,
-				"get_rdac_req: blk_rq_map_kern failed.\n");
-		return NULL;
-	}
-
-	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
-			 REQ_FAILFAST_DRIVER;
-	rq->retries = RDAC_RETRIES;
-	rq->timeout = RDAC_TIMEOUT;
-
-	return rq;
-}
-
-static struct request *rdac_failover_get(struct scsi_device *sdev,
-			struct rdac_dh_data *h, struct list_head *list)
-{
-	struct request *rq;
+	struct scsi_device *sdev = ctlr->ms_sdev;
+	struct rdac_dh_data *h = sdev->handler_data;
 	struct rdac_mode_common *common;
 	unsigned data_size;
 	struct rdac_queue_data *qdata;
@@ -332,27 +303,17 @@ static struct request *rdac_failover_get(struct scsi_device *sdev,
 		lun_table[qdata->h->lun] = 0x81;
 	}
 
-	/* get request for block layer packet command */
-	rq = get_rdac_req(sdev, &h->ctlr->mode_select, data_size, WRITE);
-	if (!rq)
-		return NULL;
-
 	/* Prepare the command. */
 	if (h->ctlr->use_ms10) {
-		rq->cmd[0] = MODE_SELECT_10;
-		rq->cmd[7] = data_size >> 8;
-		rq->cmd[8] = data_size & 0xff;
+		cdb[0] = MODE_SELECT_10;
+		cdb[7] = data_size >> 8;
+		cdb[8] = data_size & 0xff;
 	} else {
-		rq->cmd[0] = MODE_SELECT;
-		rq->cmd[4] = data_size;
+		cdb[0] = MODE_SELECT;
+		cdb[4] = data_size;
 	}
-	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
-
-	rq->sense = h->sense;
-	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
-	rq->sense_len = 0;
 
-	return rq;
+	return data_size;
 }
 
 static void release_controller(struct kref *kref)
@@ -400,46 +361,14 @@ static struct rdac_controller *get_controller(int index, char *array_name,
 	return ctlr;
 }
 
-static int submit_inquiry(struct scsi_device *sdev, int page_code,
-			  unsigned int len, struct rdac_dh_data *h)
-{
-	struct request *rq;
-	struct request_queue *q = sdev->request_queue;
-	int err = SCSI_DH_RES_TEMP_UNAVAIL;
-
-	rq = get_rdac_req(sdev, &h->inq, len, READ);
-	if (!rq)
-		goto done;
-
-	/* Prepare the command. */
-	rq->cmd[0] = INQUIRY;
-	rq->cmd[1] = 1;
-	rq->cmd[2] = page_code;
-	rq->cmd[4] = len;
-	rq->cmd_len = COMMAND_SIZE(INQUIRY);
-
-	rq->sense = h->sense;
-	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
-	rq->sense_len = 0;
-
-	err = blk_execute_rq(q, NULL, rq, 1);
-	if (err == -EIO)
-		err = SCSI_DH_IO;
-
-	blk_put_request(rq);
-done:
-	return err;
-}
-
 static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h,
 			char *array_name, u8 *array_id)
 {
-	int err, i;
-	struct c8_inquiry *inqp;
+	int err = SCSI_DH_IO, i;
+	struct c8_inquiry *inqp = &h->inq.c8;
 
-	err = submit_inquiry(sdev, 0xC8, sizeof(struct c8_inquiry), h);
-	if (err == SCSI_DH_OK) {
-		inqp = &h->inq.c8;
+	if (!scsi_get_vpd_page(sdev, 0xC8, (unsigned char *)inqp,
+			       sizeof(struct c8_inquiry))) {
 		if (inqp->page_code != 0xc8)
 			return SCSI_DH_NOSYS;
 		if (inqp->page_id[0] != 'e' || inqp->page_id[1] != 'd' ||
@@ -453,20 +382,20 @@ static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h,
 		*(array_name+ARRAY_LABEL_LEN-1) = '\0';
 		memset(array_id, 0, UNIQUE_ID_LEN);
 		memcpy(array_id, inqp->array_unique_id, inqp->array_uniq_id_len);
+		err = SCSI_DH_OK;
 	}
 	return err;
 }
 
 static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
 {
-	int err, access_state;
+	int err = SCSI_DH_IO, access_state;
 	struct rdac_dh_data *tmp;
-	struct c9_inquiry *inqp;
+	struct c9_inquiry *inqp = &h->inq.c9;
 
 	h->state = RDAC_STATE_ACTIVE;
-	err = submit_inquiry(sdev, 0xC9, sizeof(struct c9_inquiry), h);
-	if (err == SCSI_DH_OK) {
-		inqp = &h->inq.c9;
+	if (!scsi_get_vpd_page(sdev, 0xC9, (unsigned char *)inqp,
+			       sizeof(struct c9_inquiry))) {
 		/* detect the operating mode */
 		if ((inqp->avte_cvp >> 5) & 0x1)
 			h->mode = RDAC_MODE_IOSHIP; /* LUN in IOSHIP mode */
@@ -501,6 +430,7 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
 			tmp->sdev->access_state = access_state;
 		}
 		rcu_read_unlock();
+		err = SCSI_DH_OK;
 	}
 
 	return err;
@@ -509,12 +439,11 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
 static int initialize_controller(struct scsi_device *sdev,
 		struct rdac_dh_data *h, char *array_name, u8 *array_id)
 {
-	int err, index;
-	struct c4_inquiry *inqp;
+	int err = SCSI_DH_IO, index;
+	struct c4_inquiry *inqp = &h->inq.c4;
 
-	err = submit_inquiry(sdev, 0xC4, sizeof(struct c4_inquiry), h);
-	if (err == SCSI_DH_OK) {
-		inqp = &h->inq.c4;
+	if (!scsi_get_vpd_page(sdev, 0xC4, (unsigned char *)inqp,
+			       sizeof(struct c4_inquiry))) {
 		/* get the controller index */
 		if (inqp->slot_id[1] == 0x31)
 			index = 0;
@@ -530,18 +459,18 @@ static int initialize_controller(struct scsi_device *sdev,
 			h->sdev = sdev;
 		}
 		spin_unlock(&list_lock);
+		err = SCSI_DH_OK;
 	}
 	return err;
 }
 
 static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h)
 {
-	int err;
-	struct c2_inquiry *inqp;
+	int err = SCSI_DH_IO;
+	struct c2_inquiry *inqp = &h->inq.c2;
 
-	err = submit_inquiry(sdev, 0xC2, sizeof(struct c2_inquiry), h);
-	if (err == SCSI_DH_OK) {
-		inqp = &h->inq.c2;
+	if (!scsi_get_vpd_page(sdev, 0xC2, (unsigned char *)inqp,
+			       sizeof(struct c2_inquiry))) {
 		/*
 		 * If more than MODE6_MAX_LUN luns are supported, use
 		 * mode select 10
@@ -550,36 +479,35 @@ static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h)
 			h->ctlr->use_ms10 = 1;
 		else
 			h->ctlr->use_ms10 = 0;
+		err = SCSI_DH_OK;
 	}
 	return err;
 }
 
 static int mode_select_handle_sense(struct scsi_device *sdev,
-					unsigned char *sensebuf)
+				    struct scsi_sense_hdr *sense_hdr)
 {
-	struct scsi_sense_hdr sense_hdr;
-	int err = SCSI_DH_IO, ret;
+	int err = SCSI_DH_IO;
 	struct rdac_dh_data *h = sdev->handler_data;
 
-	ret = scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sense_hdr);
-	if (!ret)
+	if (!scsi_sense_valid(sense_hdr))
 		goto done;
 
-	switch (sense_hdr.sense_key) {
+	switch (sense_hdr->sense_key) {
 	case NO_SENSE:
 	case ABORTED_COMMAND:
 	case UNIT_ATTENTION:
 		err = SCSI_DH_RETRY;
 		break;
 	case NOT_READY:
-		if (sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x01)
+		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01)
 			/* LUN Not Ready and is in the Process of Becoming
 			 * Ready
 			 */
 			err = SCSI_DH_RETRY;
 		break;
 	case ILLEGAL_REQUEST:
-		if (sense_hdr.asc == 0x91 && sense_hdr.ascq == 0x36)
+		if (sense_hdr->asc == 0x91 && sense_hdr->ascq == 0x36)
 			/*
 			 * Command Lock contention
 			 */
@@ -592,7 +520,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev,
 	RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
 		"MODE_SELECT returned with sense %02x/%02x/%02x",
 		(char *) h->ctlr->array_name, h->ctlr->index,
-		sense_hdr.sense_key, sense_hdr.asc, sense_hdr.ascq);
+		sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq);
 
 done:
 	return err;
@@ -602,13 +530,16 @@ static void send_mode_select(struct work_struct *work)
 {
 	struct rdac_controller *ctlr =
 		container_of(work, struct rdac_controller, ms_work);
-	struct request *rq;
 	struct scsi_device *sdev = ctlr->ms_sdev;
 	struct rdac_dh_data *h = sdev->handler_data;
-	struct request_queue *q = sdev->request_queue;
-	int err, retry_cnt = RDAC_RETRY_COUNT;
+	int err = SCSI_DH_OK, retry_cnt = RDAC_RETRY_COUNT;
 	struct rdac_queue_data *tmp, *qdata;
 	LIST_HEAD(list);
+	unsigned char cdb[COMMAND_SIZE(MODE_SELECT_10)];
+	struct scsi_sense_hdr sshdr;
+	unsigned int data_size;
+	u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
+		REQ_FAILFAST_DRIVER;
 
 	spin_lock(&ctlr->ms_lock);
 	list_splice_init(&ctlr->ms_head, &list);
@@ -616,21 +547,19 @@ static void send_mode_select(struct work_struct *work)
 	ctlr->ms_sdev = NULL;
 	spin_unlock(&ctlr->ms_lock);
 
-retry:
-	err = SCSI_DH_RES_TEMP_UNAVAIL;
-	rq = rdac_failover_get(sdev, h, &list);
-	if (!rq)
-		goto done;
+ retry:
+	data_size = rdac_failover_get(ctlr, &list, cdb);
 
 	RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
 		"%s MODE_SELECT command",
 		(char *) h->ctlr->array_name, h->ctlr->index,
 		(retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying");
 
-	err = blk_execute_rq(q, NULL, rq, 1);
-	blk_put_request(rq);
-	if (err != SCSI_DH_OK) {
-		err = mode_select_handle_sense(sdev, h->sense);
+	if (scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
+				   &h->ctlr->mode_select, data_size, &sshdr,
+				   RDAC_TIMEOUT * HZ,
+				   RDAC_RETRIES, NULL, req_flags, 0)) {
+		err = mode_select_handle_sense(sdev, &sshdr);
 		if (err == SCSI_DH_RETRY && retry_cnt--)
 			goto retry;
 		if (err == SCSI_DH_IMM_RETRY)
@@ -643,7 +572,6 @@ static void send_mode_select(struct work_struct *work)
 				(char *) h->ctlr->array_name, h->ctlr->index);
 	}
 
-done:
 	list_for_each_entry_safe(qdata, tmp, &list, entry) {
 		list_del(&qdata->entry);
 		if (err == SCSI_DH_OK)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 09/18] scsi_dh_emc: switch to scsi_execute_req_flags()
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 08/18] scsi_dh_rdac: switch to scsi_execute_req_flags() Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:19   ` Martin K. Petersen
  2017-01-25 17:25 ` [PATCH 10/18] scsi_dh_hp_sw: " Christoph Hellwig
                   ` (11 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel, Hannes Reinecke, Hannes Reinecke

From: Hannes Reinecke <hare@suse.de>

Switch to scsi_execute_req_flags() and scsi_get_vpd_page() instead of
open-coding it.  Using scsi_execute_req_flags() will set REQ_QUIET and
REQ_PREEMPT, but this is okay as we're evaluating the errors anyway and
should be able to send the command even if the device is quiesced.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/device_handler/scsi_dh_emc.c | 247 +++++++-----------------------
 1 file changed, 56 insertions(+), 191 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index 5b80746..4a7679f 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -88,12 +88,6 @@ struct clariion_dh_data {
 	 */
 	unsigned char buffer[CLARIION_BUFFER_SIZE];
 	/*
-	 * SCSI sense buffer for commands -- assumes serial issuance
-	 * and completion sequence of all commands for same multipath.
-	 */
-	unsigned char sense[SCSI_SENSE_BUFFERSIZE];
-	unsigned int senselen;
-	/*
 	 * LUN state
 	 */
 	int lun_state;
@@ -116,44 +110,38 @@ struct clariion_dh_data {
 /*
  * Parse MODE_SELECT cmd reply.
  */
-static int trespass_endio(struct scsi_device *sdev, char *sense)
+static int trespass_endio(struct scsi_device *sdev,
+			  struct scsi_sense_hdr *sshdr)
 {
 	int err = SCSI_DH_IO;
-	struct scsi_sense_hdr sshdr;
-
-	if (!scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr)) {
-		sdev_printk(KERN_ERR, sdev, "%s: Found valid sense data 0x%2x, "
-			    "0x%2x, 0x%2x while sending CLARiiON trespass "
-			    "command.\n", CLARIION_NAME, sshdr.sense_key,
-			    sshdr.asc, sshdr.ascq);
 
-		if ((sshdr.sense_key == 0x05) && (sshdr.asc == 0x04) &&
-		     (sshdr.ascq == 0x00)) {
-			/*
-			 * Array based copy in progress -- do not send
-			 * mode_select or copy will be aborted mid-stream.
-			 */
-			sdev_printk(KERN_INFO, sdev, "%s: Array Based Copy in "
-				    "progress while sending CLARiiON trespass "
-				    "command.\n", CLARIION_NAME);
-			err = SCSI_DH_DEV_TEMP_BUSY;
-		} else if ((sshdr.sense_key == 0x02) && (sshdr.asc == 0x04) &&
-			    (sshdr.ascq == 0x03)) {
-			/*
-			 * LUN Not Ready - Manual Intervention Required
-			 * indicates in-progress ucode upgrade (NDU).
-			 */
-			sdev_printk(KERN_INFO, sdev, "%s: Detected in-progress "
-				    "ucode upgrade NDU operation while sending "
-				    "CLARiiON trespass command.\n", CLARIION_NAME);
-			err = SCSI_DH_DEV_TEMP_BUSY;
-		} else
-			err = SCSI_DH_DEV_FAILED;
-	} else {
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: failed to send MODE SELECT, no sense available\n",
-			    CLARIION_NAME);
-	}
+	sdev_printk(KERN_ERR, sdev, "%s: Found valid sense data 0x%2x, "
+		    "0x%2x, 0x%2x while sending CLARiiON trespass "
+		    "command.\n", CLARIION_NAME, sshdr->sense_key,
+		    sshdr->asc, sshdr->ascq);
+
+	if (sshdr->sense_key == 0x05 && sshdr->asc == 0x04 &&
+	    sshdr->ascq == 0x00) {
+		/*
+		 * Array based copy in progress -- do not send
+		 * mode_select or copy will be aborted mid-stream.
+		 */
+		sdev_printk(KERN_INFO, sdev, "%s: Array Based Copy in "
+			    "progress while sending CLARiiON trespass "
+			    "command.\n", CLARIION_NAME);
+		err = SCSI_DH_DEV_TEMP_BUSY;
+	} else if (sshdr->sense_key == 0x02 && sshdr->asc == 0x04 &&
+		   sshdr->ascq == 0x03) {
+		/*
+		 * LUN Not Ready - Manual Intervention Required
+		 * indicates in-progress ucode upgrade (NDU).
+		 */
+		sdev_printk(KERN_INFO, sdev, "%s: Detected in-progress "
+			    "ucode upgrade NDU operation while sending "
+			    "CLARiiON trespass command.\n", CLARIION_NAME);
+		err = SCSI_DH_DEV_TEMP_BUSY;
+	} else
+		err = SCSI_DH_DEV_FAILED;
 	return err;
 }
 
@@ -257,103 +245,15 @@ static char * parse_sp_model(struct scsi_device *sdev, unsigned char *buffer)
 	return sp_model;
 }
 
-/*
- * Get block request for REQ_BLOCK_PC command issued to path.  Currently
- * limited to MODE_SELECT (trespass) and INQUIRY (VPD page 0xC0) commands.
- *
- * Uses data and sense buffers in hardware handler context structure and
- * assumes serial servicing of commands, both issuance and completion.
- */
-static struct request *get_req(struct scsi_device *sdev, int cmd,
-				unsigned char *buffer)
-{
-	struct request *rq;
-	int len = 0;
-
-	rq = blk_get_request(sdev->request_queue,
-			(cmd != INQUIRY) ? WRITE : READ, GFP_NOIO);
-	if (IS_ERR(rq)) {
-		sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed");
-		return NULL;
-	}
-
-	blk_rq_set_block_pc(rq);
-	rq->cmd_len = COMMAND_SIZE(cmd);
-	rq->cmd[0] = cmd;
-
-	switch (cmd) {
-	case MODE_SELECT:
-		len = sizeof(short_trespass);
-		rq->cmd[1] = 0x10;
-		rq->cmd[4] = len;
-		break;
-	case MODE_SELECT_10:
-		len = sizeof(long_trespass);
-		rq->cmd[1] = 0x10;
-		rq->cmd[8] = len;
-		break;
-	case INQUIRY:
-		len = CLARIION_BUFFER_SIZE;
-		rq->cmd[4] = len;
-		memset(buffer, 0, len);
-		break;
-	default:
-		BUG_ON(1);
-		break;
-	}
-
-	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
-			 REQ_FAILFAST_DRIVER;
-	rq->timeout = CLARIION_TIMEOUT;
-	rq->retries = CLARIION_RETRIES;
-
-	if (blk_rq_map_kern(rq->q, rq, buffer, len, GFP_NOIO)) {
-		blk_put_request(rq);
-		return NULL;
-	}
-
-	return rq;
-}
-
-static int send_inquiry_cmd(struct scsi_device *sdev, int page,
-			    struct clariion_dh_data *csdev)
-{
-	struct request *rq = get_req(sdev, INQUIRY, csdev->buffer);
-	int err;
-
-	if (!rq)
-		return SCSI_DH_RES_TEMP_UNAVAIL;
-
-	rq->sense = csdev->sense;
-	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
-	rq->sense_len = csdev->senselen = 0;
-
-	rq->cmd[0] = INQUIRY;
-	if (page != 0) {
-		rq->cmd[1] = 1;
-		rq->cmd[2] = page;
-	}
-	err = blk_execute_rq(sdev->request_queue, NULL, rq, 1);
-	if (err == -EIO) {
-		sdev_printk(KERN_INFO, sdev,
-			    "%s: failed to send %s INQUIRY: %x\n",
-			    CLARIION_NAME, page?"EVPD":"standard",
-			    rq->errors);
-		csdev->senselen = rq->sense_len;
-		err = SCSI_DH_IO;
-	}
-
-	blk_put_request(rq);
-
-	return err;
-}
-
 static int send_trespass_cmd(struct scsi_device *sdev,
 			    struct clariion_dh_data *csdev)
 {
-	struct request *rq;
 	unsigned char *page22;
-	int err, len, cmd;
+	unsigned char cdb[COMMAND_SIZE(MODE_SELECT)];
+	int err, res = SCSI_DH_OK, len;
+	struct scsi_sense_hdr sshdr;
+	u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
+		REQ_FAILFAST_DRIVER;
 
 	if (csdev->flags & CLARIION_SHORT_TRESPASS) {
 		page22 = short_trespass;
@@ -361,40 +261,37 @@ static int send_trespass_cmd(struct scsi_device *sdev,
 			/* Set Honor Reservations bit */
 			page22[6] |= 0x80;
 		len = sizeof(short_trespass);
-		cmd = MODE_SELECT;
+		cdb[0] = MODE_SELECT;
+		cdb[1] = 0x10;
+		cdb[4] = len;
 	} else {
 		page22 = long_trespass;
 		if (!(csdev->flags & CLARIION_HONOR_RESERVATIONS))
 			/* Set Honor Reservations bit */
 			page22[10] |= 0x80;
 		len = sizeof(long_trespass);
-		cmd = MODE_SELECT_10;
+		cdb[0] = MODE_SELECT_10;
+		cdb[8] = len;
 	}
 	BUG_ON((len > CLARIION_BUFFER_SIZE));
 	memcpy(csdev->buffer, page22, len);
 
-	rq = get_req(sdev, cmd, csdev->buffer);
-	if (!rq)
-		return SCSI_DH_RES_TEMP_UNAVAIL;
-
-	rq->sense = csdev->sense;
-	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
-	rq->sense_len = csdev->senselen = 0;
-
-	err = blk_execute_rq(sdev->request_queue, NULL, rq, 1);
-	if (err == -EIO) {
-		if (rq->sense_len) {
-			err = trespass_endio(sdev, csdev->sense);
-		} else {
+	err = scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
+				     csdev->buffer, len, &sshdr,
+				     CLARIION_TIMEOUT * HZ, CLARIION_RETRIES,
+				     NULL, req_flags, 0);
+	if (err) {
+		if (scsi_sense_valid(&sshdr))
+			res = trespass_endio(sdev, &sshdr);
+		else {
 			sdev_printk(KERN_INFO, sdev,
 				    "%s: failed to send MODE SELECT: %x\n",
-				    CLARIION_NAME, rq->errors);
+				    CLARIION_NAME, err);
+			res = SCSI_DH_IO;
 		}
 	}
 
-	blk_put_request(rq);
-
-	return err;
+	return res;
 }
 
 static int clariion_check_sense(struct scsi_device *sdev,
@@ -464,21 +361,7 @@ static int clariion_std_inquiry(struct scsi_device *sdev,
 	int err;
 	char *sp_model;
 
-	err = send_inquiry_cmd(sdev, 0, csdev);
-	if (err != SCSI_DH_OK && csdev->senselen) {
-		struct scsi_sense_hdr sshdr;
-
-		if (scsi_normalize_sense(csdev->sense, SCSI_SENSE_BUFFERSIZE,
-					 &sshdr)) {
-			sdev_printk(KERN_ERR, sdev, "%s: INQUIRY sense code "
-				    "%02x/%02x/%02x\n", CLARIION_NAME,
-				    sshdr.sense_key, sshdr.asc, sshdr.ascq);
-		}
-		err = SCSI_DH_IO;
-		goto out;
-	}
-
-	sp_model = parse_sp_model(sdev, csdev->buffer);
+	sp_model = parse_sp_model(sdev, sdev->inquiry);
 	if (!sp_model) {
 		err = SCSI_DH_DEV_UNSUPP;
 		goto out;
@@ -500,30 +383,12 @@ static int clariion_std_inquiry(struct scsi_device *sdev,
 static int clariion_send_inquiry(struct scsi_device *sdev,
 				 struct clariion_dh_data *csdev)
 {
-	int err, retry = CLARIION_RETRIES;
-
-retry:
-	err = send_inquiry_cmd(sdev, 0xC0, csdev);
-	if (err != SCSI_DH_OK && csdev->senselen) {
-		struct scsi_sense_hdr sshdr;
-
-		err = scsi_normalize_sense(csdev->sense, SCSI_SENSE_BUFFERSIZE,
-					   &sshdr);
-		if (!err)
-			return SCSI_DH_IO;
-
-		err = clariion_check_sense(sdev, &sshdr);
-		if (retry > 0 && err == ADD_TO_MLQUEUE) {
-			retry--;
-			goto retry;
-		}
-		sdev_printk(KERN_ERR, sdev, "%s: INQUIRY sense code "
-			    "%02x/%02x/%02x\n", CLARIION_NAME,
-			      sshdr.sense_key, sshdr.asc, sshdr.ascq);
-		err = SCSI_DH_IO;
-	} else {
+	int err = SCSI_DH_IO;
+
+	if (!scsi_get_vpd_page(sdev, 0xC0, csdev->buffer,
+			       CLARIION_BUFFER_SIZE))
 		err = parse_sp_info_reply(sdev, csdev);
-	}
+
 	return err;
 }
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 10/18] scsi_dh_hp_sw: switch to scsi_execute_req_flags()
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (8 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 09/18] scsi_dh_emc: " Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:20     ` Martin K. Petersen
  2017-01-25 17:25 ` [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool Christoph Hellwig
                   ` (10 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel, Hannes Reinecke, Hannes Reinecke

From: Hannes Reinecke <hare@suse.de>

Switch to scsi_execute_req_flags() instead of using the block interface
directly.  This will set REQ_QUIET and REQ_PREEMPT, but this is okay as
we're evaluating the errors anyway and should be able to send the command
even if the device is quiesced.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/device_handler/scsi_dh_hp_sw.c | 222 ++++++++--------------------
 1 file changed, 65 insertions(+), 157 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 308e871..be43c94 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -38,13 +38,10 @@
 #define HP_SW_PATH_PASSIVE		1
 
 struct hp_sw_dh_data {
-	unsigned char sense[SCSI_SENSE_BUFFERSIZE];
 	int path_state;
 	int retries;
 	int retry_cnt;
 	struct scsi_device *sdev;
-	activate_complete	callback_fn;
-	void			*callback_data;
 };
 
 static int hp_sw_start_stop(struct hp_sw_dh_data *);
@@ -56,43 +53,34 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *);
  *
  * Returns SCSI_DH_DEV_OFFLINED if the sdev is on the passive path
  */
-static int tur_done(struct scsi_device *sdev, unsigned char *sense)
+static int tur_done(struct scsi_device *sdev, struct hp_sw_dh_data *h,
+		    struct scsi_sense_hdr *sshdr)
 {
-	struct scsi_sense_hdr sshdr;
-	int ret;
+	int ret = SCSI_DH_IO;
 
-	ret = scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr);
-	if (!ret) {
-		sdev_printk(KERN_WARNING, sdev,
-			    "%s: sending tur failed, no sense available\n",
-			    HP_SW_NAME);
-		ret = SCSI_DH_IO;
-		goto done;
-	}
-	switch (sshdr.sense_key) {
+	switch (sshdr->sense_key) {
 	case UNIT_ATTENTION:
 		ret = SCSI_DH_IMM_RETRY;
 		break;
 	case NOT_READY:
-		if ((sshdr.asc == 0x04) && (sshdr.ascq == 2)) {
+		if (sshdr->asc == 0x04 && sshdr->ascq == 2) {
 			/*
 			 * LUN not ready - Initialization command required
 			 *
 			 * This is the passive path
 			 */
-			ret = SCSI_DH_DEV_OFFLINED;
+			h->path_state = HP_SW_PATH_PASSIVE;
+			ret = SCSI_DH_OK;
 			break;
 		}
 		/* Fallthrough */
 	default:
 		sdev_printk(KERN_WARNING, sdev,
 			   "%s: sending tur failed, sense %x/%x/%x\n",
-			   HP_SW_NAME, sshdr.sense_key, sshdr.asc,
-			   sshdr.ascq);
+			   HP_SW_NAME, sshdr->sense_key, sshdr->asc,
+			   sshdr->ascq);
 		break;
 	}
-
-done:
 	return ret;
 }
 
@@ -105,131 +93,36 @@ static int tur_done(struct scsi_device *sdev, unsigned char *sense)
  */
 static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h)
 {
-	struct request *req;
-	int ret;
+	unsigned char cmd[6] = { TEST_UNIT_READY };
+	struct scsi_sense_hdr sshdr;
+	int ret = SCSI_DH_OK, res;
+	u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
+		REQ_FAILFAST_DRIVER;
 
 retry:
-	req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO);
-	if (IS_ERR(req))
-		return SCSI_DH_RES_TEMP_UNAVAIL;
-
-	blk_rq_set_block_pc(req);
-	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
-			  REQ_FAILFAST_DRIVER;
-	req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
-	req->cmd[0] = TEST_UNIT_READY;
-	req->timeout = HP_SW_TIMEOUT;
-	req->sense = h->sense;
-	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
-	req->sense_len = 0;
-
-	ret = blk_execute_rq(req->q, NULL, req, 1);
-	if (ret == -EIO) {
-		if (req->sense_len > 0) {
-			ret = tur_done(sdev, h->sense);
-		} else {
+	res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
+				     HP_SW_TIMEOUT, HP_SW_RETRIES,
+				     NULL, req_flags, 0);
+	if (res) {
+		if (scsi_sense_valid(&sshdr))
+			ret = tur_done(sdev, h, &sshdr);
+		else {
 			sdev_printk(KERN_WARNING, sdev,
 				    "%s: sending tur failed with %x\n",
-				    HP_SW_NAME, req->errors);
+				    HP_SW_NAME, res);
 			ret = SCSI_DH_IO;
 		}
 	} else {
 		h->path_state = HP_SW_PATH_ACTIVE;
 		ret = SCSI_DH_OK;
 	}
-	if (ret == SCSI_DH_IMM_RETRY) {
-		blk_put_request(req);
+	if (ret == SCSI_DH_IMM_RETRY)
 		goto retry;
-	}
-	if (ret == SCSI_DH_DEV_OFFLINED) {
-		h->path_state = HP_SW_PATH_PASSIVE;
-		ret = SCSI_DH_OK;
-	}
-
-	blk_put_request(req);
 
 	return ret;
 }
 
 /*
- * start_done - Handle START STOP UNIT return status
- * @sdev: sdev the command has been sent to
- * @errors: blk error code
- */
-static int start_done(struct scsi_device *sdev, unsigned char *sense)
-{
-	struct scsi_sense_hdr sshdr;
-	int rc;
-
-	rc = scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr);
-	if (!rc) {
-		sdev_printk(KERN_WARNING, sdev,
-			    "%s: sending start_stop_unit failed, "
-			    "no sense available\n",
-			    HP_SW_NAME);
-		return SCSI_DH_IO;
-	}
-	switch (sshdr.sense_key) {
-	case NOT_READY:
-		if ((sshdr.asc == 0x04) && (sshdr.ascq == 3)) {
-			/*
-			 * LUN not ready - manual intervention required
-			 *
-			 * Switch-over in progress, retry.
-			 */
-			rc = SCSI_DH_RETRY;
-			break;
-		}
-		/* fall through */
-	default:
-		sdev_printk(KERN_WARNING, sdev,
-			   "%s: sending start_stop_unit failed, sense %x/%x/%x\n",
-			   HP_SW_NAME, sshdr.sense_key, sshdr.asc,
-			   sshdr.ascq);
-		rc = SCSI_DH_IO;
-	}
-
-	return rc;
-}
-
-static void start_stop_endio(struct request *req, int error)
-{
-	struct hp_sw_dh_data *h = req->end_io_data;
-	unsigned err = SCSI_DH_OK;
-
-	if (error || host_byte(req->errors) != DID_OK ||
-			msg_byte(req->errors) != COMMAND_COMPLETE) {
-		sdev_printk(KERN_WARNING, h->sdev,
-			    "%s: sending start_stop_unit failed with %x\n",
-			    HP_SW_NAME, req->errors);
-		err = SCSI_DH_IO;
-		goto done;
-	}
-
-	if (req->sense_len > 0) {
-		err = start_done(h->sdev, h->sense);
-		if (err == SCSI_DH_RETRY) {
-			err = SCSI_DH_IO;
-			if (--h->retry_cnt) {
-				blk_put_request(req);
-				err = hp_sw_start_stop(h);
-				if (err == SCSI_DH_OK)
-					return;
-			}
-		}
-	}
-done:
-	req->end_io_data = NULL;
-	__blk_put_request(req->q, req);
-	if (h->callback_fn) {
-		h->callback_fn(h->callback_data, err);
-		h->callback_fn = h->callback_data = NULL;
-	}
-	return;
-
-}
-
-/*
  * hp_sw_start_stop - Send START STOP UNIT command
  * @sdev: sdev command should be sent to
  *
@@ -237,26 +130,48 @@ static void start_stop_endio(struct request *req, int error)
  */
 static int hp_sw_start_stop(struct hp_sw_dh_data *h)
 {
-	struct request *req;
-
-	req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC);
-	if (IS_ERR(req))
-		return SCSI_DH_RES_TEMP_UNAVAIL;
-
-	blk_rq_set_block_pc(req);
-	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
-			  REQ_FAILFAST_DRIVER;
-	req->cmd_len = COMMAND_SIZE(START_STOP);
-	req->cmd[0] = START_STOP;
-	req->cmd[4] = 1;	/* Start spin cycle */
-	req->timeout = HP_SW_TIMEOUT;
-	req->sense = h->sense;
-	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
-	req->sense_len = 0;
-	req->end_io_data = h;
+	unsigned char cmd[6] = { START_STOP, 0, 0, 0, 1, 0 };
+	struct scsi_sense_hdr sshdr;
+	struct scsi_device *sdev = h->sdev;
+	int res, rc = SCSI_DH_OK;
+	int retry_cnt = HP_SW_RETRIES;
+	u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
+		REQ_FAILFAST_DRIVER;
 
-	blk_execute_rq_nowait(req->q, NULL, req, 1, start_stop_endio);
-	return SCSI_DH_OK;
+retry:
+	res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
+				     HP_SW_TIMEOUT, HP_SW_RETRIES,
+				     NULL, req_flags, 0);
+	if (res) {
+		if (!scsi_sense_valid(&sshdr)) {
+			sdev_printk(KERN_WARNING, sdev,
+				    "%s: sending start_stop_unit failed, "
+				    "no sense available\n", HP_SW_NAME);
+			return SCSI_DH_IO;
+		}
+		switch (sshdr.sense_key) {
+		case NOT_READY:
+			if (sshdr.asc == 0x04 && sshdr.ascq == 3) {
+				/*
+				 * LUN not ready - manual intervention required
+				 *
+				 * Switch-over in progress, retry.
+				 */
+				if (--retry_cnt)
+					goto retry;
+				rc = SCSI_DH_RETRY;
+				break;
+			}
+			/* fall through */
+		default:
+			sdev_printk(KERN_WARNING, sdev,
+				    "%s: sending start_stop_unit failed, "
+				    "sense %x/%x/%x\n", HP_SW_NAME,
+				    sshdr.sense_key, sshdr.asc, sshdr.ascq);
+			rc = SCSI_DH_IO;
+		}
+	}
+	return rc;
 }
 
 static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req)
@@ -290,15 +205,8 @@ static int hp_sw_activate(struct scsi_device *sdev,
 
 	ret = hp_sw_tur(sdev, h);
 
-	if (ret == SCSI_DH_OK && h->path_state == HP_SW_PATH_PASSIVE) {
-		h->retry_cnt = h->retries;
-		h->callback_fn = fn;
-		h->callback_data = data;
+	if (ret == SCSI_DH_OK && h->path_state == HP_SW_PATH_PASSIVE)
 		ret = hp_sw_start_stop(h);
-		if (ret == SCSI_DH_OK)
-			return 0;
-		h->callback_fn = h->callback_data = NULL;
-	}
 
 	if (fn)
 		fn(data, ret);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (9 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 10/18] scsi_dh_hp_sw: " Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:21   ` Martin K. Petersen
  2017-01-27 17:38     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq Christoph Hellwig
                   ` (9 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

When using the slab allocator we already decide at cache creation time if
an allocation comes from a GFP_DMA pool using the SLAB_CACHE_DMA flag,
and there is no point passing the kmalloc-family only GFP_DMA flag to
kmem_cache_alloc.  Drop all the infrastructure for doing so.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 drivers/scsi/scsi.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 75455d4..0f93892 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -105,7 +105,6 @@ struct scsi_host_cmd_pool {
 	char			*cmd_name;
 	char			*sense_name;
 	unsigned int		slab_flags;
-	gfp_t			gfp_mask;
 };
 
 static struct scsi_host_cmd_pool scsi_cmd_pool = {
@@ -118,7 +117,6 @@ static struct scsi_host_cmd_pool scsi_cmd_dma_pool = {
 	.cmd_name	= "scsi_cmd_cache(DMA)",
 	.sense_name	= "scsi_sense_cache(DMA)",
 	.slab_flags	= SLAB_HWCACHE_ALIGN|SLAB_CACHE_DMA,
-	.gfp_mask	= __GFP_DMA,
 };
 
 static DEFINE_MUTEX(host_cmd_pool_mutex);
@@ -156,12 +154,11 @@ scsi_host_alloc_command(struct Scsi_Host *shost, gfp_t gfp_mask)
 	struct scsi_host_cmd_pool *pool = shost->cmd_pool;
 	struct scsi_cmnd *cmd;
 
-	cmd = kmem_cache_zalloc(pool->cmd_slab, gfp_mask | pool->gfp_mask);
+	cmd = kmem_cache_zalloc(pool->cmd_slab, gfp_mask);
 	if (!cmd)
 		goto fail;
 
-	cmd->sense_buffer = kmem_cache_alloc(pool->sense_slab,
-					     gfp_mask | pool->gfp_mask);
+	cmd->sense_buffer = kmem_cache_alloc(pool->sense_slab, gfp_mask);
 	if (!cmd->sense_buffer)
 		goto fail_free_cmd;
 
@@ -327,10 +324,8 @@ scsi_alloc_host_cmd_pool(struct Scsi_Host *shost)
 	}
 
 	pool->slab_flags = SLAB_HWCACHE_ALIGN;
-	if (shost->unchecked_isa_dma) {
+	if (shost->unchecked_isa_dma)
 		pool->slab_flags |= SLAB_CACHE_DMA;
-		pool->gfp_mask = __GFP_DMA;
-	}
 
 	if (hostt->cmd_size)
 		hostt->cmd_pool = pool;
@@ -424,7 +419,6 @@ static void scsi_put_host_cmd_pool(struct Scsi_Host *shost)
  */
 int scsi_setup_command_freelist(struct Scsi_Host *shost)
 {
-	const gfp_t gfp_mask = shost->unchecked_isa_dma ? GFP_DMA : GFP_KERNEL;
 	struct scsi_cmnd *cmd;
 
 	spin_lock_init(&shost->free_list_lock);
@@ -437,7 +431,7 @@ int scsi_setup_command_freelist(struct Scsi_Host *shost)
 	/*
 	 * Get one backup command for this host.
 	 */
-	cmd = scsi_host_alloc_command(shost, gfp_mask);
+	cmd = scsi_host_alloc_command(shost, GFP_KERNEL);
 	if (!cmd) {
 		scsi_put_host_cmd_pool(shost);
 		shost->cmd_pool = NULL;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (10 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:23   ` Martin K. Petersen
  2017-01-27 17:45     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 13/18] scsi: remove scsi_cmd_dma_pool Christoph Hellwig
                   ` (8 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

Currently blk-mq always allocates the sense buffer using normal GFP_KERNEL
allocation.  Refactor the cmd pool code to split the cmd and sense allocation
and share the code to allocate the sense buffers as well as the sense buffer
slab caches between the legacy and blk-mq path.

Note that this switches to lazy allocation of the sense slab caches - the
slab caches (not the actual allocations) won't be destroyed until the scsi
module is unloaded, instead of keeping track of the hosts using them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/hosts.c     |  4 ++++
 drivers/scsi/scsi.c      | 24 ++++---------------
 drivers/scsi/scsi_lib.c  | 62 +++++++++++++++++++++++++++++++++++++++++++++---
 drivers/scsi/scsi_priv.h |  5 ++++
 4 files changed, 73 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 258a3f9..6d29c4a 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -213,6 +213,10 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 		goto fail;
 	}
 
+	error = scsi_init_sense_cache(shost);
+	if (error)
+		goto fail;
+
 	if (shost_use_blk_mq(shost)) {
 		error = scsi_mq_setup_tags(shost);
 		if (error)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 0f93892..469aa0f 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -100,22 +100,18 @@ EXPORT_SYMBOL(scsi_sd_pm_domain);
 
 struct scsi_host_cmd_pool {
 	struct kmem_cache	*cmd_slab;
-	struct kmem_cache	*sense_slab;
 	unsigned int		users;
 	char			*cmd_name;
-	char			*sense_name;
 	unsigned int		slab_flags;
 };
 
 static struct scsi_host_cmd_pool scsi_cmd_pool = {
 	.cmd_name	= "scsi_cmd_cache",
-	.sense_name	= "scsi_sense_cache",
 	.slab_flags	= SLAB_HWCACHE_ALIGN,
 };
 
 static struct scsi_host_cmd_pool scsi_cmd_dma_pool = {
 	.cmd_name	= "scsi_cmd_cache(DMA)",
-	.sense_name	= "scsi_sense_cache(DMA)",
 	.slab_flags	= SLAB_HWCACHE_ALIGN|SLAB_CACHE_DMA,
 };
 
@@ -136,7 +132,7 @@ scsi_host_free_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 
 	if (cmd->prot_sdb)
 		kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
-	kmem_cache_free(pool->sense_slab, cmd->sense_buffer);
+	scsi_free_sense_buffer(shost, cmd->sense_buffer);
 	kmem_cache_free(pool->cmd_slab, cmd);
 }
 
@@ -158,7 +154,8 @@ scsi_host_alloc_command(struct Scsi_Host *shost, gfp_t gfp_mask)
 	if (!cmd)
 		goto fail;
 
-	cmd->sense_buffer = kmem_cache_alloc(pool->sense_slab, gfp_mask);
+	cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp_mask,
+			NUMA_NO_NODE);
 	if (!cmd->sense_buffer)
 		goto fail_free_cmd;
 
@@ -171,7 +168,7 @@ scsi_host_alloc_command(struct Scsi_Host *shost, gfp_t gfp_mask)
 	return cmd;
 
 fail_free_sense:
-	kmem_cache_free(pool->sense_slab, cmd->sense_buffer);
+	scsi_free_sense_buffer(shost, cmd->sense_buffer);
 fail_free_cmd:
 	kmem_cache_free(pool->cmd_slab, cmd);
 fail:
@@ -301,7 +298,6 @@ scsi_find_host_cmd_pool(struct Scsi_Host *shost)
 static void
 scsi_free_host_cmd_pool(struct scsi_host_cmd_pool *pool)
 {
-	kfree(pool->sense_name);
 	kfree(pool->cmd_name);
 	kfree(pool);
 }
@@ -317,8 +313,7 @@ scsi_alloc_host_cmd_pool(struct Scsi_Host *shost)
 		return NULL;
 
 	pool->cmd_name = kasprintf(GFP_KERNEL, "%s_cmd", hostt->proc_name);
-	pool->sense_name = kasprintf(GFP_KERNEL, "%s_sense", hostt->proc_name);
-	if (!pool->cmd_name || !pool->sense_name) {
+	if (!pool->cmd_name) {
 		scsi_free_host_cmd_pool(pool);
 		return NULL;
 	}
@@ -357,12 +352,6 @@ scsi_get_host_cmd_pool(struct Scsi_Host *shost)
 						   pool->slab_flags, NULL);
 		if (!pool->cmd_slab)
 			goto out_free_pool;
-
-		pool->sense_slab = kmem_cache_create(pool->sense_name,
-						     SCSI_SENSE_BUFFERSIZE, 0,
-						     pool->slab_flags, NULL);
-		if (!pool->sense_slab)
-			goto out_free_slab;
 	}
 
 	pool->users++;
@@ -371,8 +360,6 @@ scsi_get_host_cmd_pool(struct Scsi_Host *shost)
 	mutex_unlock(&host_cmd_pool_mutex);
 	return retval;
 
-out_free_slab:
-	kmem_cache_destroy(pool->cmd_slab);
 out_free_pool:
 	if (hostt->cmd_size) {
 		scsi_free_host_cmd_pool(pool);
@@ -398,7 +385,6 @@ static void scsi_put_host_cmd_pool(struct Scsi_Host *shost)
 
 	if (!--pool->users) {
 		kmem_cache_destroy(pool->cmd_slab);
-		kmem_cache_destroy(pool->sense_slab);
 		if (hostt->cmd_size) {
 			scsi_free_host_cmd_pool(pool);
 			hostt->cmd_pool = NULL;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e9e1e14..3d6b364 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -39,6 +39,58 @@
 
 
 struct kmem_cache *scsi_sdb_cache;
+static struct kmem_cache *scsi_sense_cache;
+static struct kmem_cache *scsi_sense_isadma_cache;
+static DEFINE_MUTEX(scsi_sense_cache_mutex);
+
+static inline struct kmem_cache *
+scsi_select_sense_cache(struct Scsi_Host *shost)
+{
+	return shost->unchecked_isa_dma ?
+		scsi_sense_isadma_cache : scsi_sense_cache;
+}
+
+void scsi_free_sense_buffer(struct Scsi_Host *shost,
+		unsigned char *sense_buffer)
+{
+	kmem_cache_free(scsi_select_sense_cache(shost), sense_buffer);
+}
+
+unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, gfp_t gfp_mask,
+		int numa_node)
+{
+	return kmem_cache_alloc_node(scsi_select_sense_cache(shost), gfp_mask,
+			numa_node);
+}
+
+int scsi_init_sense_cache(struct Scsi_Host *shost)
+{
+	struct kmem_cache *cache;
+	int ret = 0;
+
+	cache = scsi_select_sense_cache(shost);
+	if (cache)
+		return 0;
+
+	mutex_lock(&scsi_sense_cache_mutex);
+	if (shost->unchecked_isa_dma) {
+		scsi_sense_isadma_cache =
+			kmem_cache_create("scsi_sense_cache(DMA)",
+			SCSI_SENSE_BUFFERSIZE, 0,
+			SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
+		if (!scsi_sense_isadma_cache)
+			ret = -ENOMEM;
+	} else {
+		scsi_sense_cache =
+			kmem_cache_create("scsi_sense_cache",
+			SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
+		if (!scsi_sense_cache)
+			ret = -ENOMEM;
+	}
+
+	mutex_unlock(&scsi_sense_cache_mutex);
+	return ret;
+}
 
 /*
  * When to reinvoke queueing after a resource shortage. It's 3 msecs to
@@ -1981,10 +2033,11 @@ static int scsi_init_request(void *data, struct request *rq,
 		unsigned int hctx_idx, unsigned int request_idx,
 		unsigned int numa_node)
 {
+	struct Scsi_Host *shost = data;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
 
-	cmd->sense_buffer = kzalloc_node(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL,
-			numa_node);
+	cmd->sense_buffer =
+		scsi_alloc_sense_buffer(shost, GFP_KERNEL, numa_node);
 	if (!cmd->sense_buffer)
 		return -ENOMEM;
 	return 0;
@@ -1993,9 +2046,10 @@ static int scsi_init_request(void *data, struct request *rq,
 static void scsi_exit_request(void *data, struct request *rq,
 		unsigned int hctx_idx, unsigned int request_idx)
 {
+	struct Scsi_Host *shost = data;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
 
-	kfree(cmd->sense_buffer);
+	scsi_free_sense_buffer(shost, cmd->sense_buffer);
 }
 
 static int scsi_map_queues(struct blk_mq_tag_set *set)
@@ -2208,6 +2262,8 @@ int __init scsi_init_queue(void)
 
 void scsi_exit_queue(void)
 {
+	kmem_cache_destroy(scsi_sense_cache);
+	kmem_cache_destroy(scsi_sense_isadma_cache);
 	kmem_cache_destroy(scsi_sdb_cache);
 }
 
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 193636a..1a712c6 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -30,6 +30,11 @@ extern void scsi_exit_hosts(void);
 
 /* scsi.c */
 extern bool scsi_use_blk_mq;
+void scsi_free_sense_buffer(struct Scsi_Host *shost,
+		unsigned char *sense_buffer);
+unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, gfp_t gfp_mask,
+		int numa_node);
+int scsi_init_sense_cache(struct Scsi_Host *shost);
 extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
 extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
 #ifdef CONFIG_SCSI_LOGGING
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 13/18] scsi: remove scsi_cmd_dma_pool
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (11 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:24   ` Martin K. Petersen
  2017-01-27 17:51     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 14/18] scsi: remove __scsi_alloc_queue Christoph Hellwig
                   ` (7 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

There is no need for GFP_DMA allocations of the scsi_cmnd structures
themselves, all that might be DMAed to or from is the actual payload,
or the sense buffers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/scsi.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 469aa0f..2e24f31 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -102,17 +102,10 @@ struct scsi_host_cmd_pool {
 	struct kmem_cache	*cmd_slab;
 	unsigned int		users;
 	char			*cmd_name;
-	unsigned int		slab_flags;
 };
 
 static struct scsi_host_cmd_pool scsi_cmd_pool = {
 	.cmd_name	= "scsi_cmd_cache",
-	.slab_flags	= SLAB_HWCACHE_ALIGN,
-};
-
-static struct scsi_host_cmd_pool scsi_cmd_dma_pool = {
-	.cmd_name	= "scsi_cmd_cache(DMA)",
-	.slab_flags	= SLAB_HWCACHE_ALIGN|SLAB_CACHE_DMA,
 };
 
 static DEFINE_MUTEX(host_cmd_pool_mutex);
@@ -290,8 +283,6 @@ scsi_find_host_cmd_pool(struct Scsi_Host *shost)
 {
 	if (shost->hostt->cmd_size)
 		return shost->hostt->cmd_pool;
-	if (shost->unchecked_isa_dma)
-		return &scsi_cmd_dma_pool;
 	return &scsi_cmd_pool;
 }
 
@@ -318,10 +309,6 @@ scsi_alloc_host_cmd_pool(struct Scsi_Host *shost)
 		return NULL;
 	}
 
-	pool->slab_flags = SLAB_HWCACHE_ALIGN;
-	if (shost->unchecked_isa_dma)
-		pool->slab_flags |= SLAB_CACHE_DMA;
-
 	if (hostt->cmd_size)
 		hostt->cmd_pool = pool;
 
@@ -349,7 +336,7 @@ scsi_get_host_cmd_pool(struct Scsi_Host *shost)
 
 	if (!pool->users) {
 		pool->cmd_slab = kmem_cache_create(pool->cmd_name, cmd_size, 0,
-						   pool->slab_flags, NULL);
+						   SLAB_HWCACHE_ALIGN, NULL);
 		if (!pool->cmd_slab)
 			goto out_free_pool;
 	}
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 14/18] scsi: remove __scsi_alloc_queue
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (12 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 13/18] scsi: remove scsi_cmd_dma_pool Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:25   ` Martin K. Petersen
  2017-01-27 17:58     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request Christoph Hellwig
                   ` (6 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

Instead do an internal export of __scsi_init_queue for the transport
classes that export BSG nodes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 drivers/scsi/scsi_lib.c             | 19 ++++---------------
 drivers/scsi/scsi_transport_fc.c    |  6 ++++--
 drivers/scsi/scsi_transport_iscsi.c |  3 ++-
 include/scsi/scsi_host.h            |  2 --
 include/scsi/scsi_transport.h       |  2 ++
 5 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3d6b364..7950516 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2082,7 +2082,7 @@ static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 	return bounce_limit;
 }
 
-static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
+void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
 	struct device *dev = shost->dma_dev;
 
@@ -2117,28 +2117,17 @@ static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 	 */
 	blk_queue_dma_alignment(q, 0x03);
 }
-
-struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
-					 request_fn_proc *request_fn)
-{
-	struct request_queue *q;
-
-	q = blk_init_queue(request_fn, NULL);
-	if (!q)
-		return NULL;
-	__scsi_init_queue(shost, q);
-	return q;
-}
-EXPORT_SYMBOL(__scsi_alloc_queue);
+EXPORT_SYMBOL_GPL(__scsi_init_queue);
 
 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 {
 	struct request_queue *q;
 
-	q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
+	q = blk_init_queue(scsi_request_fn, NULL);
 	if (!q)
 		return NULL;
 
+	__scsi_init_queue(sdev->host, q);
 	blk_queue_prep_rq(q, scsi_prep_fn);
 	blk_queue_unprep_rq(q, scsi_unprep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 03577bd..afcedec 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3776,7 +3776,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 	snprintf(bsg_name, sizeof(bsg_name),
 		 "fc_host%d", shost->host_no);
 
-	q = __scsi_alloc_queue(shost, bsg_request_fn);
+	q = blk_init_queue(bsg_request_fn, NULL);
 	if (!q) {
 		dev_err(dev,
 			"fc_host%d: bsg interface failed to initialize - no request queue\n",
@@ -3784,6 +3784,7 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 		return -ENOMEM;
 	}
 
+	__scsi_init_queue(shost, q);
 	err = bsg_setup_queue(dev, q, bsg_name, fc_bsg_dispatch,
 				 i->f->dd_bsg_size);
 	if (err) {
@@ -3831,12 +3832,13 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 	if (!i->f->bsg_request)
 		return -ENOTSUPP;
 
-	q = __scsi_alloc_queue(shost, bsg_request_fn);
+	q = blk_init_queue(bsg_request_fn, NULL);
 	if (!q) {
 		dev_err(dev, "bsg interface failed to initialize - no request queue\n");
 		return -ENOMEM;
 	}
 
+	__scsi_init_queue(shost, q);
 	err = bsg_setup_queue(dev, q, NULL, fc_bsg_dispatch, i->f->dd_bsg_size);
 	if (err) {
 		dev_err(dev, "failed to setup bsg queue\n");
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 42bca61..04ebe6e 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1544,10 +1544,11 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
 
 	snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
 
-	q = __scsi_alloc_queue(shost, bsg_request_fn);
+	q = blk_init_queue(bsg_request_fn, NULL);
 	if (!q)
 		return -ENOMEM;
 
+	__scsi_init_queue(shost, q);
 	ret = bsg_setup_queue(dev, q, bsg_name, iscsi_bsg_host_dispatch, 0);
 	if (ret) {
 		shost_printk(KERN_ERR, shost, "bsg interface failed to "
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 36680f1..f4964d7 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -826,8 +826,6 @@ extern void scsi_block_requests(struct Scsi_Host *);
 
 struct class_container;
 
-extern struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
-						void (*) (struct request_queue *));
 /*
  * These two functions are used to allocate and free a pseudo device
  * which will connect to the host adapter itself rather than any
diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
index 8129239..b6e07b5 100644
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h
@@ -119,4 +119,6 @@ scsi_transport_device_data(struct scsi_device *sdev)
 		+ shost->transportt->device_private_offset;
 }
 
+void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q);
+
 #endif /* SCSI_TRANSPORT_H */
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (13 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 14/18] scsi: remove __scsi_alloc_queue Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:30   ` Martin K. Petersen
  2017-01-27 18:39     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue Christoph Hellwig
                   ` (5 subsequent siblings)
  20 siblings, 2 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

Rely on the new block layer functionality to allocate additional driver
specific data behind struct request instead of implementing it in SCSI
itself.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/hosts.c      |  20 +--
 drivers/scsi/scsi.c       | 319 ----------------------------------------------
 drivers/scsi/scsi_error.c |  17 ++-
 drivers/scsi/scsi_lib.c   | 122 ++++++++++++------
 drivers/scsi/scsi_priv.h  |   8 +-
 include/scsi/scsi_host.h  |   3 -
 6 files changed, 95 insertions(+), 394 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 6d29c4a..831a1c8 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -230,19 +230,6 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 		}
 	}
 
-	/*
-	 * Note that we allocate the freelist even for the MQ case for now,
-	 * as we need a command set aside for scsi_reset_provider.  Having
-	 * the full host freelist and one command available for that is a
-	 * little heavy-handed, but avoids introducing a special allocator
-	 * just for this.  Eventually the structure of scsi_reset_provider
-	 * will need a major overhaul.
-	 */
-	error = scsi_setup_command_freelist(shost);
-	if (error)
-		goto out_destroy_tags;
-
-
 	if (!shost->shost_gendev.parent)
 		shost->shost_gendev.parent = dev ? dev : &platform_bus;
 	if (!dma_dev)
@@ -262,7 +249,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 
 	error = device_add(&shost->shost_gendev);
 	if (error)
-		goto out_destroy_freelist;
+		goto out_disable_runtime_pm;
 
 	scsi_host_set_state(shost, SHOST_RUNNING);
 	get_device(shost->shost_gendev.parent);
@@ -312,13 +299,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 	device_del(&shost->shost_dev);
  out_del_gendev:
 	device_del(&shost->shost_gendev);
- out_destroy_freelist:
+ out_disable_runtime_pm:
 	device_disable_async_suspend(&shost->shost_gendev);
 	pm_runtime_disable(&shost->shost_gendev);
 	pm_runtime_set_suspended(&shost->shost_gendev);
 	pm_runtime_put_noidle(&shost->shost_gendev);
-	scsi_destroy_command_freelist(shost);
- out_destroy_tags:
 	if (shost_use_blk_mq(shost))
 		scsi_mq_destroy_tags(shost);
  fail:
@@ -359,7 +344,6 @@ static void scsi_host_dev_release(struct device *dev)
 		kfree(dev_name(&shost->shost_dev));
 	}
 
-	scsi_destroy_command_freelist(shost);
 	if (shost_use_blk_mq(shost)) {
 		if (shost->tag_set.tags)
 			scsi_mq_destroy_tags(shost);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 2e24f31..3d8d215 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -98,163 +98,6 @@ EXPORT_SYMBOL(scsi_sd_probe_domain);
 ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain);
 EXPORT_SYMBOL(scsi_sd_pm_domain);
 
-struct scsi_host_cmd_pool {
-	struct kmem_cache	*cmd_slab;
-	unsigned int		users;
-	char			*cmd_name;
-};
-
-static struct scsi_host_cmd_pool scsi_cmd_pool = {
-	.cmd_name	= "scsi_cmd_cache",
-};
-
-static DEFINE_MUTEX(host_cmd_pool_mutex);
-
-/**
- * scsi_host_free_command - internal function to release a command
- * @shost:	host to free the command for
- * @cmd:	command to release
- *
- * the command must previously have been allocated by
- * scsi_host_alloc_command.
- */
-static void
-scsi_host_free_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
-{
-	struct scsi_host_cmd_pool *pool = shost->cmd_pool;
-
-	if (cmd->prot_sdb)
-		kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
-	scsi_free_sense_buffer(shost, cmd->sense_buffer);
-	kmem_cache_free(pool->cmd_slab, cmd);
-}
-
-/**
- * scsi_host_alloc_command - internal function to allocate command
- * @shost:	SCSI host whose pool to allocate from
- * @gfp_mask:	mask for the allocation
- *
- * Returns a fully allocated command with sense buffer and protection
- * data buffer (where applicable) or NULL on failure
- */
-static struct scsi_cmnd *
-scsi_host_alloc_command(struct Scsi_Host *shost, gfp_t gfp_mask)
-{
-	struct scsi_host_cmd_pool *pool = shost->cmd_pool;
-	struct scsi_cmnd *cmd;
-
-	cmd = kmem_cache_zalloc(pool->cmd_slab, gfp_mask);
-	if (!cmd)
-		goto fail;
-
-	cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp_mask,
-			NUMA_NO_NODE);
-	if (!cmd->sense_buffer)
-		goto fail_free_cmd;
-
-	if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
-		cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp_mask);
-		if (!cmd->prot_sdb)
-			goto fail_free_sense;
-	}
-
-	return cmd;
-
-fail_free_sense:
-	scsi_free_sense_buffer(shost, cmd->sense_buffer);
-fail_free_cmd:
-	kmem_cache_free(pool->cmd_slab, cmd);
-fail:
-	return NULL;
-}
-
-/**
- * __scsi_get_command - Allocate a struct scsi_cmnd
- * @shost: host to transmit command
- * @gfp_mask: allocation mask
- *
- * Description: allocate a struct scsi_cmd from host's slab, recycling from the
- *              host's free_list if necessary.
- */
-static struct scsi_cmnd *
-__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
-{
-	struct scsi_cmnd *cmd = scsi_host_alloc_command(shost, gfp_mask);
-
-	if (unlikely(!cmd)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&shost->free_list_lock, flags);
-		if (likely(!list_empty(&shost->free_list))) {
-			cmd = list_entry(shost->free_list.next,
-					 struct scsi_cmnd, list);
-			list_del_init(&cmd->list);
-		}
-		spin_unlock_irqrestore(&shost->free_list_lock, flags);
-
-		if (cmd) {
-			void *buf, *prot;
-
-			buf = cmd->sense_buffer;
-			prot = cmd->prot_sdb;
-
-			memset(cmd, 0, sizeof(*cmd));
-
-			cmd->sense_buffer = buf;
-			cmd->prot_sdb = prot;
-		}
-	}
-
-	return cmd;
-}
-
-/**
- * scsi_get_command - Allocate and setup a scsi command block
- * @dev: parent scsi device
- * @gfp_mask: allocator flags
- *
- * Returns:	The allocated scsi command structure.
- */
-struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
-{
-	struct scsi_cmnd *cmd = __scsi_get_command(dev->host, gfp_mask);
-	unsigned long flags;
-
-	if (unlikely(cmd == NULL))
-		return NULL;
-
-	cmd->device = dev;
-	INIT_LIST_HEAD(&cmd->list);
-	INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
-	spin_lock_irqsave(&dev->list_lock, flags);
-	list_add_tail(&cmd->list, &dev->cmd_list);
-	spin_unlock_irqrestore(&dev->list_lock, flags);
-	cmd->jiffies_at_alloc = jiffies;
-	return cmd;
-}
-
-/**
- * __scsi_put_command - Free a struct scsi_cmnd
- * @shost: dev->host
- * @cmd: Command to free
- */
-static void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
-{
-	unsigned long flags;
-
-	if (unlikely(list_empty(&shost->free_list))) {
-		spin_lock_irqsave(&shost->free_list_lock, flags);
-		if (list_empty(&shost->free_list)) {
-			list_add(&cmd->list, &shost->free_list);
-			cmd = NULL;
-		}
-		spin_unlock_irqrestore(&shost->free_list_lock, flags);
-	}
-
-	if (likely(cmd != NULL))
-		scsi_host_free_command(shost, cmd);
-}
-
 /**
  * scsi_put_command - Free a scsi command block
  * @cmd: command block to free
@@ -274,168 +117,6 @@ void scsi_put_command(struct scsi_cmnd *cmd)
 	spin_unlock_irqrestore(&cmd->device->list_lock, flags);
 
 	BUG_ON(delayed_work_pending(&cmd->abort_work));
-
-	__scsi_put_command(cmd->device->host, cmd);
-}
-
-static struct scsi_host_cmd_pool *
-scsi_find_host_cmd_pool(struct Scsi_Host *shost)
-{
-	if (shost->hostt->cmd_size)
-		return shost->hostt->cmd_pool;
-	return &scsi_cmd_pool;
-}
-
-static void
-scsi_free_host_cmd_pool(struct scsi_host_cmd_pool *pool)
-{
-	kfree(pool->cmd_name);
-	kfree(pool);
-}
-
-static struct scsi_host_cmd_pool *
-scsi_alloc_host_cmd_pool(struct Scsi_Host *shost)
-{
-	struct scsi_host_template *hostt = shost->hostt;
-	struct scsi_host_cmd_pool *pool;
-
-	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
-	if (!pool)
-		return NULL;
-
-	pool->cmd_name = kasprintf(GFP_KERNEL, "%s_cmd", hostt->proc_name);
-	if (!pool->cmd_name) {
-		scsi_free_host_cmd_pool(pool);
-		return NULL;
-	}
-
-	if (hostt->cmd_size)
-		hostt->cmd_pool = pool;
-
-	return pool;
-}
-
-static struct scsi_host_cmd_pool *
-scsi_get_host_cmd_pool(struct Scsi_Host *shost)
-{
-	struct scsi_host_template *hostt = shost->hostt;
-	struct scsi_host_cmd_pool *retval = NULL, *pool;
-	size_t cmd_size = sizeof(struct scsi_cmnd) + hostt->cmd_size;
-
-	/*
-	 * Select a command slab for this host and create it if not
-	 * yet existent.
-	 */
-	mutex_lock(&host_cmd_pool_mutex);
-	pool = scsi_find_host_cmd_pool(shost);
-	if (!pool) {
-		pool = scsi_alloc_host_cmd_pool(shost);
-		if (!pool)
-			goto out;
-	}
-
-	if (!pool->users) {
-		pool->cmd_slab = kmem_cache_create(pool->cmd_name, cmd_size, 0,
-						   SLAB_HWCACHE_ALIGN, NULL);
-		if (!pool->cmd_slab)
-			goto out_free_pool;
-	}
-
-	pool->users++;
-	retval = pool;
-out:
-	mutex_unlock(&host_cmd_pool_mutex);
-	return retval;
-
-out_free_pool:
-	if (hostt->cmd_size) {
-		scsi_free_host_cmd_pool(pool);
-		hostt->cmd_pool = NULL;
-	}
-	goto out;
-}
-
-static void scsi_put_host_cmd_pool(struct Scsi_Host *shost)
-{
-	struct scsi_host_template *hostt = shost->hostt;
-	struct scsi_host_cmd_pool *pool;
-
-	mutex_lock(&host_cmd_pool_mutex);
-	pool = scsi_find_host_cmd_pool(shost);
-
-	/*
-	 * This may happen if a driver has a mismatched get and put
-	 * of the command pool; the driver should be implicated in
-	 * the stack trace
-	 */
-	BUG_ON(pool->users == 0);
-
-	if (!--pool->users) {
-		kmem_cache_destroy(pool->cmd_slab);
-		if (hostt->cmd_size) {
-			scsi_free_host_cmd_pool(pool);
-			hostt->cmd_pool = NULL;
-		}
-	}
-	mutex_unlock(&host_cmd_pool_mutex);
-}
-
-/**
- * scsi_setup_command_freelist - Setup the command freelist for a scsi host.
- * @shost: host to allocate the freelist for.
- *
- * Description: The command freelist protects against system-wide out of memory
- * deadlock by preallocating one SCSI command structure for each host, so the
- * system can always write to a swap file on a device associated with that host.
- *
- * Returns:	Nothing.
- */
-int scsi_setup_command_freelist(struct Scsi_Host *shost)
-{
-	struct scsi_cmnd *cmd;
-
-	spin_lock_init(&shost->free_list_lock);
-	INIT_LIST_HEAD(&shost->free_list);
-
-	shost->cmd_pool = scsi_get_host_cmd_pool(shost);
-	if (!shost->cmd_pool)
-		return -ENOMEM;
-
-	/*
-	 * Get one backup command for this host.
-	 */
-	cmd = scsi_host_alloc_command(shost, GFP_KERNEL);
-	if (!cmd) {
-		scsi_put_host_cmd_pool(shost);
-		shost->cmd_pool = NULL;
-		return -ENOMEM;
-	}
-	list_add(&cmd->list, &shost->free_list);
-	return 0;
-}
-
-/**
- * scsi_destroy_command_freelist - Release the command freelist for a scsi host.
- * @shost: host whose freelist is going to be destroyed
- */
-void scsi_destroy_command_freelist(struct Scsi_Host *shost)
-{
-	/*
-	 * If cmd_pool is NULL the free list was not initialized, so
-	 * do not attempt to release resources.
-	 */
-	if (!shost->cmd_pool)
-		return;
-
-	while (!list_empty(&shost->free_list)) {
-		struct scsi_cmnd *cmd;
-
-		cmd = list_entry(shost->free_list.next, struct scsi_cmnd, list);
-		list_del_init(&cmd->list);
-		scsi_host_free_command(shost, cmd);
-	}
-	shost->cmd_pool = NULL;
-	scsi_put_host_cmd_pool(shost);
 }
 
 #ifdef CONFIG_SCSI_LOGGING
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 996e134..7c08460 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -2331,7 +2331,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
 {
 	struct scsi_cmnd *scmd;
 	struct Scsi_Host *shost = dev->host;
-	struct request req;
+	struct request *rq;
 	unsigned long flags;
 	int error = 0, rtn, val;
 
@@ -2346,14 +2346,16 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
 		return -EIO;
 
 	error = -EIO;
-	scmd = scsi_get_command(dev, GFP_KERNEL);
-	if (!scmd)
+	rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) +
+			shost->hostt->cmd_size, GFP_KERNEL);
+	if (!rq)
 		goto out_put_autopm_host;
+	blk_rq_init(NULL, rq);
 
-	blk_rq_init(NULL, &req);
-	scmd->request = &req;
-
-	scmd->cmnd = req.cmd;
+	scmd = (struct scsi_cmnd *)(rq + 1);
+	scsi_init_command(dev, scmd);
+	scmd->request = rq;
+	scmd->cmnd = rq->cmd;
 
 	scmd->scsi_done		= scsi_reset_provider_done_command;
 	memset(&scmd->sdb, 0, sizeof(scmd->sdb));
@@ -2413,6 +2415,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
 	scsi_run_host_queues(shost);
 
 	scsi_put_command(scmd);
+	kfree(rq);
 
 out_put_autopm_host:
 	scsi_autopm_put_host(shost);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7950516..81ff5ad 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -37,8 +37,7 @@
 #include "scsi_priv.h"
 #include "scsi_logging.h"
 
-
-struct kmem_cache *scsi_sdb_cache;
+static struct kmem_cache *scsi_sdb_cache;
 static struct kmem_cache *scsi_sense_cache;
 static struct kmem_cache *scsi_sense_isadma_cache;
 static DEFINE_MUTEX(scsi_sense_cache_mutex);
@@ -50,14 +49,14 @@ scsi_select_sense_cache(struct Scsi_Host *shost)
 		scsi_sense_isadma_cache : scsi_sense_cache;
 }
 
-void scsi_free_sense_buffer(struct Scsi_Host *shost,
+static void scsi_free_sense_buffer(struct Scsi_Host *shost,
 		unsigned char *sense_buffer)
 {
 	kmem_cache_free(scsi_select_sense_cache(shost), sense_buffer);
 }
 
-unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, gfp_t gfp_mask,
-		int numa_node)
+static unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost,
+	gfp_t gfp_mask, int numa_node)
 {
 	return kmem_cache_alloc_node(scsi_select_sense_cache(shost), gfp_mask,
 			numa_node);
@@ -697,14 +696,13 @@ static bool scsi_end_request(struct request *req, int error,
 
 		if (bidi_bytes)
 			scsi_release_bidi_buffers(cmd);
+		scsi_release_buffers(cmd);
+		scsi_put_command(cmd);
 
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_finish_request(req, error);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 
-		scsi_release_buffers(cmd);
-
-		scsi_put_command(cmd);
 		scsi_run_queue(q);
 	}
 
@@ -1161,34 +1159,22 @@ int scsi_init_io(struct scsi_cmnd *cmd)
 }
 EXPORT_SYMBOL(scsi_init_io);
 
-static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
-		struct request *req)
+void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 {
-	struct scsi_cmnd *cmd;
-
-	if (!req->special) {
-		/* Bail if we can't get a reference to the device */
-		if (!get_device(&sdev->sdev_gendev))
-			return NULL;
-
-		cmd = scsi_get_command(sdev, GFP_ATOMIC);
-		if (unlikely(!cmd)) {
-			put_device(&sdev->sdev_gendev);
-			return NULL;
-		}
-		req->special = cmd;
-	} else {
-		cmd = req->special;
-	}
-
-	/* pull a tag out of the request if we have one */
-	cmd->tag = req->tag;
-	cmd->request = req;
+	void *buf = cmd->sense_buffer;
+	void *prot = cmd->prot_sdb;
+	unsigned long flags;
 
-	cmd->cmnd = req->cmd;
-	cmd->prot_op = SCSI_PROT_NORMAL;
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->device = dev;
+	cmd->sense_buffer = buf;
+	cmd->prot_sdb = prot;
+	INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
+	cmd->jiffies_at_alloc = jiffies;
 
-	return cmd;
+	spin_lock_irqsave(&dev->list_lock, flags);
+	list_add_tail(&cmd->list, &dev->cmd_list);
+	spin_unlock_irqrestore(&dev->list_lock, flags);
 }
 
 static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
@@ -1349,19 +1335,29 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 static int scsi_prep_fn(struct request_queue *q, struct request *req)
 {
 	struct scsi_device *sdev = q->queuedata;
-	struct scsi_cmnd *cmd;
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
 	int ret;
 
 	ret = scsi_prep_state_check(sdev, req);
 	if (ret != BLKPREP_OK)
 		goto out;
 
-	cmd = scsi_get_cmd_from_req(sdev, req);
-	if (unlikely(!cmd)) {
-		ret = BLKPREP_DEFER;
-		goto out;
+	if (!req->special) {
+		/* Bail if we can't get a reference to the device */
+		if (unlikely(!get_device(&sdev->sdev_gendev))) {
+			ret = BLKPREP_DEFER;
+			goto out;
+		}
+
+		scsi_init_command(sdev, cmd);
+		req->special = cmd;
 	}
 
+	cmd->tag = req->tag;
+	cmd->request = req;
+	cmd->cmnd = req->cmd;
+	cmd->prot_op = SCSI_PROT_NORMAL;
+
 	ret = scsi_setup_cmnd(sdev, req);
 out:
 	return scsi_prep_return(q, req, ret);
@@ -2119,15 +2115,61 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(__scsi_init_queue);
 
+static int scsi_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+{
+	struct Scsi_Host *shost = q->rq_alloc_data;
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	memset(cmd, 0, sizeof(*cmd));
+
+	cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp, NUMA_NO_NODE);
+	if (!cmd->sense_buffer)
+		goto fail;
+
+	if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
+		cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp);
+		if (!cmd->prot_sdb)
+			goto fail_free_sense;
+	}
+
+	return 0;
+
+fail_free_sense:
+	scsi_free_sense_buffer(shost, cmd->sense_buffer);
+fail:
+	return -ENOMEM;
+}
+
+static void scsi_exit_rq(struct request_queue *q, struct request *rq)
+{
+	struct Scsi_Host *shost = q->rq_alloc_data;
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	if (cmd->prot_sdb)
+		kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
+	scsi_free_sense_buffer(shost, cmd->sense_buffer);
+}
+
 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 {
+	struct Scsi_Host *shost = sdev->host;
 	struct request_queue *q;
 
-	q = blk_init_queue(scsi_request_fn, NULL);
+	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
 	if (!q)
 		return NULL;
+	q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
+	q->rq_alloc_data = shost;
+	q->request_fn = scsi_request_fn;
+	q->init_rq_fn = scsi_init_rq;
+	q->exit_rq_fn = scsi_exit_rq;
+
+	if (blk_init_allocated_queue(q) < 0) {
+		blk_cleanup_queue(q);
+		return NULL;
+	}
 
-	__scsi_init_queue(sdev->host, q);
+	__scsi_init_queue(shost, q);
 	blk_queue_prep_rq(q, scsi_prep_fn);
 	blk_queue_unprep_rq(q, scsi_unprep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 1a712c6..99bfc98 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -30,13 +30,8 @@ extern void scsi_exit_hosts(void);
 
 /* scsi.c */
 extern bool scsi_use_blk_mq;
-void scsi_free_sense_buffer(struct Scsi_Host *shost,
-		unsigned char *sense_buffer);
-unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, gfp_t gfp_mask,
-		int numa_node);
 int scsi_init_sense_cache(struct Scsi_Host *shost);
-extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
-extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
+void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd);
 #ifdef CONFIG_SCSI_LOGGING
 void scsi_log_send(struct scsi_cmnd *cmd);
 void scsi_log_completion(struct scsi_cmnd *cmd, int disposition);
@@ -101,7 +96,6 @@ extern void scsi_exit_queue(void);
 extern void scsi_evt_thread(struct work_struct *work);
 struct request_queue;
 struct request;
-extern struct kmem_cache *scsi_sdb_cache;
 
 /* scsi_proc.c */
 #ifdef CONFIG_SCSI_PROC_FS
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index f4964d7..3cd8c3b 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -551,9 +551,6 @@ struct Scsi_Host {
 	struct list_head	__devices;
 	struct list_head	__targets;
 	
-	struct scsi_host_cmd_pool *cmd_pool;
-	spinlock_t		free_list_lock;
-	struct list_head	free_list; /* backup store of cmd structs */
 	struct list_head	starved_list;
 
 	spinlock_t		default_lock;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (14 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-27 18:48     ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 17/18] block: split scsi_request out of struct request Christoph Hellwig
                   ` (4 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

Simplify the boilerplate code needed for bsg nodes a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 block/bsg-lib.c                     | 21 +++++++++++----------
 drivers/scsi/scsi_transport_fc.c    | 36 ++++++++----------------------------
 drivers/scsi/scsi_transport_iscsi.c | 15 ++++-----------
 include/linux/bsg-lib.h             |  5 ++---
 4 files changed, 25 insertions(+), 52 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 9d652a9..c74acf4 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -177,7 +177,7 @@ static int bsg_create_job(struct device *dev, struct request *req)
  *
  * Drivers/subsys should pass this to the queue init function.
  */
-void bsg_request_fn(struct request_queue *q)
+static void bsg_request_fn(struct request_queue *q)
 	__releases(q->queue_lock)
 	__acquires(q->queue_lock)
 {
@@ -214,24 +214,24 @@ void bsg_request_fn(struct request_queue *q)
 	put_device(dev);
 	spin_lock_irq(q->queue_lock);
 }
-EXPORT_SYMBOL_GPL(bsg_request_fn);
 
 /**
  * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
  * @dev: device to attach bsg device to
- * @q: request queue setup by caller
  * @name: device to give bsg device
  * @job_fn: bsg job handler
  * @dd_job_size: size of LLD data needed for each job
- *
- * The caller should have setup the reuqest queue with bsg_request_fn
- * as the request_fn.
  */
-int bsg_setup_queue(struct device *dev, struct request_queue *q,
-		    char *name, bsg_job_fn *job_fn, int dd_job_size)
+struct request_queue *bsg_setup_queue(struct device *dev, char *name,
+		bsg_job_fn *job_fn, int dd_job_size)
 {
+	struct request_queue *q;
 	int ret;
 
+	q = blk_init_queue(bsg_request_fn, NULL);
+	if (!q)
+		return ERR_PTR(-ENOMEM);
+
 	q->queuedata = dev;
 	q->bsg_job_size = dd_job_size;
 	q->bsg_job_fn = job_fn;
@@ -243,9 +243,10 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q,
 	if (ret) {
 		printk(KERN_ERR "%s: bsg interface failed to "
 		       "initialize - register queue\n", dev->kobj.name);
-		return ret;
+		blk_cleanup_queue(q);
+		return ERR_PTR(ret);
 	}
 
-	return 0;
+	return q;
 }
 EXPORT_SYMBOL_GPL(bsg_setup_queue);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index afcedec..13dcb9b 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3765,7 +3765,6 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 	struct device *dev = &shost->shost_gendev;
 	struct fc_internal *i = to_fc_internal(shost->transportt);
 	struct request_queue *q;
-	int err;
 	char bsg_name[20];
 
 	fc_host->rqst_q = NULL;
@@ -3776,24 +3775,14 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
 	snprintf(bsg_name, sizeof(bsg_name),
 		 "fc_host%d", shost->host_no);
 
-	q = blk_init_queue(bsg_request_fn, NULL);
-	if (!q) {
-		dev_err(dev,
-			"fc_host%d: bsg interface failed to initialize - no request queue\n",
-			shost->host_no);
-		return -ENOMEM;
-	}
-
-	__scsi_init_queue(shost, q);
-	err = bsg_setup_queue(dev, q, bsg_name, fc_bsg_dispatch,
-				 i->f->dd_bsg_size);
-	if (err) {
+	q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size);
+	if (IS_ERR(q)) {
 		dev_err(dev,
 			"fc_host%d: bsg interface failed to initialize - setup queue\n",
 			shost->host_no);
-		blk_cleanup_queue(q);
-		return err;
+		return PTR_ERR(q);
 	}
+	__scsi_init_queue(shost, q);
 	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
 	blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
 	fc_host->rqst_q = q;
@@ -3825,27 +3814,18 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
 	struct device *dev = &rport->dev;
 	struct fc_internal *i = to_fc_internal(shost->transportt);
 	struct request_queue *q;
-	int err;
 
 	rport->rqst_q = NULL;
 
 	if (!i->f->bsg_request)
 		return -ENOTSUPP;
 
-	q = blk_init_queue(bsg_request_fn, NULL);
-	if (!q) {
-		dev_err(dev, "bsg interface failed to initialize - no request queue\n");
-		return -ENOMEM;
-	}
-
-	__scsi_init_queue(shost, q);
-	err = bsg_setup_queue(dev, q, NULL, fc_bsg_dispatch, i->f->dd_bsg_size);
-	if (err) {
+	q = bsg_setup_queue(dev, NULL, fc_bsg_dispatch, i->f->dd_bsg_size);
+	if (IS_ERR(q)) {
 		dev_err(dev, "failed to setup bsg queue\n");
-		blk_cleanup_queue(q);
-		return err;
+		return PTR_ERR(q);
 	}
-
+	__scsi_init_queue(shost, q);
 	blk_queue_prep_rq(q, fc_bsg_rport_prep);
 	blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 04ebe6e..568c9f2 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1537,25 +1537,18 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
 	struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
 	struct request_queue *q;
 	char bsg_name[20];
-	int ret;
 
 	if (!i->iscsi_transport->bsg_request)
 		return -ENOTSUPP;
 
 	snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
-
-	q = blk_init_queue(bsg_request_fn, NULL);
-	if (!q)
-		return -ENOMEM;
-
-	__scsi_init_queue(shost, q);
-	ret = bsg_setup_queue(dev, q, bsg_name, iscsi_bsg_host_dispatch, 0);
-	if (ret) {
+	q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0);
+	if (IS_ERR(q)) {
 		shost_printk(KERN_ERR, shost, "bsg interface failed to "
 			     "initialize - no request queue\n");
-		blk_cleanup_queue(q);
-		return ret;
+		return PTR_ERR(q);
 	}
+	__scsi_init_queue(shost, q);
 
 	ihost->bsg_q = q;
 	return 0;
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 657a718..e34dde2 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -66,9 +66,8 @@ struct bsg_job {
 
 void bsg_job_done(struct bsg_job *job, int result,
 		  unsigned int reply_payload_rcv_len);
-int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
-		    bsg_job_fn *job_fn, int dd_job_size);
-void bsg_request_fn(struct request_queue *q);
+struct request_queue *bsg_setup_queue(struct device *dev, char *name,
+		bsg_job_fn *job_fn, int dd_job_size);
 void bsg_job_put(struct bsg_job *job);
 int __must_check bsg_job_get(struct bsg_job *job);
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 17/18] block: split scsi_request out of struct request
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (15 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-25 17:25 ` [PATCH 18/18] block: don't assign cmd_flags in __blk_rq_prep_clone Christoph Hellwig
                   ` (3 subsequent siblings)
  20 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

And require all drivers that want to support BLOCK_PC to allocate it
as the first thing of their private data.  To support this the legacy
IDE and BSG code is switched to set cmd_size on their queues to let
the block layer allocate the additional space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c                         | 31 -----------
 block/blk-exec.c                         | 12 -----
 block/blk-mq.c                           | 10 ----
 block/bsg-lib.c                          | 34 ++++++++----
 block/bsg.c                              | 47 ++++++++---------
 block/scsi_ioctl.c                       | 87 ++++++++++++++++++------------
 drivers/ata/libata-scsi.c                |  2 +-
 drivers/block/cciss.c                    | 28 +++++-----
 drivers/block/pktcdvd.c                  |  6 +--
 drivers/block/virtio_blk.c               | 11 ++--
 drivers/cdrom/cdrom.c                    | 32 +++++------
 drivers/ide/ide-atapi.c                  | 43 ++++++++-------
 drivers/ide/ide-cd.c                     | 91 +++++++++++++++-----------------
 drivers/ide/ide-cd_ioctl.c               |  1 +
 drivers/ide/ide-cd_verbose.c             |  6 +--
 drivers/ide/ide-devsets.c                |  9 ++--
 drivers/ide/ide-disk.c                   |  1 +
 drivers/ide/ide-eh.c                     |  2 +-
 drivers/ide/ide-floppy.c                 |  4 +-
 drivers/ide/ide-io.c                     |  3 +-
 drivers/ide/ide-ioctls.c                 |  6 ++-
 drivers/ide/ide-park.c                   | 12 +++--
 drivers/ide/ide-pm.c                     |  2 +
 drivers/ide/ide-probe.c                  | 36 +++++++++++--
 drivers/ide/ide-tape.c                   | 32 +++++------
 drivers/ide/ide-taskfile.c               |  1 +
 drivers/ide/sis5513.c                    |  2 +-
 drivers/message/fusion/mptsas.c          |  8 +--
 drivers/scsi/libfc/fc_lport.c            |  2 +-
 drivers/scsi/libsas/sas_expander.c       |  8 +--
 drivers/scsi/libsas/sas_host_smp.c       | 38 ++++++-------
 drivers/scsi/mpt3sas/mpt3sas_transport.c |  8 +--
 drivers/scsi/osd/osd_initiator.c         | 19 +++----
 drivers/scsi/osst.c                      | 15 +++---
 drivers/scsi/qla2xxx/qla_bsg.c           |  2 +-
 drivers/scsi/qla2xxx/qla_isr.c           |  6 ++-
 drivers/scsi/qla2xxx/qla_mr.c            |  2 +-
 drivers/scsi/scsi_error.c                | 22 ++++----
 drivers/scsi/scsi_lib.c                  | 48 +++++++++--------
 drivers/scsi/scsi_transport_sas.c        |  5 ++
 drivers/scsi/sd.c                        |  4 +-
 drivers/scsi/sg.c                        | 30 ++++++-----
 drivers/scsi/st.c                        | 22 ++++----
 drivers/target/target_core_pscsi.c       | 11 ++--
 fs/nfsd/blocklayout.c                    | 17 +++---
 include/linux/blkdev.h                   | 11 ----
 include/linux/blktrace_api.h             |  3 +-
 include/linux/ide.h                      |  8 ++-
 include/scsi/scsi_cmnd.h                 |  2 +
 include/scsi/scsi_request.h              | 28 ++++++++++
 kernel/trace/blktrace.c                  | 32 +++++------
 51 files changed, 483 insertions(+), 419 deletions(-)
 create mode 100644 include/scsi/scsi_request.h

diff --git a/block/blk-core.c b/block/blk-core.c
index 7de7164..33c5d05e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -132,8 +132,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->__sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
-	rq->cmd = rq->__cmd;
-	rq->cmd_len = BLK_MAX_CDB;
 	rq->tag = -1;
 	rq->internal_tag = -1;
 	rq->start_time = jiffies;
@@ -160,8 +158,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
-	int bit;
-
 	printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
 		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
 		(unsigned long long) rq->cmd_flags);
@@ -171,13 +167,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
 	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
 	       rq->bio, rq->biotail, blk_rq_bytes(rq));
-
-	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		printk(KERN_INFO "  cdb: ");
-		for (bit = 0; bit < BLK_MAX_CDB; bit++)
-			printk("%02x ", rq->cmd[bit]);
-		printk("\n");
-	}
 }
 EXPORT_SYMBOL(blk_dump_rq_flags);
 
@@ -1316,18 +1305,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 EXPORT_SYMBOL(blk_get_request);
 
 /**
- * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
- * @rq:		request to be initialized
- *
- */
-void blk_rq_set_block_pc(struct request *rq)
-{
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-	memset(rq->__cmd, 0, sizeof(rq->__cmd));
-}
-EXPORT_SYMBOL(blk_rq_set_block_pc);
-
-/**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
@@ -2459,14 +2436,6 @@ void blk_start_request(struct request *req)
 		wbt_issue(req->q->rq_wb, &req->issue_stat);
 	}
 
-	/*
-	 * We are now handing the request to the hardware, initialize
-	 * resid_len to full count and add the timeout handler.
-	 */
-	req->resid_len = blk_rq_bytes(req);
-	if (unlikely(blk_bidi_rq(req)))
-		req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
-
 	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
 	blk_add_timer(req);
 }
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 86656fd..8dd4a6d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -101,16 +101,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 		   struct request *rq, int at_head)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
-	char sense[SCSI_SENSE_BUFFERSIZE];
 	int err = 0;
 	unsigned long hang_check;
 
-	if (!rq->sense) {
-		memset(sense, 0, sizeof(sense));
-		rq->sense = sense;
-		rq->sense_len = 0;
-	}
-
 	rq->end_io_data = &wait;
 	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
 
@@ -124,11 +117,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 	if (rq->errors)
 		err = -EIO;
 
-	if (rq->sense == sense)	{
-		rq->sense = NULL;
-		rq->sense_len = 0;
-	}
-
 	return err;
 }
 EXPORT_SYMBOL(blk_execute_rq);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e229f8a..8364086 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -199,13 +199,7 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	rq->special = NULL;
 	/* tag was already set */
 	rq->errors = 0;
-
-	rq->cmd = rq->__cmd;
-
 	rq->extra_len = 0;
-	rq->sense_len = 0;
-	rq->resid_len = 0;
-	rq->sense = NULL;
 
 	INIT_LIST_HEAD(&rq->timeout_list);
 	rq->timeout = 0;
@@ -487,10 +481,6 @@ void blk_mq_start_request(struct request *rq)
 
 	trace_block_rq_issue(q, rq);
 
-	rq->resid_len = blk_rq_bytes(rq);
-	if (unlikely(blk_bidi_rq(rq)))
-		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
-
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
 		blk_stat_set_issue_time(&rq->issue_stat);
 		rq->rq_flags |= RQF_STATS;
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c74acf4..cd15f9d 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -71,22 +71,24 @@ void bsg_job_done(struct bsg_job *job, int result,
 {
 	struct request *req = job->req;
 	struct request *rsp = req->next_rq;
+	struct scsi_request *rq = scsi_req(req);
 	int err;
 
 	err = job->req->errors = result;
 	if (err < 0)
 		/* we're only returning the result field in the reply */
-		job->req->sense_len = sizeof(u32);
+		rq->sense_len = sizeof(u32);
 	else
-		job->req->sense_len = job->reply_len;
+		rq->sense_len = job->reply_len;
 	/* we assume all request payload was transferred, residual == 0 */
-	req->resid_len = 0;
+	rq->resid_len = 0;
 
 	if (rsp) {
-		WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+		WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len);
 
 		/* set reply (bidi) residual */
-		rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+		scsi_req(rsp)->resid_len -=
+			min(reply_payload_rcv_len, scsi_req(rsp)->resid_len);
 	}
 	blk_complete_request(req);
 }
@@ -113,6 +115,7 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
 	if (!buf->sg_list)
 		return -ENOMEM;
 	sg_init_table(buf->sg_list, req->nr_phys_segments);
+	scsi_req(req)->resid_len = blk_rq_bytes(req);
 	buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
 	buf->payload_len = blk_rq_bytes(req);
 	return 0;
@@ -127,6 +130,7 @@ static int bsg_create_job(struct device *dev, struct request *req)
 {
 	struct request *rsp = req->next_rq;
 	struct request_queue *q = req->q;
+	struct scsi_request *rq = scsi_req(req);
 	struct bsg_job *job;
 	int ret;
 
@@ -140,9 +144,9 @@ static int bsg_create_job(struct device *dev, struct request *req)
 	job->req = req;
 	if (q->bsg_job_size)
 		job->dd_data = (void *)&job[1];
-	job->request = req->cmd;
-	job->request_len = req->cmd_len;
-	job->reply = req->sense;
+	job->request = rq->cmd;
+	job->request_len = rq->cmd_len;
+	job->reply = rq->sense;
 	job->reply_len = SCSI_SENSE_BUFFERSIZE;	/* Size of sense buffer
 						 * allocated */
 	if (req->bio) {
@@ -228,9 +232,15 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 	struct request_queue *q;
 	int ret;
 
-	q = blk_init_queue(bsg_request_fn, NULL);
+	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return ERR_PTR(-ENOMEM);
+	q->cmd_size = sizeof(struct scsi_request);
+	q->request_fn = bsg_request_fn;
+
+	ret = blk_init_allocated_queue(q);
+	if (ret)
+		goto out_cleanup_queue;
 
 	q->queuedata = dev;
 	q->bsg_job_size = dd_job_size;
@@ -243,10 +253,12 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 	if (ret) {
 		printk(KERN_ERR "%s: bsg interface failed to "
 		       "initialize - register queue\n", dev->kobj.name);
-		blk_cleanup_queue(q);
-		return ERR_PTR(ret);
+		goto out_cleanup_queue;
 	}
 
 	return q;
+out_cleanup_queue:
+	blk_cleanup_queue(q);
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(bsg_setup_queue);
diff --git a/block/bsg.c b/block/bsg.c
index a57046d..e34c332 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -85,7 +85,6 @@ struct bsg_command {
 	struct bio *bidi_bio;
 	int err;
 	struct sg_io_v4 hdr;
-	char sense[SCSI_SENSE_BUFFERSIZE];
 };
 
 static void bsg_free_command(struct bsg_command *bc)
@@ -140,18 +139,20 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 				struct sg_io_v4 *hdr, struct bsg_device *bd,
 				fmode_t has_write_perm)
 {
+	struct scsi_request *req = scsi_req(rq);
+
 	if (hdr->request_len > BLK_MAX_CDB) {
-		rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
-		if (!rq->cmd)
+		req->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
+		if (!req->cmd)
 			return -ENOMEM;
 	}
 
-	if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request,
+	if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request,
 			   hdr->request_len))
 		return -EFAULT;
 
 	if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
-		if (blk_verify_command(rq->cmd, has_write_perm))
+		if (blk_verify_command(req->cmd, has_write_perm))
 			return -EPERM;
 	} else if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
@@ -159,7 +160,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 	/*
 	 * fill in request structure
 	 */
-	rq->cmd_len = hdr->request_len;
+	req->cmd_len = hdr->request_len;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -205,8 +206,7 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw)
  * map sg_io_v4 to a request.
  */
 static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
-	    u8 *sense)
+bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
 {
 	struct request_queue *q = bd->queue;
 	struct request *rq, *next_rq = NULL;
@@ -236,7 +236,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	if (IS_ERR(rq))
 		return rq;
-	blk_rq_set_block_pc(rq);
+	scsi_req_init(rq);
 
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
@@ -280,13 +280,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
 			goto out;
 	}
 
-	rq->sense = sense;
-	rq->sense_len = 0;
-
 	return rq;
 out:
-	if (rq->cmd != rq->__cmd)
-		kfree(rq->cmd);
+	scsi_req_free_cmd(scsi_req(rq));
 	blk_put_request(rq);
 	if (next_rq) {
 		blk_rq_unmap_user(next_rq->bio);
@@ -393,6 +389,7 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
 static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 				    struct bio *bio, struct bio *bidi_bio)
 {
+	struct scsi_request *req = scsi_req(rq);
 	int ret = 0;
 
 	dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
@@ -407,12 +404,12 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 		hdr->info |= SG_INFO_CHECK;
 	hdr->response_len = 0;
 
-	if (rq->sense_len && hdr->response) {
+	if (req->sense_len && hdr->response) {
 		int len = min_t(unsigned int, hdr->max_response_len,
-					rq->sense_len);
+					req->sense_len);
 
 		ret = copy_to_user((void __user *)(unsigned long)hdr->response,
-				   rq->sense, len);
+				   req->sense, len);
 		if (!ret)
 			hdr->response_len = len;
 		else
@@ -420,14 +417,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	}
 
 	if (rq->next_rq) {
-		hdr->dout_resid = rq->resid_len;
-		hdr->din_resid = rq->next_rq->resid_len;
+		hdr->dout_resid = req->resid_len;
+		hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
 		blk_rq_unmap_user(bidi_bio);
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
-		hdr->din_resid = rq->resid_len;
+		hdr->din_resid = req->resid_len;
 	else
-		hdr->dout_resid = rq->resid_len;
+		hdr->dout_resid = req->resid_len;
 
 	/*
 	 * If the request generated a negative error number, return it
@@ -439,8 +436,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 		ret = rq->errors;
 
 	blk_rq_unmap_user(bio);
-	if (rq->cmd != rq->__cmd)
-		kfree(rq->cmd);
+	scsi_req_free_cmd(req);
 	blk_put_request(rq);
 
 	return ret;
@@ -625,7 +621,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
 		/*
 		 * get a request, fill in the blanks, and add to request queue
 		 */
-		rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense);
+		rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
 		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
 			rq = NULL;
@@ -911,12 +907,11 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		struct bio *bio, *bidi_bio = NULL;
 		struct sg_io_v4 hdr;
 		int at_head;
-		u8 sense[SCSI_SENSE_BUFFERSIZE];
 
 		if (copy_from_user(&hdr, uarg, sizeof(hdr)))
 			return -EFAULT;
 
-		rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense);
+		rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
 		if (IS_ERR(rq))
 			return PTR_ERR(rq);
 
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index c2b6492..e542144 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -230,15 +230,17 @@ EXPORT_SYMBOL(blk_verify_command);
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 			     struct sg_io_hdr *hdr, fmode_t mode)
 {
-	if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
+	struct scsi_request *req = scsi_req(rq);
+
+	if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
-	if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
+	if (blk_verify_command(req->cmd, mode & FMODE_WRITE))
 		return -EPERM;
 
 	/*
 	 * fill in request structure
 	 */
-	rq->cmd_len = hdr->cmd_len;
+	req->cmd_len = hdr->cmd_len;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -254,6 +256,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 				 struct bio *bio)
 {
+	struct scsi_request *req = scsi_req(rq);
 	int r, ret = 0;
 
 	/*
@@ -267,13 +270,13 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->resid_len;
+	hdr->resid = req->resid_len;
 	hdr->sb_len_wr = 0;
 
-	if (rq->sense_len && hdr->sbp) {
-		int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len);
+	if (req->sense_len && hdr->sbp) {
+		int len = min((unsigned int) hdr->mx_sb_len, req->sense_len);
 
-		if (!copy_to_user(hdr->sbp, rq->sense, len))
+		if (!copy_to_user(hdr->sbp, req->sense, len))
 			hdr->sb_len_wr = len;
 		else
 			ret = -EFAULT;
@@ -294,7 +297,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	int writing = 0;
 	int at_head = 0;
 	struct request *rq;
-	char sense[SCSI_SENSE_BUFFERSIZE];
+	struct scsi_request *req;
 	struct bio *bio;
 
 	if (hdr->interface_id != 'S')
@@ -321,11 +324,12 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	blk_rq_set_block_pc(rq);
+	req = scsi_req(rq);
+	scsi_req_init(rq);
 
 	if (hdr->cmd_len > BLK_MAX_CDB) {
-		rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
-		if (!rq->cmd)
+		req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
+		if (!req->cmd)
 			goto out_put_request;
 	}
 
@@ -357,9 +361,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		goto out_free_cdb;
 
 	bio = rq->bio;
-	memset(sense, 0, sizeof(sense));
-	rq->sense = sense;
-	rq->sense_len = 0;
 	rq->retries = 0;
 
 	start_time = jiffies;
@@ -375,8 +376,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	ret = blk_complete_sghdr_rq(rq, hdr, bio);
 
 out_free_cdb:
-	if (rq->cmd != rq->__cmd)
-		kfree(rq->cmd);
+	scsi_req_free_cmd(req);
 out_put_request:
 	blk_put_request(rq);
 	return ret;
@@ -420,9 +420,10 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		struct scsi_ioctl_command __user *sic)
 {
 	struct request *rq;
+	struct scsi_request *req;
 	int err;
 	unsigned int in_len, out_len, bytes, opcode, cmdlen;
-	char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE];
+	char *buffer = NULL;
 
 	if (!sic)
 		return -EINVAL;
@@ -452,7 +453,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		err = PTR_ERR(rq);
 		goto error_free_buffer;
 	}
-	blk_rq_set_block_pc(rq);
+	req = scsi_req(rq);
+	scsi_req_init(rq);
 
 	cmdlen = COMMAND_SIZE(opcode);
 
@@ -460,14 +462,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	 * get command and data to send to device, if any
 	 */
 	err = -EFAULT;
-	rq->cmd_len = cmdlen;
-	if (copy_from_user(rq->cmd, sic->data, cmdlen))
+	req->cmd_len = cmdlen;
+	if (copy_from_user(req->cmd, sic->data, cmdlen))
 		goto error;
 
 	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
 		goto error;
 
-	err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
+	err = blk_verify_command(req->cmd, mode & FMODE_WRITE);
 	if (err)
 		goto error;
 
@@ -503,18 +505,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		goto error;
 	}
 
-	memset(sense, 0, sizeof(sense));
-	rq->sense = sense;
-	rq->sense_len = 0;
-
 	blk_execute_rq(q, disk, rq, 0);
 
 	err = rq->errors & 0xff;	/* only 8 bit SCSI status */
 	if (err) {
-		if (rq->sense_len && rq->sense) {
-			bytes = (OMAX_SB_LEN > rq->sense_len) ?
-				rq->sense_len : OMAX_SB_LEN;
-			if (copy_to_user(sic->data, rq->sense, bytes))
+		if (req->sense_len && req->sense) {
+			bytes = (OMAX_SB_LEN > req->sense_len) ?
+				req->sense_len : OMAX_SB_LEN;
+			if (copy_to_user(sic->data, req->sense, bytes))
 				err = -EFAULT;
 		}
 	} else {
@@ -542,11 +540,11 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, WRITE, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	blk_rq_set_block_pc(rq);
+	scsi_req_init(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-	rq->cmd[0] = cmd;
-	rq->cmd[4] = data;
-	rq->cmd_len = 6;
+	scsi_req(rq)->cmd[0] = cmd;
+	scsi_req(rq)->cmd[4] = data;
+	scsi_req(rq)->cmd_len = 6;
 	err = blk_execute_rq(q, bd_disk, rq, 0);
 	blk_put_request(rq);
 
@@ -743,6 +741,29 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
 
+/**
+ * scsi_req_init - initialize a request to type BLOCK_PC
+ * @rq:		request to be initialized
+ *
+ */
+void scsi_req_init(struct request *rq)
+{
+	struct scsi_request *req = scsi_req(rq);
+
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	memset(req->__cmd, 0, sizeof(req->__cmd));
+	req->cmd = req->__cmd;
+	req->cmd_len = BLK_MAX_CDB;
+	req->sense_len = 0;
+}
+EXPORT_SYMBOL(scsi_req_init);
+
+int scsi_cmd_buf_len(struct request *rq)
+{
+	return scsi_req(rq)->cmd_len * 3;
+}
+EXPORT_SYMBOL(scsi_cmd_buf_len);
+
 static int __init blk_scsi_ioctl_init(void)
 {
 	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1f863e7..6abd739 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1271,7 +1271,7 @@ static int atapi_drain_needed(struct request *rq)
 	if (!blk_rq_bytes(rq) || op_is_write(req_op(rq)))
 		return 0;
 
-	return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;
+	return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
 }
 
 static int ata_scsi_dev_config(struct scsi_device *sdev,
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e5c5b8e..b93bb73 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -52,6 +52,7 @@
 #include <scsi/scsi.h>
 #include <scsi/sg.h>
 #include <scsi/scsi_ioctl.h>
+#include <scsi/scsi_request.h>
 #include <linux/cdrom.h>
 #include <linux/scatterlist.h>
 #include <linux/kthread.h>
@@ -1854,7 +1855,7 @@ static void cciss_softirq_done(struct request *rq)
 
 	/* set the residual count for pc requests */
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
-		rq->resid_len = c->err_info->ResidualCnt;
+		scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
 
 	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
 
@@ -1941,9 +1942,16 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
 static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 				int drv_index)
 {
-	disk->queue = blk_init_queue(do_cciss_request, &h->lock);
+	disk->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!disk->queue)
 		goto init_queue_failure;
+
+	disk->queue->cmd_size = sizeof(struct scsi_request);
+	disk->queue->request_fn = do_cciss_request;
+	disk->queue->queue_lock = &h->lock;
+	if (blk_init_allocated_queue(disk->queue) < 0)
+		goto cleanup_queue;
+
 	sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
 	disk->major = h->major;
 	disk->first_minor = drv_index << NWD_SHIFT;
@@ -3111,15 +3119,7 @@ static inline int evaluate_target_status(ctlr_info_t *h,
 		return error_value;
 	}
 
-	/* SG_IO or similar, copy sense data back */
-	if (cmd->rq->sense) {
-		if (cmd->rq->sense_len > cmd->err_info->SenseLen)
-			cmd->rq->sense_len = cmd->err_info->SenseLen;
-		memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
-			cmd->rq->sense_len);
-	} else
-		cmd->rq->sense_len = 0;
-
+	scsi_req(cmd->rq)->sense_len = cmd->err_info->SenseLen;
 	return error_value;
 }
 
@@ -3150,7 +3150,6 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 			dev_warn(&h->pdev->dev, "cmd %p has"
 			       " completed with data underrun "
 			       "reported\n", cmd);
-			cmd->rq->resid_len = cmd->err_info->ResidualCnt;
 		}
 		break;
 	case CMD_DATA_OVERRUN:
@@ -3426,8 +3425,9 @@ static void do_cciss_request(struct request_queue *q)
 			c->Request.CDB[14] = c->Request.CDB[15] = 0;
 		}
 	} else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		c->Request.CDBLen = creq->cmd_len;
-		memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
+		c->Request.CDBLen = scsi_req(creq)->cmd_len;
+		memcpy(c->Request.CDB, scsi_req(creq)->cmd, BLK_MAX_CDB);
+		scsi_req(creq)->sense = c->err_info->SenseInfo;
 	} else {
 		dev_warn(&h->pdev->dev, "bad request type %d\n",
 			creq->cmd_type);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 1b94c1c..918a92c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -707,7 +707,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 			     WRITE : READ, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	blk_rq_set_block_pc(rq);
+	scsi_req_init(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -716,8 +716,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 			goto out;
 	}
 
-	rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
-	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
+	scsi_req(rq)->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
+	memcpy(scsi_req(rq)->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 
 	rq->timeout = 60*HZ;
 	if (cgc->quiet)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 10332c2..3027d2e 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -52,6 +52,7 @@ struct virtio_blk {
 };
 
 struct virtblk_req {
+	struct scsi_request sreq;	/* for SCSI passthrough */
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
@@ -91,7 +92,7 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	 * inhdr with additional status information.
 	 */
 	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
-		sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
+		sg_init_one(&cmd, vbr->sreq.cmd, vbr->sreq.cmd_len);
 		sgs[num_out++] = &cmd;
 	}
 
@@ -103,7 +104,6 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	}
 
 	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
-		memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
 		sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
 		sgs[num_out + num_in++] = &sense;
 		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
@@ -123,8 +123,10 @@ static inline void virtblk_request_done(struct request *req)
 	int error = virtblk_result(vbr);
 
 	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
-		req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
-		req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
+		scsi_req(req)->resid_len =
+			virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
+		vbr->sreq.sense_len =
+			virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
 		req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
 	} else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
 		req->errors = (error != 0);
@@ -538,6 +540,7 @@ static int virtblk_init_request(void *data, struct request *rq,
 	struct virtio_blk *vblk = data;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
 
+	vbr->sreq.sense = vbr->sense;
 	sg_init_table(vbr->sg, vblk->sg_elems);
 	return 0;
 }
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 59cca72..36f5237 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -281,8 +281,8 @@
 #include <linux/fcntl.h>
 #include <linux/blkdev.h>
 #include <linux/times.h>
-
 #include <linux/uaccess.h>
+#include <scsi/scsi_request.h>
 
 /* used to tell the module to turn on full debugging messages */
 static bool debug;
@@ -2172,6 +2172,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 {
 	struct request_queue *q = cdi->disk->queue;
 	struct request *rq;
+	struct scsi_request *req;
 	struct bio *bio;
 	unsigned int len;
 	int nr, ret = 0;
@@ -2195,7 +2196,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			ret = PTR_ERR(rq);
 			break;
 		}
-		blk_rq_set_block_pc(rq);
+		req = scsi_req(rq);
+		scsi_req_init(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
@@ -2203,23 +2205,23 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			break;
 		}
 
-		rq->cmd[0] = GPCMD_READ_CD;
-		rq->cmd[1] = 1 << 2;
-		rq->cmd[2] = (lba >> 24) & 0xff;
-		rq->cmd[3] = (lba >> 16) & 0xff;
-		rq->cmd[4] = (lba >>  8) & 0xff;
-		rq->cmd[5] = lba & 0xff;
-		rq->cmd[6] = (nr >> 16) & 0xff;
-		rq->cmd[7] = (nr >>  8) & 0xff;
-		rq->cmd[8] = nr & 0xff;
-		rq->cmd[9] = 0xf8;
-
-		rq->cmd_len = 12;
+		req->cmd[0] = GPCMD_READ_CD;
+		req->cmd[1] = 1 << 2;
+		req->cmd[2] = (lba >> 24) & 0xff;
+		req->cmd[3] = (lba >> 16) & 0xff;
+		req->cmd[4] = (lba >>  8) & 0xff;
+		req->cmd[5] = lba & 0xff;
+		req->cmd[6] = (nr >> 16) & 0xff;
+		req->cmd[7] = (nr >>  8) & 0xff;
+		req->cmd[8] = nr & 0xff;
+		req->cmd[9] = 0xf8;
+
+		req->cmd_len = 12;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
 		if (blk_execute_rq(q, cdi->disk, rq, 0)) {
-			struct request_sense *s = rq->sense;
+			struct request_sense *s = req->sense;
 			ret = -EIO;
 			cdi->last_sense = s->sense_key;
 		}
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index f90ea22..7c826ec 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,6 +93,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 	int error;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->special = (char *)pc;
 
@@ -103,9 +104,9 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 			goto put_req;
 	}
 
-	memcpy(rq->cmd, pc->c, 12);
+	memcpy(scsi_req(rq)->cmd, pc->c, 12);
 	if (drive->media == ide_tape)
-		rq->cmd[13] = REQ_IDETAPE_PC1;
+		scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
 	error = blk_execute_rq(drive->queue, disk, rq, 0);
 put_req:
 	blk_put_request(rq);
@@ -171,7 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 {
 	struct request_sense *sense = &drive->sense_data;
-	struct request *sense_rq = &drive->sense_rq;
+	struct request *sense_rq = drive->sense_rq;
+	struct scsi_request *req = scsi_req(sense_rq);
 	unsigned int cmd_len, sense_len;
 	int err;
 
@@ -197,6 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	memset(sense, 0, sizeof(*sense));
 
 	blk_rq_init(rq->q, sense_rq);
+	scsi_req_init(sense_rq);
 
 	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
 			      GFP_NOIO);
@@ -208,13 +211,13 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
 	}
 
 	sense_rq->rq_disk = rq->rq_disk;
-	sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
-	sense_rq->cmd[4] = cmd_len;
 	sense_rq->cmd_type = REQ_TYPE_ATA_SENSE;
 	sense_rq->rq_flags |= RQF_PREEMPT;
 
+	req->cmd[0] = GPCMD_REQUEST_SENSE;
+	req->cmd[4] = cmd_len;
 	if (drive->media == ide_tape)
-		sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+		req->cmd[13] = REQ_IDETAPE_PC1;
 
 	drive->sense_rq_armed = true;
 }
@@ -229,12 +232,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
 		return -ENOMEM;
 	}
 
-	drive->sense_rq.special = special;
+	drive->sense_rq->special = special;
 	drive->sense_rq_armed = false;
 
 	drive->hwif->rq = NULL;
 
-	elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT);
+	elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
@@ -247,14 +250,14 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
 void ide_retry_pc(ide_drive_t *drive)
 {
 	struct request *failed_rq = drive->hwif->rq;
-	struct request *sense_rq = &drive->sense_rq;
+	struct request *sense_rq = drive->sense_rq;
 	struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
 	(void)ide_read_error(drive);
 
 	/* init pc from sense_rq */
 	ide_init_pc(pc);
-	memcpy(pc->c, sense_rq->cmd, 12);
+	memcpy(pc->c, scsi_req(sense_rq)->cmd, 12);
 
 	if (drive->media == ide_tape)
 		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
@@ -286,7 +289,7 @@ int ide_cd_expiry(ide_drive_t *drive)
 	 * commands/drives support that. Let ide_timer_expiry keep polling us
 	 * for these.
 	 */
-	switch (rq->cmd[0]) {
+	switch (scsi_req(rq)->cmd[0]) {
 	case GPCMD_BLANK:
 	case GPCMD_FORMAT_UNIT:
 	case GPCMD_RESERVE_RZONE_TRACK:
@@ -297,7 +300,7 @@ int ide_cd_expiry(ide_drive_t *drive)
 	default:
 		if (!(rq->rq_flags & RQF_QUIET))
 			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
-					 rq->cmd[0]);
+					 scsi_req(rq)->cmd[0]);
 		wait = 0;
 		break;
 	}
@@ -420,7 +423,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 						     ? "write" : "read");
 			pc->flags |= PC_FLAG_DMA_ERROR;
 		} else
-			rq->resid_len = 0;
+			scsi_req(rq)->resid_len = 0;
 		debug_log("%s: DMA finished\n", drive->name);
 	}
 
@@ -436,7 +439,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		local_irq_enable_in_hardirq();
 
 		if (drive->media == ide_tape &&
-		    (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE)
+		    (stat & ATA_ERR) && scsi_req(rq)->cmd[0] == REQUEST_SENSE)
 			stat &= ~ATA_ERR;
 
 		if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) {
@@ -446,7 +449,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 			if (drive->media != ide_tape)
 				pc->rq->errors++;
 
-			if (rq->cmd[0] == REQUEST_SENSE) {
+			if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) {
 				printk(KERN_ERR PFX "%s: I/O error in request "
 						"sense command\n", drive->name);
 				return ide_do_reset(drive);
@@ -512,7 +515,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 	ide_pio_bytes(drive, cmd, write, done);
 
 	/* Update transferred byte count */
-	rq->resid_len -= done;
+	scsi_req(rq)->resid_len -= done;
 
 	bcount -= done;
 
@@ -520,7 +523,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 		ide_pad_transfer(drive, write, bcount);
 
 	debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n",
-		  rq->cmd[0], done, bcount, rq->resid_len);
+		  rq->cmd[0], done, bcount, scsi_req(rq)->resid_len);
 
 	/* And set the interrupt handler again */
 	ide_set_handler(drive, ide_pc_intr, timeout);
@@ -603,7 +606,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 
 	if (dev_is_idecd(drive)) {
 		/* ATAPI commands get padded out to 12 bytes minimum */
-		cmd_len = COMMAND_SIZE(rq->cmd[0]);
+		cmd_len = COMMAND_SIZE(scsi_req(rq)->cmd[0]);
 		if (cmd_len < ATAPI_MIN_CDB_BYTES)
 			cmd_len = ATAPI_MIN_CDB_BYTES;
 
@@ -650,7 +653,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
 
 	/* Send the actual packet */
 	if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0)
-		hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len);
+		hwif->tp_ops->output_data(drive, NULL, scsi_req(rq)->cmd, cmd_len);
 
 	/* Begin DMA, if necessary */
 	if (dev_is_idecd(drive)) {
@@ -695,7 +698,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 							     bytes, 63 * 1024));
 
 		/* We haven't transferred any data yet */
-		rq->resid_len = bcount;
+		scsi_req(rq)->resid_len = bcount;
 
 		if (pc->flags & PC_FLAG_DMA_ERROR) {
 			pc->flags &= ~PC_FLAG_DMA_ERROR;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 9cbd217..6eb9872 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -121,7 +121,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq)
 		 * don't log START_STOP unit with LoEj set, since we cannot
 		 * reliably check if drive can auto-close
 		 */
-		if (rq->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24)
+		if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24)
 			break;
 		log = 1;
 		break;
@@ -163,7 +163,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
 	 * toc has not been recorded yet, it will fail with 05/24/00 (which is a
 	 * confusing error)
 	 */
-	if (failed_command && failed_command->cmd[0] == GPCMD_READ_TOC_PMA_ATIP)
+	if (failed_command && scsi_req(failed_command)->cmd[0] == GPCMD_READ_TOC_PMA_ATIP)
 		if (sense->sense_key == 0x05 && sense->asc == 0x24)
 			return;
 
@@ -219,15 +219,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 	void *sense = bio_data(rq->bio);
 
 	if (failed) {
-		if (failed->sense) {
-			/*
-			 * Sense is always read into drive->sense_data.
-			 * Copy back if the failed request has its
-			 * sense pointer set.
-			 */
-			memcpy(failed->sense, sense, 18);
-			failed->sense_len = rq->sense_len;
-		}
+		/*
+		 * Sense is always read into drive->sense_data, copy back to the
+		 * original request.
+		 */
+		memcpy(scsi_req(failed)->sense, sense, 18);
+		scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
 		cdrom_analyze_sense_data(drive, failed);
 
 		if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
@@ -338,7 +335,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 		 *
 		 * cdrom_log_sense() knows this!
 		 */
-		if (rq->cmd[0] == GPCMD_START_STOP_UNIT)
+		if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT)
 			break;
 		/* fall-through */
 	case DATA_PROTECT:
@@ -414,7 +411,7 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 	 * Some of the trailing request sense fields are optional,
 	 * and some drives don't send them.  Sigh.
 	 */
-	if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
+	if (scsi_req(rq)->cmd[0] == GPCMD_REQUEST_SENSE &&
 	    cmd->nleft > 0 && cmd->nleft <= 5)
 		cmd->nleft = 0;
 }
@@ -425,12 +422,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		    req_flags_t rq_flags)
 {
 	struct cdrom_info *info = drive->driver_data;
-	struct request_sense local_sense;
 	int retries = 10;
-	req_flags_t flags = 0;
-
-	if (!sense)
-		sense = &local_sense;
+	bool failed;
 
 	ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, "
 				  "rq_flags: 0x%x",
@@ -440,12 +433,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 	do {
 		struct request *rq;
 		int error;
+		bool delay = false;
 
 		rq = blk_get_request(drive->queue, write, __GFP_RECLAIM);
-
-		memcpy(rq->cmd, cmd, BLK_MAX_CDB);
+		scsi_req_init(rq);
+		memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
 		rq->cmd_type = REQ_TYPE_ATA_PC;
-		rq->sense = sense;
 		rq->rq_flags |= rq_flags;
 		rq->timeout = timeout;
 		if (buffer) {
@@ -460,21 +453,21 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		error = blk_execute_rq(drive->queue, info->disk, rq, 0);
 
 		if (buffer)
-			*bufflen = rq->resid_len;
-
-		flags = rq->rq_flags;
-		blk_put_request(rq);
+			*bufflen = scsi_req(rq)->resid_len;
+		if (sense)
+			memcpy(sense, scsi_req(rq)->sense, sizeof(*sense));
 
 		/*
 		 * FIXME: we should probably abort/retry or something in case of
 		 * failure.
 		 */
-		if (flags & RQF_FAILED) {
+		failed = (rq->rq_flags & RQF_FAILED) != 0;
+		if (failed) {
 			/*
 			 * The request failed.  Retry if it was due to a unit
 			 * attention status (usually means media was changed).
 			 */
-			struct request_sense *reqbuf = sense;
+			struct request_sense *reqbuf = scsi_req(rq)->sense;
 
 			if (reqbuf->sense_key == UNIT_ATTENTION)
 				cdrom_saw_media_change(drive);
@@ -485,19 +478,20 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 				 * a disk.  Retry, but wait a little to give
 				 * the drive time to complete the load.
 				 */
-				ssleep(2);
+				delay = true;
 			} else {
 				/* otherwise, don't retry */
 				retries = 0;
 			}
 			--retries;
 		}
-
-		/* end of retry loop */
-	} while ((flags & RQF_FAILED) && retries >= 0);
+		blk_put_request(rq);
+		if (delay)
+			ssleep(2);
+	} while (failed && retries >= 0);
 
 	/* return an error if the command failed */
-	return (flags & RQF_FAILED) ? -EIO : 0;
+	return failed ? -EIO : 0;
 }
 
 /*
@@ -636,7 +630,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 		len -= blen;
 
 		if (sense && write == 0)
-			rq->sense_len += blen;
+			scsi_req(rq)->sense_len += blen;
 	}
 
 	/* pad, if necessary */
@@ -664,7 +658,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
 out_end:
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC && rc == 0) {
-		rq->resid_len = 0;
+		scsi_req(rq)->resid_len = 0;
 		blk_end_request_all(rq, 0);
 		hwif->rq = NULL;
 	} else {
@@ -685,9 +679,9 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
 
 		/* make sure it's fully ended */
 		if (rq->cmd_type != REQ_TYPE_FS) {
-			rq->resid_len -= cmd->nbytes - cmd->nleft;
+			scsi_req(rq)->resid_len -= cmd->nbytes - cmd->nleft;
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
-				rq->resid_len += cmd->last_xfer_len;
+				scsi_req(rq)->resid_len += cmd->last_xfer_len;
 		}
 
 		ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
@@ -1312,28 +1306,29 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
 	int hard_sect = queue_logical_block_size(q);
 	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
 	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
+	struct scsi_request *req = scsi_req(rq);
 
-	memset(rq->cmd, 0, BLK_MAX_CDB);
+	memset(req->cmd, 0, BLK_MAX_CDB);
 
 	if (rq_data_dir(rq) == READ)
-		rq->cmd[0] = GPCMD_READ_10;
+		req->cmd[0] = GPCMD_READ_10;
 	else
-		rq->cmd[0] = GPCMD_WRITE_10;
+		req->cmd[0] = GPCMD_WRITE_10;
 
 	/*
 	 * fill in lba
 	 */
-	rq->cmd[2] = (block >> 24) & 0xff;
-	rq->cmd[3] = (block >> 16) & 0xff;
-	rq->cmd[4] = (block >>  8) & 0xff;
-	rq->cmd[5] = block & 0xff;
+	req->cmd[2] = (block >> 24) & 0xff;
+	req->cmd[3] = (block >> 16) & 0xff;
+	req->cmd[4] = (block >>  8) & 0xff;
+	req->cmd[5] = block & 0xff;
 
 	/*
 	 * and transfer length
 	 */
-	rq->cmd[7] = (blocks >> 8) & 0xff;
-	rq->cmd[8] = blocks & 0xff;
-	rq->cmd_len = 10;
+	req->cmd[7] = (blocks >> 8) & 0xff;
+	req->cmd[8] = blocks & 0xff;
+	req->cmd_len = 10;
 	return BLKPREP_OK;
 }
 
@@ -1343,7 +1338,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
  */
 static int ide_cdrom_prep_pc(struct request *rq)
 {
-	u8 *c = rq->cmd;
+	u8 *c = scsi_req(rq)->cmd;
 
 	/* transform 6-byte read/write commands to the 10-byte version */
 	if (c[0] == READ_6 || c[0] == WRITE_6) {
@@ -1354,7 +1349,7 @@ static int ide_cdrom_prep_pc(struct request *rq)
 		c[2] = 0;
 		c[1] &= 0xe0;
 		c[0] += (READ_10 - READ_6);
-		rq->cmd_len = 10;
+		scsi_req(rq)->cmd_len = 10;
 		return BLKPREP_OK;
 	}
 
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index f085e3a..da0aa01 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,6 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
 	int ret;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->rq_flags = RQF_QUIET;
 	ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-cd_verbose.c b/drivers/ide/ide-cd_verbose.c
index f079ca2..58a6feb 100644
--- a/drivers/ide/ide-cd_verbose.c
+++ b/drivers/ide/ide-cd_verbose.c
@@ -315,12 +315,12 @@ void ide_cd_log_error(const char *name, struct request *failed_command,
 		while (hi > lo) {
 			mid = (lo + hi) / 2;
 			if (packet_command_texts[mid].packet_command ==
-			    failed_command->cmd[0]) {
+			    scsi_req(failed_command)->cmd[0]) {
 				s = packet_command_texts[mid].text;
 				break;
 			}
 			if (packet_command_texts[mid].packet_command >
-			    failed_command->cmd[0])
+			    scsi_req(failed_command)->cmd[0])
 				hi = mid;
 			else
 				lo = mid + 1;
@@ -329,7 +329,7 @@ void ide_cd_log_error(const char *name, struct request *failed_command,
 		printk(KERN_ERR "  The failed \"%s\" packet command "
 				"was: \n  \"", s);
 		for (i = 0; i < BLK_MAX_CDB; i++)
-			printk(KERN_CONT "%02x ", failed_command->cmd[i]);
+			printk(KERN_CONT "%02x ", scsi_req(failed_command)->cmd[i]);
 		printk(KERN_CONT "\"\n");
 	}
 
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 0dd43b4..fd56c9d 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,10 +166,11 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
 		return setting->set(drive, arg);
 
 	rq = blk_get_request(q, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
-	rq->cmd_len = 5;
-	rq->cmd[0] = REQ_DEVSET_EXEC;
-	*(int *)&rq->cmd[1] = arg;
+	scsi_req(rq)->cmd_len = 5;
+	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
+	*(int *)&scsi_req(rq)->cmd[1] = arg;
 	rq->special = setting->set;
 
 	if (blk_execute_rq(q, NULL, rq, 0))
@@ -183,7 +184,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
 {
 	int err, (*setfunc)(ide_drive_t *, int) = rq->special;
 
-	err = setfunc(drive, *(int *)&rq->cmd[1]);
+	err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
 	if (err)
 		rq->errors = err;
 	ide_complete_rq(drive, err, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 5ceace5..3437c5b 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,6 +478,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 		return -EBUSY;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	drive->mult_req = arg;
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index d6da011..35e5b89 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -148,7 +148,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
 	struct request *rq = drive->hwif->rq;
 
 	if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV &&
-	    rq->cmd[0] == REQ_DRIVE_RESET) {
+	    scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
 		if (err <= 0 && rq->errors == 0)
 			rq->errors = -EIO;
 		ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index f079d8d..3bd678a 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -203,7 +203,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
 	put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]);
 	put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]);
 
-	memcpy(rq->cmd, pc->c, 12);
+	memcpy(scsi_req(rq)->cmd, pc->c, 12);
 
 	pc->rq = rq;
 	if (cmd == WRITE)
@@ -216,7 +216,7 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
 		struct ide_atapi_pc *pc, struct request *rq)
 {
 	ide_init_pc(pc);
-	memcpy(pc->c, rq->cmd, sizeof(pc->c));
+	memcpy(pc->c, scsi_req(rq)->cmd, sizeof(pc->c));
 	pc->rq = rq;
 	if (blk_rq_bytes(rq)) {
 		pc->flags |= PC_FLAG_DMA_OK;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 201e43f..3378503 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -279,7 +279,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 
 static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
 {
-	u8 cmd = rq->cmd[0];
+	u8 cmd = scsi_req(rq)->cmd[0];
 
 	switch (cmd) {
 	case REQ_PARK_HEADS:
@@ -545,6 +545,7 @@ void do_ide_request(struct request_queue *q)
 			goto plug_device;
 		}
 
+		scsi_req(rq)->resid_len = blk_rq_bytes(rq);
 		hwif->rq = rq;
 
 		spin_unlock_irq(&hwif->lock);
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index d05db24..a5d22c6 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,6 +126,7 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
 		struct request *rq;
 
 		rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+		scsi_req_init(rq);
 		rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 		err = blk_execute_rq(drive->queue, NULL, rq, 0);
 		blk_put_request(rq);
@@ -222,9 +223,10 @@ static int generic_drive_reset(ide_drive_t *drive)
 	int ret = 0;
 
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
-	rq->cmd_len = 1;
-	rq->cmd[0] = REQ_DRIVE_RESET;
+	scsi_req(rq)->cmd_len = 1;
+	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
 	if (blk_execute_rq(drive->queue, NULL, rq, 1))
 		ret = rq->errors;
 	blk_put_request(rq);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 2d7dca5..c37604a 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,8 +32,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	spin_unlock_irq(&hwif->lock);
 
 	rq = blk_get_request(q, READ, __GFP_RECLAIM);
-	rq->cmd[0] = REQ_PARK_HEADS;
-	rq->cmd_len = 1;
+	scsi_req_init(rq);
+	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
+	scsi_req(rq)->cmd_len = 1;
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	rq->special = &timeout;
 	rc = blk_execute_rq(q, NULL, rq, 1);
@@ -46,11 +47,12 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
 	 * timeout has expired, so power management will be reenabled.
 	 */
 	rq = blk_get_request(q, READ, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		goto out;
+	scsi_req_init(rq);
 
-	rq->cmd[0] = REQ_UNPARK_HEADS;
-	rq->cmd_len = 1;
+	scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
+	scsi_req(rq)->cmd_len = 1;
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
 	elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
 
@@ -64,7 +66,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq)
 	struct ide_taskfile *tf = &cmd.tf;
 
 	memset(&cmd, 0, sizeof(cmd));
-	if (rq->cmd[0] == REQ_PARK_HEADS) {
+	if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) {
 		drive->sleep = *(unsigned long *)rq->special;
 		drive->dev_flags |= IDE_DFLAG_SLEEPING;
 		tf->command = ATA_CMD_IDLEIMMEDIATE;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index a015acd..f6767ab 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,6 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -89,6 +90,7 @@ int generic_ide_resume(struct device *dev)
 
 	memset(&rqpm, 0, sizeof(rqpm));
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_ATA_PM_RESUME;
 	rq->rq_flags |= RQF_PREEMPT;
 	rq->special = &rqpm;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 330e319..a74ae8df 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,6 +741,14 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
 	}
 }
 
+static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
+{
+	struct ide_request *req = blk_mq_rq_to_pdu(rq);
+
+	req->sreq.sense = req->sense;
+	return 0;
+}
+
 /*
  * init request queue
  */
@@ -758,11 +766,18 @@ static int ide_init_queue(ide_drive_t *drive)
 	 *	limits and LBA48 we could raise it but as yet
 	 *	do not.
 	 */
-
-	q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));
+	q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif));
 	if (!q)
 		return 1;
 
+	q->request_fn = do_ide_request;
+	q->init_rq_fn = ide_init_rq;
+	q->cmd_size = sizeof(struct ide_request);
+	if (blk_init_allocated_queue(q) < 0) {
+		blk_cleanup_queue(q);
+		return 1;
+	}
+
 	q->queuedata = drive;
 	blk_queue_segment_boundary(q, 0xffff);
 
@@ -1131,10 +1146,12 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
 	ide_port_for_each_dev(i, drive, hwif) {
 		u8 j = (hwif->index * MAX_DRIVES) + i;
 		u16 *saved_id = drive->id;
+		struct request *saved_sense_rq = drive->sense_rq;
 
 		memset(drive, 0, sizeof(*drive));
 		memset(saved_id, 0, SECTOR_SIZE);
 		drive->id = saved_id;
+		drive->sense_rq = saved_sense_rq;
 
 		drive->media			= ide_disk;
 		drive->select			= (i << 4) | ATA_DEVICE_OBS;
@@ -1241,6 +1258,7 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
 	int i;
 
 	ide_port_for_each_dev(i, drive, hwif) {
+		kfree(drive->sense_rq);
 		kfree(drive->id);
 		kfree(drive);
 	}
@@ -1248,11 +1266,10 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
 
 static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
 {
+	ide_drive_t *drive;
 	int i;
 
 	for (i = 0; i < MAX_DRIVES; i++) {
-		ide_drive_t *drive;
-
 		drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node);
 		if (drive == NULL)
 			goto out_nomem;
@@ -1267,12 +1284,21 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
 		 */
 		drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
 		if (drive->id == NULL)
-			goto out_nomem;
+			goto out_free_drive;
+
+		drive->sense_rq = kmalloc(sizeof(struct request) +
+				sizeof(struct ide_request), GFP_KERNEL);
+		if (!drive->sense_rq)
+			goto out_free_id;
 
 		hwif->devices[i] = drive;
 	}
 	return 0;
 
+out_free_id:
+	kfree(drive->id);
+out_free_drive:
+	kfree(drive);
 out_nomem:
 	ide_port_free_devices(hwif);
 	return -ENOMEM;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 9ecf4e3..f6bc1e2 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -282,7 +282,7 @@ static void idetape_analyze_error(ide_drive_t *drive)
 
 	/* correct remaining bytes to transfer */
 	if (pc->flags & PC_FLAG_DMA_ERROR)
-		rq->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]);
+		scsi_req(rq)->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]);
 
 	/*
 	 * If error was the result of a zero-length read or write command,
@@ -316,7 +316,7 @@ static void idetape_analyze_error(ide_drive_t *drive)
 			pc->flags |= PC_FLAG_ABORT;
 		}
 		if (!(pc->flags & PC_FLAG_ABORT) &&
-		    (blk_rq_bytes(rq) - rq->resid_len))
+		    (blk_rq_bytes(rq) - scsi_req(rq)->resid_len))
 			pc->retries = IDETAPE_MAX_PC_RETRIES + 1;
 	}
 }
@@ -348,7 +348,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
 					"itself - Aborting request!\n");
 	} else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
 		unsigned int blocks =
-			(blk_rq_bytes(rq) - rq->resid_len) / tape->blk_size;
+			(blk_rq_bytes(rq) - scsi_req(rq)->resid_len) / tape->blk_size;
 
 		tape->avg_size += blocks * tape->blk_size;
 
@@ -560,7 +560,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
 		pc->flags |= PC_FLAG_WRITING;
 	}
 
-	memcpy(rq->cmd, pc->c, 12);
+	memcpy(scsi_req(rq)->cmd, pc->c, 12);
 }
 
 static ide_startstop_t idetape_do_request(ide_drive_t *drive,
@@ -570,10 +570,11 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	idetape_tape_t *tape = drive->driver_data;
 	struct ide_atapi_pc *pc = NULL;
 	struct ide_cmd cmd;
+	struct scsi_request *req = scsi_req(rq);
 	u8 stat;
 
 	ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, sector: %llu, nr_sectors: %u",
-		      rq->cmd[0], (unsigned long long)blk_rq_pos(rq),
+		      req->cmd[0], (unsigned long long)blk_rq_pos(rq),
 		      blk_rq_sectors(rq));
 
 	BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV ||
@@ -592,7 +593,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 	stat = hwif->tp_ops->read_status(hwif);
 
 	if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
-	    (rq->cmd[13] & REQ_IDETAPE_PC2) == 0)
+	    (req->cmd[13] & REQ_IDETAPE_PC2) == 0)
 		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
 
 	if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
@@ -609,7 +610,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 		} else if (time_after(jiffies, tape->dsc_timeout)) {
 			printk(KERN_ERR "ide-tape: %s: DSC timeout\n",
 				tape->name);
-			if (rq->cmd[13] & REQ_IDETAPE_PC2) {
+			if (req->cmd[13] & REQ_IDETAPE_PC2) {
 				idetape_media_access_finished(drive);
 				return ide_stopped;
 			} else {
@@ -626,23 +627,23 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 		tape->postponed_rq = false;
 	}
 
-	if (rq->cmd[13] & REQ_IDETAPE_READ) {
+	if (req->cmd[13] & REQ_IDETAPE_READ) {
 		pc = &tape->queued_pc;
 		ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
 		goto out;
 	}
-	if (rq->cmd[13] & REQ_IDETAPE_WRITE) {
+	if (req->cmd[13] & REQ_IDETAPE_WRITE) {
 		pc = &tape->queued_pc;
 		ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6);
 		goto out;
 	}
-	if (rq->cmd[13] & REQ_IDETAPE_PC1) {
+	if (req->cmd[13] & REQ_IDETAPE_PC1) {
 		pc = (struct ide_atapi_pc *)rq->special;
-		rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
-		rq->cmd[13] |= REQ_IDETAPE_PC2;
+		req->cmd[13] &= ~(REQ_IDETAPE_PC1);
+		req->cmd[13] |= REQ_IDETAPE_PC2;
 		goto out;
 	}
-	if (rq->cmd[13] & REQ_IDETAPE_PC2) {
+	if (req->cmd[13] & REQ_IDETAPE_PC2) {
 		idetape_media_access_finished(drive);
 		return ide_stopped;
 	}
@@ -853,8 +854,9 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	BUG_ON(size < 0 || size % tape->blk_size);
 
 	rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_DRV_PRIV;
-	rq->cmd[13] = cmd;
+	scsi_req(rq)->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
 	rq->__sector = tape->first_frame;
 
@@ -868,7 +870,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 	blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
 	/* calculate the number of transferred bytes and update buffer state */
-	size -= rq->resid_len;
+	size -= scsi_req(rq)->resid_len;
 	tape->cur = tape->buf;
 	if (cmd == REQ_IDETAPE_READ)
 		tape->valid = size;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index a716693..a393e13 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -431,6 +431,7 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 	int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE;
 
 	rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM);
+	scsi_req_init(rq);
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 
 	/*
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index 247853e..c3062b5 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -54,7 +54,7 @@
 #define DRV_NAME "sis5513"
 
 /* registers layout and init values are chipset family dependent */
-
+#undef ATA_16
 #define ATA_16		0x01
 #define ATA_33		0x02
 #define ATA_66		0x03
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 7ee1667..b8c4b2b 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2320,10 +2320,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		SmpPassthroughReply_t *smprep;
 
 		smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
-		memcpy(req->sense, smprep, sizeof(*smprep));
-		req->sense_len = sizeof(*smprep);
-		req->resid_len = 0;
-		rsp->resid_len -= smprep->ResponseDataLength;
+		memcpy(scsi_req(req)->sense, smprep, sizeof(*smprep));
+		scsi_req(req)->sense_len = sizeof(*smprep);
+		scsi_req(req)->resid_len = 0;
+		scsi_req(rsp)->resid_len -= smprep->ResponseDataLength;
 	} else {
 		printk(MYIOC_s_ERR_FMT
 		    "%s: smp passthru reply failed to be returned\n",
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 919736a..aa76f36 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -2095,7 +2095,7 @@ int fc_lport_bsg_request(struct bsg_job *job)
 
 	bsg_reply->reply_payload_rcv_len = 0;
 	if (rsp)
-		rsp->resid_len = job->reply_payload.payload_len;
+		scsi_req(rsp)->resid_len = job->reply_payload.payload_len;
 
 	mutex_lock(&lport->lp_mutex);
 
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 022bb6e..570b2cb 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2174,12 +2174,12 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 			       bio_data(rsp->bio), blk_rq_bytes(rsp));
 	if (ret > 0) {
 		/* positive number is the untransferred residual */
-		rsp->resid_len = ret;
-		req->resid_len = 0;
+		scsi_req(rsp)->resid_len = ret;
+		scsi_req(req)->resid_len = 0;
 		ret = 0;
 	} else if (ret == 0) {
-		rsp->resid_len = 0;
-		req->resid_len = 0;
+		scsi_req(rsp)->resid_len = 0;
+		scsi_req(req)->resid_len = 0;
 	}
 
 	return ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index d247925..45cbbc4 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -274,15 +274,15 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 
 	switch (req_data[1]) {
 	case SMP_REPORT_GENERAL:
-		req->resid_len -= 8;
-		rsp->resid_len -= 32;
+		scsi_req(req)->resid_len -= 8;
+		scsi_req(rsp)->resid_len -= 32;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		resp_data[9] = sas_ha->num_phys;
 		break;
 
 	case SMP_REPORT_MANUF_INFO:
-		req->resid_len -= 8;
-		rsp->resid_len -= 64;
+		scsi_req(req)->resid_len -= 8;
+		scsi_req(rsp)->resid_len -= 64;
 		resp_data[2] = SMP_RESP_FUNC_ACC;
 		memcpy(resp_data + 12, shost->hostt->name,
 		       SAS_EXPANDER_VENDOR_ID_LEN);
@@ -295,13 +295,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_DISCOVER:
-		req->resid_len -= 16;
-		if ((int)req->resid_len < 0) {
-			req->resid_len = 0;
+		scsi_req(req)->resid_len -= 16;
+		if ((int)scsi_req(req)->resid_len < 0) {
+			scsi_req(req)->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		rsp->resid_len -= 56;
+		scsi_req(rsp)->resid_len -= 56;
 		sas_host_smp_discover(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -311,13 +311,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_REPORT_PHY_SATA:
-		req->resid_len -= 16;
-		if ((int)req->resid_len < 0) {
-			req->resid_len = 0;
+		scsi_req(req)->resid_len -= 16;
+		if ((int)scsi_req(req)->resid_len < 0) {
+			scsi_req(req)->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		rsp->resid_len -= 60;
+		scsi_req(rsp)->resid_len -= 60;
 		sas_report_phy_sata(sas_ha, resp_data, req_data[9]);
 		break;
 
@@ -331,15 +331,15 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		int to_write = req_data[4];
 
 		if (blk_rq_bytes(req) < base_frame_size + to_write * 4 ||
-		    req->resid_len < base_frame_size + to_write * 4) {
+		    scsi_req(req)->resid_len < base_frame_size + to_write * 4) {
 			resp_data[2] = SMP_RESP_INV_FRM_LEN;
 			break;
 		}
 
 		to_write = sas_host_smp_write_gpio(sas_ha, resp_data, req_data[2],
 						   req_data[3], to_write, &req_data[8]);
-		req->resid_len -= base_frame_size + to_write * 4;
-		rsp->resid_len -= 8;
+		scsi_req(req)->resid_len -= base_frame_size + to_write * 4;
+		scsi_req(rsp)->resid_len -= 8;
 		break;
 	}
 
@@ -348,13 +348,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
 		break;
 
 	case SMP_PHY_CONTROL:
-		req->resid_len -= 44;
-		if ((int)req->resid_len < 0) {
-			req->resid_len = 0;
+		scsi_req(req)->resid_len -= 44;
+		if ((int)scsi_req(req)->resid_len < 0) {
+			scsi_req(req)->resid_len = 0;
 			error = -EINVAL;
 			goto out;
 		}
-		rsp->resid_len -= 8;
+		scsi_req(rsp)->resid_len -= 8;
 		sas_phy_control(sas_ha, req_data[9], req_data[10],
 				req_data[32] >> 4, req_data[33] >> 4,
 				resp_data);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index 7f1d578..e7a7a70 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -2057,10 +2057,10 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 		    ioc->name, __func__,
 		    le16_to_cpu(mpi_reply->ResponseDataLength)));
 
-		memcpy(req->sense, mpi_reply, sizeof(*mpi_reply));
-		req->sense_len = sizeof(*mpi_reply);
-		req->resid_len = 0;
-		rsp->resid_len -=
+		memcpy(scsi_req(req)->sense, mpi_reply, sizeof(*mpi_reply));
+		scsi_req(req)->sense_len = sizeof(*mpi_reply);
+		scsi_req(req)->resid_len = 0;
+		scsi_req(rsp)->resid_len -=
 		    le16_to_cpu(mpi_reply->ResponseDataLength);
 
 		/* check if the resp needs to be copied from the allocated
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index ef99f62..fcb040e 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -48,6 +48,7 @@
 #include <scsi/osd_sense.h>
 
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_request.h>
 
 #include "osd_debug.h"
 
@@ -477,11 +478,13 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
 {
 	or->async_error = error;
 	or->req_errors = req->errors ? : error;
-	or->sense_len = req->sense_len;
+	or->sense_len = scsi_req(req)->sense_len;
+	if (or->sense_len)
+		memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
 	if (or->out.req)
-		or->out.residual = or->out.req->resid_len;
+		or->out.residual = scsi_req(or->out.req)->resid_len;
 	if (or->in.req)
-		or->in.residual = or->in.req->resid_len;
+		or->in.residual = scsi_req(or->in.req)->resid_len;
 }
 
 int osd_execute_request(struct osd_request *or)
@@ -1565,7 +1568,7 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 	req = blk_get_request(q, has_write ? WRITE : READ, flags);
 	if (IS_ERR(req))
 		return req;
-	blk_rq_set_block_pc(req);
+	scsi_req_init(req);
 
 	for_each_bio(bio) {
 		struct bio *bounce_bio = bio;
@@ -1599,8 +1602,6 @@ static int _init_blk_request(struct osd_request *or,
 
 	req->timeout = or->timeout;
 	req->retries = or->retries;
-	req->sense = or->sense;
-	req->sense_len = 0;
 
 	if (has_out) {
 		or->out.req = req;
@@ -1612,7 +1613,7 @@ static int _init_blk_request(struct osd_request *or,
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			blk_rq_set_block_pc(req);
+			scsi_req_init(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)
@@ -1699,8 +1700,8 @@ int osd_finalize_request(struct osd_request *or,
 
 	osd_sec_sign_cdb(&or->cdb, cap_key);
 
-	or->request->cmd = or->cdb.buff;
-	or->request->cmd_len = _osd_req_cdb_len(or);
+	scsi_req(or->request)->cmd = or->cdb.buff;
+	scsi_req(or->request)->cmd_len = _osd_req_cdb_len(or);
 
 	return 0;
 }
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index e8196c5..d314aa5 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -322,6 +322,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
 /* Wakeup from interrupt */
 static void osst_end_async(struct request *req, int update)
 {
+	struct scsi_request *rq = scsi_req(req);
 	struct osst_request *SRpnt = req->end_io_data;
 	struct osst_tape *STp = SRpnt->stp;
 	struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
@@ -330,6 +331,8 @@ static void osst_end_async(struct request *req, int update)
 #if DEBUG
 	STp->write_pending = 0;
 #endif
+	if (rq->sense_len)
+		memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
 
@@ -357,6 +360,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 			int use_sg, int timeout, int retries)
 {
 	struct request *req;
+	struct scsi_request *rq;
 	struct page **pages = NULL;
 	struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
 
@@ -367,7 +371,8 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 	if (IS_ERR(req))
 		return DRIVER_ERROR << 24;
 
-	blk_rq_set_block_pc(req);
+	rq = scsi_req(req);
+	scsi_req_init(req);
 	req->rq_flags |= RQF_QUIET;
 
 	SRpnt->bio = NULL;
@@ -404,11 +409,9 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 			goto free_req;
 	}
 
-	req->cmd_len = cmd_len;
-	memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
-	memcpy(req->cmd, cmd, req->cmd_len);
-	req->sense = SRpnt->sense;
-	req->sense_len = 0;
+	rq->cmd_len = cmd_len;
+	memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
+	memcpy(rq->cmd, cmd, rq->cmd_len);
 	req->timeout = timeout;
 	req->retries = retries;
 	req->end_io_data = SRpnt;
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 1bf8061..40ca75b 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -921,7 +921,7 @@ qla2x00_process_loopback(struct bsg_job *bsg_job)
 
 	bsg_job->reply_len = sizeof(struct fc_bsg_reply) +
 	    sizeof(response) + sizeof(uint8_t);
-	fw_sts_ptr = ((uint8_t *)bsg_job->req->sense) +
+	fw_sts_ptr = ((uint8_t *)scsi_req(bsg_job->req)->sense) +
 	    sizeof(struct fc_bsg_reply);
 	memcpy(fw_sts_ptr, response, sizeof(response));
 	fw_sts_ptr += sizeof(response);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 5093ca9..759f281 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1472,7 +1472,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
 			    type, sp->handle, comp_status, fw_status[1], fw_status[2],
 			    le16_to_cpu(((struct els_sts_entry_24xx *)
 				pkt)->total_byte_count));
-			fw_sts_ptr = ((uint8_t*)bsg_job->req->sense) + sizeof(struct fc_bsg_reply);
+			fw_sts_ptr = ((uint8_t*)scsi_req(bsg_job->req)->sense) +
+				sizeof(struct fc_bsg_reply);
 			memcpy( fw_sts_ptr, fw_status, sizeof(fw_status));
 		}
 		else {
@@ -1486,7 +1487,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
 				    pkt)->error_subcode_2));
 			res = DID_ERROR << 16;
 			bsg_reply->reply_payload_rcv_len = 0;
-			fw_sts_ptr = ((uint8_t*)bsg_job->req->sense) + sizeof(struct fc_bsg_reply);
+			fw_sts_ptr = ((uint8_t*)scsi_req(bsg_job->req)->sense) +
+					sizeof(struct fc_bsg_reply);
 			memcpy( fw_sts_ptr, fw_status, sizeof(fw_status));
 		}
 		ql_dump_buffer(ql_dbg_user + ql_dbg_buffer, vha, 0x5056,
diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 02f1de1..96c33e29 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2244,7 +2244,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
 		memcpy(fstatus.reserved_3,
 		    pkt->reserved_2, 20 * sizeof(uint8_t));
 
-		fw_sts_ptr = ((uint8_t *)bsg_job->req->sense) +
+		fw_sts_ptr = ((uint8_t *)scsi_req(bsg_job->req)->sense) +
 		    sizeof(struct fc_bsg_reply);
 
 		memcpy(fw_sts_ptr, (uint8_t *)&fstatus,
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7c08460..4b40f74 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1968,6 +1968,7 @@ static void eh_lock_door_done(struct request *req, int uptodate)
 static void scsi_eh_lock_door(struct scsi_device *sdev)
 {
 	struct request *req;
+	struct scsi_request *rq;
 
 	/*
 	 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
@@ -1976,17 +1977,16 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
 	if (IS_ERR(req))
 		return;
+	rq = scsi_req(req);
+	scsi_req_init(req);
 
-	blk_rq_set_block_pc(req);
-
-	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
-	req->cmd[1] = 0;
-	req->cmd[2] = 0;
-	req->cmd[3] = 0;
-	req->cmd[4] = SCSI_REMOVAL_PREVENT;
-	req->cmd[5] = 0;
-
-	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
+	rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
+	rq->cmd[1] = 0;
+	rq->cmd[2] = 0;
+	rq->cmd[3] = 0;
+	rq->cmd[4] = SCSI_REMOVAL_PREVENT;
+	rq->cmd[5] = 0;
+	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
 	req->rq_flags |= RQF_QUIET;
 	req->timeout = 10 * HZ;
@@ -2355,7 +2355,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
 	scmd = (struct scsi_cmnd *)(rq + 1);
 	scsi_init_command(dev, scmd);
 	scmd->request = rq;
-	scmd->cmnd = rq->cmd;
+	scmd->cmnd = scsi_req(rq)->cmd;
 
 	scmd->scsi_done		= scsi_reset_provider_done_command;
 	memset(&scmd->sdb, 0, sizeof(scmd->sdb));
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 81ff5ad..98e4c7b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -220,21 +220,21 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 {
 	struct request *req;
 	int write = (data_direction == DMA_TO_DEVICE);
+	struct scsi_request *rq;
 	int ret = DRIVER_ERROR << 24;
 
 	req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM);
 	if (IS_ERR(req))
 		return ret;
-	blk_rq_set_block_pc(req);
+	rq = scsi_req(req);
+	scsi_req_init(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_RECLAIM))
 		goto out;
 
-	req->cmd_len = COMMAND_SIZE(cmd[0]);
-	memcpy(req->cmd, cmd, req->cmd_len);
-	req->sense = sense;
-	req->sense_len = 0;
+	rq->cmd_len = COMMAND_SIZE(cmd[0]);
+	memcpy(rq->cmd, cmd, rq->cmd_len);
 	req->retries = retries;
 	req->timeout = timeout;
 	req->cmd_flags |= flags;
@@ -251,11 +251,13 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	 * is invalid.  Prevent the garbage from being misinterpreted
 	 * and prevent security leaks by zeroing out the excess data.
 	 */
-	if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen))
-		memset(buffer + (bufflen - req->resid_len), 0, req->resid_len);
+	if (unlikely(rq->resid_len > 0 && rq->resid_len <= bufflen))
+		memset(buffer + (bufflen - rq->resid_len), 0, rq->resid_len);
 
 	if (resid)
-		*resid = req->resid_len;
+		*resid = rq->resid_len;
+	if (sense && rq->sense_len)
+		memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
 	ret = req->errors;
  out:
 	blk_put_request(req);
@@ -806,16 +808,13 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 
 	if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
 		if (result) {
-			if (sense_valid && req->sense) {
+			if (sense_valid) {
 				/*
 				 * SG_IO wants current and deferred errors
 				 */
-				int len = 8 + cmd->sense_buffer[7];
-
-				if (len > SCSI_SENSE_BUFFERSIZE)
-					len = SCSI_SENSE_BUFFERSIZE;
-				memcpy(req->sense, cmd->sense_buffer,  len);
-				req->sense_len = len;
+				scsi_req(req)->sense_len =
+					min(8 + cmd->sense_buffer[7],
+					    SCSI_SENSE_BUFFERSIZE);
 			}
 			if (!sense_deferred)
 				error = __scsi_error_from_host_byte(cmd, result);
@@ -825,14 +824,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		 */
 		req->errors = cmd->result;
 
-		req->resid_len = scsi_get_resid(cmd);
+		scsi_req(req)->resid_len = scsi_get_resid(cmd);
 
 		if (scsi_bidi_cmnd(cmd)) {
 			/*
 			 * Bidi commands Must be complete as a whole,
 			 * both sides at once.
 			 */
-			req->next_rq->resid_len = scsi_in(cmd)->resid;
+			scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
 			if (scsi_end_request(req, 0, blk_rq_bytes(req),
 					blk_rq_bytes(req->next_rq)))
 				BUG();
@@ -1165,7 +1164,8 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 	void *prot = cmd->prot_sdb;
 	unsigned long flags;
 
-	memset(cmd, 0, sizeof(*cmd));
+	memset((char *)cmd + sizeof(cmd->req), 0,
+		sizeof(*cmd) - sizeof(cmd->req));
 	cmd->device = dev;
 	cmd->sense_buffer = buf;
 	cmd->prot_sdb = prot;
@@ -1197,7 +1197,8 @@ static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 	}
 
-	cmd->cmd_len = req->cmd_len;
+	cmd->cmd_len = scsi_req(req)->cmd_len;
+	cmd->cmnd = scsi_req(req)->cmd;
 	cmd->transfersize = blk_rq_bytes(req);
 	cmd->allowed = req->retries;
 	return BLKPREP_OK;
@@ -1217,6 +1218,7 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 			return ret;
 	}
 
+	cmd->cmnd = scsi_req(req)->cmd = scsi_req(req)->__cmd;
 	memset(cmd->cmnd, 0, BLK_MAX_CDB);
 	return scsi_cmd_to_driver(cmd)->init_command(cmd);
 }
@@ -1355,7 +1357,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 
 	cmd->tag = req->tag;
 	cmd->request = req;
-	cmd->cmnd = req->cmd;
 	cmd->prot_op = SCSI_PROT_NORMAL;
 
 	ret = scsi_setup_cmnd(sdev, req);
@@ -1874,7 +1875,8 @@ static int scsi_mq_prep_fn(struct request *req)
 	unsigned char *sense_buf = cmd->sense_buffer;
 	struct scatterlist *sg;
 
-	memset(cmd, 0, sizeof(struct scsi_cmnd));
+	memset((char *)cmd + sizeof(cmd->req), 0,
+		sizeof(*cmd) - sizeof(cmd->req));
 
 	req->special = cmd;
 
@@ -1884,7 +1886,6 @@ static int scsi_mq_prep_fn(struct request *req)
 
 	cmd->tag = req->tag;
 
-	cmd->cmnd = req->cmd;
 	cmd->prot_op = SCSI_PROT_NORMAL;
 
 	INIT_LIST_HEAD(&cmd->list);
@@ -1959,7 +1960,6 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (!scsi_host_queue_ready(q, shost, sdev))
 		goto out_dec_target_busy;
 
-
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		ret = prep_to_mq(scsi_mq_prep_fn(req));
 		if (ret != BLK_MQ_RQ_QUEUE_OK)
@@ -2036,6 +2036,7 @@ static int scsi_init_request(void *data, struct request *rq,
 		scsi_alloc_sense_buffer(shost, GFP_KERNEL, numa_node);
 	if (!cmd->sense_buffer)
 		return -ENOMEM;
+	cmd->req.sense = cmd->sense_buffer;
 	return 0;
 }
 
@@ -2125,6 +2126,7 @@ static int scsi_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
 	cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp, NUMA_NO_NODE);
 	if (!cmd->sense_buffer)
 		goto fail;
+	cmd->req.sense = cmd->sense_buffer;
 
 	if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
 		cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp);
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 60b651b..126a5ee 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
 #include <linux/bsg.h>
 
 #include <scsi/scsi.h>
+#include <scsi/scsi_request.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport.h>
@@ -177,6 +178,10 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
 	while ((req = blk_fetch_request(q)) != NULL) {
 		spin_unlock_irq(q->queue_lock);
 
+		scsi_req(req)->resid_len = blk_rq_bytes(req);
+		if (req->next_rq)
+			scsi_req(req->next_rq)->resid_len =
+				blk_rq_bytes(req->next_rq);
 		handler = to_sas_internal(shost->transportt)->f->smp_handler;
 		ret = handler(shost, rphy, req);
 		req->errors = ret;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1fbb1ec..bc8ce6b 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -781,7 +781,7 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
 	rq->special_vec.bv_len = len;
 
 	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
-	rq->resid_len = len;
+	scsi_req(rq)->resid_len = len;
 
 	ret = scsi_init_io(cmd);
 out:
@@ -1164,7 +1164,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
 		__free_page(rq->special_vec.bv_page);
 
-	if (SCpnt->cmnd != rq->cmd) {
+	if (SCpnt->cmnd != scsi_req(rq)->cmd) {
 		mempool_free(SCpnt->cmnd, sd_cdb_pool);
 		SCpnt->cmnd = NULL;
 		SCpnt->cmd_len = 0;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index dbe5b4b..226a8de 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -781,9 +781,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 	}
 	if (atomic_read(&sdp->detaching)) {
 		if (srp->bio) {
-			if (srp->rq->cmd != srp->rq->__cmd)
-				kfree(srp->rq->cmd);
-
+			scsi_req_free_cmd(scsi_req(srp->rq));
 			blk_end_request_all(srp->rq, -EIO);
 			srp->rq = NULL;
 		}
@@ -1279,6 +1277,7 @@ static void
 sg_rq_end_io(struct request *rq, int uptodate)
 {
 	struct sg_request *srp = rq->end_io_data;
+	struct scsi_request *req = scsi_req(rq);
 	Sg_device *sdp;
 	Sg_fd *sfp;
 	unsigned long iflags;
@@ -1297,9 +1296,9 @@ sg_rq_end_io(struct request *rq, int uptodate)
 	if (unlikely(atomic_read(&sdp->detaching)))
 		pr_info("%s: device detaching\n", __func__);
 
-	sense = rq->sense;
+	sense = req->sense;
 	result = rq->errors;
-	resid = rq->resid_len;
+	resid = req->resid_len;
 
 	SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp,
 				      "sg_cmd_done: pack_id=%d, res=0x%x\n",
@@ -1333,6 +1332,10 @@ sg_rq_end_io(struct request *rq, int uptodate)
 			sdp->device->changed = 1;
 		}
 	}
+
+	if (req->sense_len)
+		memcpy(srp->sense_b, req->sense, SCSI_SENSE_BUFFERSIZE);
+
 	/* Rely on write phase to clean out srp status values, so no "else" */
 
 	/*
@@ -1342,8 +1345,7 @@ sg_rq_end_io(struct request *rq, int uptodate)
 	 * blk_rq_unmap_user() can be called from user context.
 	 */
 	srp->rq = NULL;
-	if (rq->cmd != rq->__cmd)
-		kfree(rq->cmd);
+	scsi_req_free_cmd(scsi_req(rq));
 	__blk_put_request(rq->q, rq);
 
 	write_lock_irqsave(&sfp->rq_list_lock, iflags);
@@ -1658,6 +1660,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 {
 	int res;
 	struct request *rq;
+	struct scsi_request *req;
 	Sg_fd *sfp = srp->parentfp;
 	sg_io_hdr_t *hp = &srp->header;
 	int dxfer_len = (int) hp->dxfer_len;
@@ -1700,17 +1703,17 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 		kfree(long_cmdp);
 		return PTR_ERR(rq);
 	}
+	req = scsi_req(rq);
 
-	blk_rq_set_block_pc(rq);
+	scsi_req_init(rq);
 
 	if (hp->cmd_len > BLK_MAX_CDB)
-		rq->cmd = long_cmdp;
-	memcpy(rq->cmd, cmd, hp->cmd_len);
-	rq->cmd_len = hp->cmd_len;
+		req->cmd = long_cmdp;
+	memcpy(req->cmd, cmd, hp->cmd_len);
+	req->cmd_len = hp->cmd_len;
 
 	srp->rq = rq;
 	rq->end_io_data = srp;
-	rq->sense = srp->sense_b;
 	rq->retries = SG_DEFAULT_RETRIES;
 
 	if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
@@ -1786,8 +1789,7 @@ sg_finish_rem_req(Sg_request *srp)
 		ret = blk_rq_unmap_user(srp->bio);
 
 	if (srp->rq) {
-		if (srp->rq->cmd != srp->rq->__cmd)
-			kfree(srp->rq->cmd);
+		scsi_req_free_cmd(scsi_req(srp->rq));
 		blk_put_request(srp->rq);
 	}
 
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 5f35b86..4af9001 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -475,7 +475,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 	ktime_t now;
 
 	now = ktime_get();
-	if (req->cmd[0] == WRITE_6) {
+	if (scsi_req(req)->cmd[0] == WRITE_6) {
 		now = ktime_sub(now, STp->stats->write_time);
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time);
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
@@ -489,7 +489,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 		} else
 			atomic64_add(atomic_read(&STp->stats->last_write_size),
 				&STp->stats->write_byte_cnt);
-	} else if (req->cmd[0] == READ_6) {
+	} else if (scsi_req(req)->cmd[0] == READ_6) {
 		now = ktime_sub(now, STp->stats->read_time);
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time);
 		atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
@@ -514,15 +514,18 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
 static void st_scsi_execute_end(struct request *req, int uptodate)
 {
 	struct st_request *SRpnt = req->end_io_data;
+	struct scsi_request *rq = scsi_req(req);
 	struct scsi_tape *STp = SRpnt->stp;
 	struct bio *tmp;
 
 	STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
-	STp->buffer->cmdstat.residual = req->resid_len;
+	STp->buffer->cmdstat.residual = rq->resid_len;
 
 	st_do_stats(STp, req);
 
 	tmp = SRpnt->bio;
+	if (rq->sense_len)
+		memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
 	if (SRpnt->waiting)
 		complete(SRpnt->waiting);
 
@@ -535,6 +538,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 			   int timeout, int retries)
 {
 	struct request *req;
+	struct scsi_request *rq;
 	struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
 	int err = 0;
 	int write = (data_direction == DMA_TO_DEVICE);
@@ -544,8 +548,8 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 			      GFP_KERNEL);
 	if (IS_ERR(req))
 		return DRIVER_ERROR << 24;
-
-	blk_rq_set_block_pc(req);
+	rq = scsi_req(req);
+	scsi_req_init(req);
 	req->rq_flags |= RQF_QUIET;
 
 	mdata->null_mapped = 1;
@@ -571,11 +575,9 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	}
 
 	SRpnt->bio = req->bio;
-	req->cmd_len = COMMAND_SIZE(cmd[0]);
-	memset(req->cmd, 0, BLK_MAX_CDB);
-	memcpy(req->cmd, cmd, req->cmd_len);
-	req->sense = SRpnt->sense;
-	req->sense_len = 0;
+	rq->cmd_len = COMMAND_SIZE(cmd[0]);
+	memset(rq->cmd, 0, BLK_MAX_CDB);
+	memcpy(rq->cmd, cmd, rq->cmd_len);
 	req->timeout = timeout;
 	req->retries = retries;
 	req->end_io_data = SRpnt;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 04d7aa7..e52f4e1 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1013,7 +1013,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 		goto fail;
 	}
 
-	blk_rq_set_block_pc(req);
+	scsi_req_init(req);
 
 	if (sgl) {
 		ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
@@ -1023,10 +1023,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 
 	req->end_io = pscsi_req_done;
 	req->end_io_data = cmd;
-	req->cmd_len = scsi_command_size(pt->pscsi_cdb);
-	req->cmd = &pt->pscsi_cdb[0];
-	req->sense = &pt->pscsi_sense[0];
-	req->sense_len = 0;
+	scsi_req(req)->cmd_len = scsi_command_size(pt->pscsi_cdb);
+	scsi_req(req)->cmd = &pt->pscsi_cdb[0];
 	if (pdv->pdv_sd->type == TYPE_DISK)
 		req->timeout = PS_TIMEOUT_DISK;
 	else
@@ -1075,7 +1073,7 @@ static void pscsi_req_done(struct request *req, int uptodate)
 	struct pscsi_plugin_task *pt = cmd->priv;
 
 	pt->pscsi_result = req->errors;
-	pt->pscsi_resid = req->resid_len;
+	pt->pscsi_resid = scsi_req(req)->resid_len;
 
 	cmd->scsi_status = status_byte(pt->pscsi_result) << 1;
 	if (cmd->scsi_status) {
@@ -1096,6 +1094,7 @@ static void pscsi_req_done(struct request *req, int uptodate)
 		break;
 	}
 
+	memcpy(pt->pscsi_sense, scsi_req(req)->sense, TRANSPORT_SENSE_BUFFER);
 	__blk_put_request(req->q, req);
 	kfree(pt);
 }
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 0780ff8..930d98c 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -10,6 +10,7 @@
 #include <linux/nfsd/debug.h>
 #include <scsi/scsi_proto.h>
 #include <scsi/scsi_common.h>
+#include <scsi/scsi_request.h>
 
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
@@ -213,6 +214,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
 {
 	struct request_queue *q = bdev->bd_disk->queue;
 	struct request *rq;
+	struct scsi_request *req;
 	size_t bufflen = 252, len, id_len;
 	u8 *buf, *d, type, assoc;
 	int error;
@@ -226,18 +228,19 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
 		error = -ENOMEM;
 		goto out_free_buf;
 	}
-	blk_rq_set_block_pc(rq);
+	req = scsi_req(rq);
+	scsi_req_init(rq);
 
 	error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
 	if (error)
 		goto out_put_request;
 
-	rq->cmd[0] = INQUIRY;
-	rq->cmd[1] = 1;
-	rq->cmd[2] = 0x83;
-	rq->cmd[3] = bufflen >> 8;
-	rq->cmd[4] = bufflen & 0xff;
-	rq->cmd_len = COMMAND_SIZE(INQUIRY);
+	req->cmd[0] = INQUIRY;
+	req->cmd[1] = 1;
+	req->cmd[2] = 0x83;
+	req->cmd[3] = bufflen >> 8;
+	req->cmd[4] = bufflen & 0xff;
+	req->cmd_len = COMMAND_SIZE(INQUIRY);
 
 	error = blk_execute_rq(rq->q, NULL, rq, 1);
 	if (error) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 648ecf5..d7f117f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -227,17 +227,7 @@ struct request {
 
 	int errors;
 
-	/*
-	 * when request is used as a packet command carrier
-	 */
-	unsigned char __cmd[BLK_MAX_CDB];
-	unsigned char *cmd;
-	unsigned short cmd_len;
-
 	unsigned int extra_len;	/* length of alignment and padding */
-	unsigned int sense_len;
-	unsigned int resid_len;	/* residual count */
-	void *sense;
 
 	unsigned long deadline;
 	struct list_head timeout_list;
@@ -919,7 +909,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
-extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index e417f08..36767ec 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -112,9 +112,10 @@ struct compat_blk_user_trace_setup {
 
 #if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
 
+int scsi_cmd_buf_len(struct request *rq);
 static inline int blk_cmd_buf_len(struct request *rq)
 {
-	return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1;
+	return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? scsi_cmd_buf_len(rq) : 1;
 }
 
 extern void blk_dump_cmd(char *buf, struct request *rq);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a633898..086fbe1 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -20,6 +20,7 @@
 #include <linux/mutex.h>
 /* for request_sense */
 #include <linux/cdrom.h>
+#include <scsi/scsi_cmnd.h>
 #include <asm/byteorder.h>
 #include <asm/io.h>
 
@@ -52,6 +53,11 @@ enum ata_cmd_type_bits {
 	((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \
 	 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME)
 
+struct ide_request {
+	struct scsi_request sreq;
+	u8 sense[SCSI_SENSE_BUFFERSIZE];
+};
+
 /* Error codes returned in rq->errors to the higher part of the driver. */
 enum {
 	IDE_DRV_ERROR_GENERAL	= 101,
@@ -579,7 +585,7 @@ struct ide_drive_s {
 
 	/* current sense rq and buffer */
 	bool sense_rq_armed;
-	struct request sense_rq;
+	struct request *sense_rq;
 	struct request_sense sense_data;
 };
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 9fc1aec..f708f1a 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_request.h>
 
 struct Scsi_Host;
 struct scsi_driver;
@@ -57,6 +58,7 @@ struct scsi_pointer {
 #define SCMD_TAGGED		(1 << 0)
 
 struct scsi_cmnd {
+	struct scsi_request req;
 	struct scsi_device *device;
 	struct list_head list;  /* scsi_cmnd participates in queue lists */
 	struct list_head eh_entry; /* entry for the host eh_cmd_q */
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
new file mode 100644
index 0000000..c4d86c6
--- /dev/null
+++ b/include/scsi/scsi_request.h
@@ -0,0 +1,28 @@
+#ifndef _SCSI_SCSI_REQUEST_H
+#define _SCSI_SCSI_REQUEST_H
+
+#include <linux/blk-mq.h>
+
+struct scsi_request {
+	unsigned char	__cmd[BLK_MAX_CDB];
+	unsigned char	*cmd;
+	unsigned short	cmd_len;
+	unsigned int	sense_len;
+	unsigned int	resid_len;	/* residual count */
+	void		*sense;
+};
+
+static inline struct scsi_request *scsi_req(struct request *rq)
+{
+	return blk_mq_rq_to_pdu(rq);
+}
+
+void scsi_req_init(struct request *);
+
+static inline void scsi_req_free_cmd(struct scsi_request *req)
+{
+	if (req->cmd != req->__cmd)
+		kfree(req->cmd);
+}
+
+#endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 95cecbf..69c36e6a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -27,6 +27,7 @@
 #include <linux/time.h>
 #include <linux/uaccess.h>
 #include <linux/list.h>
+#include <scsi/scsi_request.h>
 
 #include <trace/events/block.h>
 
@@ -715,7 +716,8 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, nr_bytes, req_op(rq), rq->cmd_flags,
-				what, rq->errors, rq->cmd_len, rq->cmd);
+				what, rq->errors,
+				scsi_req(rq)->cmd_len, scsi_req(rq)->cmd);
 	} else  {
 		what |= BLK_TC_ACT(BLK_TC_FS);
 		__blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, req_op(rq),
@@ -1751,32 +1753,32 @@ void blk_trace_remove_sysfs(struct device *dev)
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #ifdef CONFIG_EVENT_TRACING
-
-void blk_dump_cmd(char *buf, struct request *rq)
+static void blk_dump_scsi_cmd(char *buf, struct scsi_request *req)
 {
 	int i, end;
-	int len = rq->cmd_len;
-	unsigned char *cmd = rq->cmd;
 
-	if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
-		buf[0] = '\0';
-		return;
-	}
-
-	for (end = len - 1; end >= 0; end--)
-		if (cmd[end])
+	for (end = req->cmd_len - 1; end >= 0; end--)
+		if (req->cmd[end])
 			break;
 	end++;
 
-	for (i = 0; i < len; i++) {
-		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
-		if (i == end && end != len - 1) {
+	for (i = 0; i < req->cmd_len; i++) {
+		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", req->cmd[i]);
+		if (i == end && end != req->cmd_len - 1) {
 			sprintf(buf, " ..");
 			break;
 		}
 	}
 }
 
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
+		blk_dump_scsi_cmd(buf, scsi_req(rq));
+	else
+		buf[0] = '\0';
+}
+
 void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
 {
 	int i = 0;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* [PATCH 18/18] block: don't assign cmd_flags in __blk_rq_prep_clone
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
                   ` (16 preceding siblings ...)
  2017-01-25 17:25 ` [PATCH 17/18] block: split scsi_request out of struct request Christoph Hellwig
@ 2017-01-25 17:25 ` Christoph Hellwig
  2017-01-26  3:31   ` Martin K. Petersen
  2017-01-26 18:29   ` Bart Van Assche
                   ` (2 subsequent siblings)
  20 siblings, 1 reply; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-25 17:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

These days we have the proper flags set since request allocation time.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
---
 block/blk-core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 33c5d05e..6bf5ba0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2983,7 +2983,6 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
 	dst->cpu = src->cpu;
-	dst->cmd_flags = src->cmd_flags | REQ_NOMERGE;
 	dst->cmd_type = src->cmd_type;
 	dst->__sector = blk_rq_pos(src);
 	dst->__data_len = blk_rq_bytes(src);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: [PATCH 01/18] block: add a op_is_flush helper
  2017-01-25 17:25 ` [PATCH 01/18] block: add a op_is_flush helper Christoph Hellwig
@ 2017-01-26  2:58   ` Martin K. Petersen
  2017-01-26 22:38   ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  2:58 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> This centralizes the checks for bios that need to go into
Christoph> the flush state machine.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
@ 2017-01-26  2:59   ` Martin K. Petersen
  2017-01-26 23:18     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  2:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> No need for the local variables, the bio is still live and we
Christoph> can just assigned the bits we want directly.  Make me wonder
Christoph> why we can't assign all the bio flags to start with.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 03/18] block: fix elevator init check
  2017-01-25 17:25 ` [PATCH 03/18] block: fix elevator init check Christoph Hellwig
@ 2017-01-26  3:01   ` Martin K. Petersen
  2017-01-26 23:21     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> We can't initalize the elevator fields for flushes as flush
Christoph> share space in struct request with the elevator data.  But
Christoph> currently we can't commnicate that a request is a flush

communicate

Christoph> through blk_get_request as we can only pass READ or WRITE,
Christoph> and the low-level code looks at the possible NULL bio to
Christoph> check for a flush.

Christoph> Fix this by allowing to pass any block op and flags, and by
Christoph> checking for the flush flags in __get_request.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 04/18] block: simplify blk_init_allocated_queue
  2017-01-25 17:25 ` [PATCH 04/18] block: simplify blk_init_allocated_queue Christoph Hellwig
@ 2017-01-26  3:02   ` Martin K. Petersen
  2017-01-26 23:27     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> Return an errno value instead of the passed in queue so that
Christoph> the callers don't have to keep track of two queues, and move
Christoph> the assignment of the request_fn and lock to the caller as
Christoph> passing them as argument doesn't simplify anything.  While
Christoph> we're at it also remove two pointless NULL assignments, given
Christoph> that the request structure is zeroed on allocation.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-25 17:25 ` [PATCH 05/18] block: allow specifying size for extra command data Christoph Hellwig
@ 2017-01-26  3:15   ` Martin K. Petersen
  2017-01-27 16:12       ` Christoph Hellwig
  0 siblings, 1 reply; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph,

Christoph> This mirrors the blk-mq capabilities to allocate extra
Christoph> drivers-specific data behind struct request by setting a
Christoph> cmd_size field, as well as having a constructor / destructor
Christoph> for it.

Nice!

A couple of minor nits:

+static void *alloc_request_size(gfp_t gfp_mask, void *data)

I like alloc_request_simple() but alloc_request_size() seems a bit
contrived. _reserve? _extra? _special? Don't have any good suggestions,
I'm afraid.

Also a bit heavy on the else brackets a couple of places. But no biggie.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 08/18] scsi_dh_rdac: switch to scsi_execute_req_flags()
  2017-01-25 17:25 ` [PATCH 08/18] scsi_dh_rdac: switch to scsi_execute_req_flags() Christoph Hellwig
@ 2017-01-26  3:18   ` Martin K. Petersen
  0 siblings, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel, Hannes Reinecke,
	Hannes Reinecke

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> From: Hannes Reinecke <hare@suse.de> Switch to
Christoph> scsi_execute_req_flags() and scsi_get_vpd_page() instead of
Christoph> open-coding it.  Using scsi_execute_req_flags() will set
Christoph> REQ_QUIET and REQ_PREEMPT, but this is okay as we're
Christoph> evaluating the errors anyway and should be able to send the
Christoph> command even if the device is quiesced.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 09/18] scsi_dh_emc: switch to scsi_execute_req_flags()
  2017-01-25 17:25 ` [PATCH 09/18] scsi_dh_emc: " Christoph Hellwig
@ 2017-01-26  3:19   ` Martin K. Petersen
  0 siblings, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel, Hannes Reinecke,
	Hannes Reinecke

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> From: Hannes Reinecke <hare@suse.de> Switch to
Christoph> scsi_execute_req_flags() and scsi_get_vpd_page() instead of
Christoph> open-coding it.  Using scsi_execute_req_flags() will set
Christoph> REQ_QUIET and REQ_PREEMPT, but this is okay as we're
Christoph> evaluating the errors anyway and should be able to send the
Christoph> command even if the device is quiesced.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 10/18] scsi_dh_hp_sw: switch to scsi_execute_req_flags()
  2017-01-25 17:25 ` [PATCH 10/18] scsi_dh_hp_sw: " Christoph Hellwig
@ 2017-01-26  3:20     ` Martin K. Petersen
  0 siblings, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-block, Hannes Reinecke, linux-raid, Mike Snitzer,
	linux-scsi, Jens Axboe, dm-devel, Junichi Nomura

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> From: Hannes Reinecke <hare@suse.de> Switch to
Christoph> scsi_execute_req_flags() instead of using the block interface
Christoph> directly.  This will set REQ_QUIET and REQ_PREEMPT, but this
Christoph> is okay as we're evaluating the errors anyway and should be
Christoph> able to send the command even if the device is quiesced.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 10/18] scsi_dh_hp_sw: switch to scsi_execute_req_flags()
@ 2017-01-26  3:20     ` Martin K. Petersen
  0 siblings, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel, Hannes Reinecke,
	Hannes Reinecke

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> From: Hannes Reinecke <hare@suse.de> Switch to
Christoph> scsi_execute_req_flags() instead of using the block interface
Christoph> directly.  This will set REQ_QUIET and REQ_PREEMPT, but this
Christoph> is okay as we're evaluating the errors anyway and should be
Christoph> able to send the command even if the device is quiesced.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool
  2017-01-25 17:25 ` [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool Christoph Hellwig
@ 2017-01-26  3:21   ` Martin K. Petersen
  2017-01-27 17:38     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> When using the slab allocator we already decide at cache
Christoph> creation time if an allocation comes from a GFP_DMA pool
Christoph> using the SLAB_CACHE_DMA flag, and there is no point passing
Christoph> the kmalloc-family only GFP_DMA flag to kmem_cache_alloc.
Christoph> Drop all the infrastructure for doing so.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq
  2017-01-25 17:25 ` [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq Christoph Hellwig
@ 2017-01-26  3:23   ` Martin K. Petersen
  2017-01-27 17:45     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:23 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> Currently blk-mq always allocates the sense buffer using
Christoph> normal GFP_KERNEL allocation.  Refactor the cmd pool code to
Christoph> split the cmd and sense allocation and share the code to
Christoph> allocate the sense buffers as well as the sense buffer slab
Christoph> caches between the legacy and blk-mq path.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 13/18] scsi: remove scsi_cmd_dma_pool
  2017-01-25 17:25 ` [PATCH 13/18] scsi: remove scsi_cmd_dma_pool Christoph Hellwig
@ 2017-01-26  3:24   ` Martin K. Petersen
  2017-01-27 17:51     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:24 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> There is no need for GFP_DMA allocations of the scsi_cmnd
Christoph> structures themselves, all that might be DMAed to or from is
Christoph> the actual payload, or the sense buffers.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 14/18] scsi: remove __scsi_alloc_queue
  2017-01-25 17:25 ` [PATCH 14/18] scsi: remove __scsi_alloc_queue Christoph Hellwig
@ 2017-01-26  3:25   ` Martin K. Petersen
  2017-01-27 17:58     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> Instead do an internal export of __scsi_init_queue for the
Christoph> transport classes that export BSG nodes.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request
  2017-01-25 17:25 ` [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request Christoph Hellwig
@ 2017-01-26  3:30   ` Martin K. Petersen
  2017-01-27 18:39     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> Rely on the new block layer functionality to allocate
Christoph> additional driver specific data behind struct request instead
Christoph> of implementing it in SCSI itself.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 18/18] block: don't assign cmd_flags in __blk_rq_prep_clone
  2017-01-25 17:25 ` [PATCH 18/18] block: don't assign cmd_flags in __blk_rq_prep_clone Christoph Hellwig
@ 2017-01-26  3:31   ` Martin K. Petersen
  0 siblings, 0 replies; 172+ messages in thread
From: Martin K. Petersen @ 2017-01-26  3:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

>>>>> "Christoph" == Christoph Hellwig <hch@lst.de> writes:

Christoph> These days we have the proper flags set since request
Christoph> allocation time.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
@ 2017-01-26 18:29   ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
                     ` (19 subsequent siblings)
  20 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 18:29 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Hi all,
> 
> this series splits the support for SCSI passthrough commands from the
> main struct request used all over the block layer into a separate
> scsi_request structure that drivers that want to support SCSI passthough
> need to embedded as the first thing into their request-private data,
> similar to how we handle NVMe passthrough commands.
> 
> To support this I've added support for that the private data after
> request structure to the legacy request path instead, so that it can
> be treated the same way as the blk-mq path.  Compare to the current
> scsi_cmnd allocator that actually is a major simplification.
> 
> Changes since V1:
>  - fix handling of a NULL sense pointer in __scsi_execute
>  - clean up handling of the flush flags in the block layer and MD
>  - additional small cleanup in dm-rq

Hello Christoph,

Thanks for having fixed the NULL pointer issue I had reported for v1.
However, if I try to run my srp-test testsuite on top of your
hch-block/block-pc-refactor branch (commit ID a07dc3521034) merged
with v4.10-rc5 the following appears on the console:

[  707.317403] BUG: scheduling while atomic: fio/9073/0x00000003
[  707.317404] 1 lock held by fio/9073:
[  707.317404]  #0:  (rcu_read_lock){......}, at: [<ffffffff8132618e>] __blk_mq_run_hw_queue+0xde/0x1c0
[  707.317409] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm msr mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp ipmi_ssif kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul mlx4_core crc32c_intel ghash_clmulni_intel hid_generic pcbc usbhid iTCO_wdt tg3 aesni_intel
[  707.317445]  ptp iTCO_vendor_support aes_x86_64 crypto_simd pps_core glue_helper dcdbas ipmi_si ipmi_devintf libphy devlink lpc_ich cryptd pcspkr ipmi_msghandler mfd_core fjes mei_me tpm_tis button tpm_tis_core shpchp mei tpm wmi mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm sr_mod cdrom drm ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
[  707.317469] CPU: 6 PID: 9073 Comm: fio Tainted: G        W       4.10.0-rc5-dbg+ #1
[  707.317470] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
[  707.317470] Call Trace:
[  707.317473]  dump_stack+0x68/0x93
[  707.317475]  __schedule_bug+0x5b/0x80
[  707.317477]  __schedule+0x762/0xb00
[  707.317479]  schedule+0x38/0x90
[  707.317481]  schedule_timeout+0x2fe/0x640
[  707.317491]  io_schedule_timeout+0x9f/0x110
[  707.317493]  blk_mq_get_tag+0x158/0x260
[  707.317496]  __blk_mq_alloc_request+0x16/0xe0
[  707.317498]  blk_mq_sched_get_request+0x30d/0x360
[  707.317502]  blk_mq_alloc_request+0x3b/0x90
[  707.317505]  blk_get_request+0x2f/0x110
[  707.317507]  multipath_clone_and_map+0xcd/0x140 [dm_multipath]
[  707.317512]  map_request+0x3c/0x290 [dm_mod]
[  707.317517]  dm_mq_queue_rq+0x77/0x100 [dm_mod]
[  707.317519]  blk_mq_dispatch_rq_list+0x1ff/0x320
[  707.317521]  blk_mq_sched_dispatch_requests+0xa9/0xe0
[  707.317523]  __blk_mq_run_hw_queue+0x122/0x1c0
[  707.317528]  blk_mq_run_hw_queue+0x84/0x90
[  707.317530]  blk_mq_flush_plug_list+0x39f/0x480
[  707.317531]  blk_flush_plug_list+0xee/0x270
[  707.317533]  blk_finish_plug+0x27/0x40
[  707.317534]  do_io_submit+0x475/0x900
[  707.317537]  SyS_io_submit+0xb/0x10
[  707.317539]  entry_SYSCALL_64_fastpath+0x18/0xad

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-26 18:29   ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 18:29 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Hi all,
>=20
> this series splits the support for SCSI passthrough commands from the
> main struct request used all over the block layer into a separate
> scsi_request structure that drivers that want to support SCSI passthough
> need to embedded as the first thing into their request-private data,
> similar to how we handle NVMe passthrough commands.
>=20
> To support this I've added support for that the private data after
> request structure to the legacy request path instead, so that it can
> be treated the same way as the blk-mq path.  Compare to the current
> scsi_cmnd allocator that actually is a major simplification.
>=20
> Changes since V1:
>  - fix handling of a NULL sense pointer in __scsi_execute
>  - clean up handling of the flush flags in the block layer and MD
>  - additional small cleanup in dm-rq

Hello Christoph,

Thanks for having fixed the NULL pointer issue I had reported for v1.
However, if I try to run my srp-test testsuite on top of your
hch-block/block-pc-refactor=A0branch (commit ID a07dc3521034) merged
with v4.10-rc5 the following appears on the console:

[  707.317403] BUG: scheduling while atomic: fio/9073/0x00000003
[  707.317404] 1 lock held by fio/9073:
[  707.317404]  #0:  (rcu_read_lock){......}, at: [<ffffffff8132618e>] __bl=
k_mq_run_hw_queue+0xde/0x1c0
[  707.317409] Modules linked in: dm_service_time ib_srp scsi_transport_srp=
 target_core_user uio target_core_pscsi target_core_file ib_srpt target_cor=
e_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQ=
UERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_c=
onntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject=
_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter =
ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib=
_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm msr mlx4_ib ib_core sb_=
edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp ipmi_ssif kvm=
_intel kvm irqbypass crct10dif_pclmul crc32_pclmul mlx4_core crc32c_intel g=
hash_clmulni_intel hid_generic pcbc usbhid iTCO_wdt tg3 aesni_intel
[  707.317445]  ptp iTCO_vendor_support aes_x86_64 crypto_simd pps_core glu=
e_helper dcdbas ipmi_si ipmi_devintf libphy devlink lpc_ich cryptd pcspkr i=
pmi_msghandler mfd_core fjes mei_me tpm_tis button tpm_tis_core shpchp mei =
tpm wmi mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgb=
lt fb_sys_fops ttm sr_mod cdrom drm ehci_pci ehci_hcd usbcore usb_common sg=
 dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
[  707.317469] CPU: 6 PID: 9073 Comm: fio Tainted: G        W       4.10.0-=
rc5-dbg+ #1
[  707.317470] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 1=
1/17/2014
[  707.317470] Call Trace:
[  707.317473]  dump_stack+0x68/0x93
[  707.317475]  __schedule_bug+0x5b/0x80
[  707.317477]  __schedule+0x762/0xb00
[  707.317479]  schedule+0x38/0x90
[  707.317481]  schedule_timeout+0x2fe/0x640
[  707.317491]  io_schedule_timeout+0x9f/0x110
[  707.317493]  blk_mq_get_tag+0x158/0x260
[  707.317496]  __blk_mq_alloc_request+0x16/0xe0
[  707.317498]  blk_mq_sched_get_request+0x30d/0x360
[  707.317502]  blk_mq_alloc_request+0x3b/0x90
[  707.317505]  blk_get_request+0x2f/0x110
[  707.317507]  multipath_clone_and_map+0xcd/0x140 [dm_multipath]
[  707.317512]  map_request+0x3c/0x290 [dm_mod]
[  707.317517]  dm_mq_queue_rq+0x77/0x100 [dm_mod]
[  707.317519]  blk_mq_dispatch_rq_list+0x1ff/0x320
[  707.317521]  blk_mq_sched_dispatch_requests+0xa9/0xe0
[  707.317523]  __blk_mq_run_hw_queue+0x122/0x1c0
[  707.317528]  blk_mq_run_hw_queue+0x84/0x90
[  707.317530]  blk_mq_flush_plug_list+0x39f/0x480
[  707.317531]  blk_flush_plug_list+0xee/0x270
[  707.317533]  blk_finish_plug+0x27/0x40
[  707.317534]  do_io_submit+0x475/0x900
[  707.317537]  SyS_io_submit+0xb/0x10
[  707.317539]  entry_SYSCALL_64_fastpath+0x18/0xad

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 18:29   ` Bart Van Assche
  (?)
@ 2017-01-26 18:44   ` Jens Axboe
  2017-01-26 18:52       ` Bart Van Assche
  -1 siblings, 1 reply; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 18:44 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 11:29 AM, Bart Van Assche wrote:
> On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
>> Hi all,
>>
>> this series splits the support for SCSI passthrough commands from the
>> main struct request used all over the block layer into a separate
>> scsi_request structure that drivers that want to support SCSI passthough
>> need to embedded as the first thing into their request-private data,
>> similar to how we handle NVMe passthrough commands.
>>
>> To support this I've added support for that the private data after
>> request structure to the legacy request path instead, so that it can
>> be treated the same way as the blk-mq path.  Compare to the current
>> scsi_cmnd allocator that actually is a major simplification.
>>
>> Changes since V1:
>>  - fix handling of a NULL sense pointer in __scsi_execute
>>  - clean up handling of the flush flags in the block layer and MD
>>  - additional small cleanup in dm-rq
> 
> Hello Christoph,
> 
> Thanks for having fixed the NULL pointer issue I had reported for v1.
> However, if I try to run my srp-test testsuite on top of your
> hch-block/block-pc-refactor branch (commit ID a07dc3521034) merged
> with v4.10-rc5 the following appears on the console:

I think this may be my bug - does the below help?


diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index d05061f27bb1..56b92db944ae 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -117,7 +117,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	ctx = blk_mq_get_ctx(q);
 	hctx = blk_mq_map_queue(q, ctx->cpu);
 
-	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
+	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
 
 	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index dcb567642db7..9e4ed04f398c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -253,7 +253,7 @@ EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		unsigned int flags)
 {
-	struct blk_mq_alloc_data alloc_data;
+	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
 	int ret;
 
@@ -1382,7 +1382,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
-	struct blk_mq_alloc_data data;
+	struct blk_mq_alloc_data data = { 0, };
 	struct request *rq;
 	unsigned int request_count = 0, srcu_idx;
 	struct blk_plug *plug;
@@ -1504,7 +1504,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
-	struct blk_mq_alloc_data data;
+	struct blk_mq_alloc_data data = { 0, };
 	struct request *rq;
 	blk_qc_t cookie;
 	unsigned int wb_acct;

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 18:44   ` Jens Axboe
@ 2017-01-26 18:52       ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 18:52 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 11:44 -0700, Jens Axboe wrote:
> I think this may be my bug - does the below help?

Hello Jens,

What tree has that patch been generated against? It does not apply
cleanly on top of Christoph's tree:

$ git checkout hch-block-pc-refactor
$ patch -p1 --dry-run -f -s < ~/Re\:_split_scsi_passthrough_fields_out_of_struct_request_V2.mbox
1 out of 3 hunks FAILED

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-26 18:52       ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 18:52 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 11:44 -0700, Jens Axboe wrote:
> I think this may be my bug - does the below help?

Hello Jens,

What tree has that patch been generated against? It does not apply
cleanly on top of Christoph's tree:

$ git checkout hch-block-pc-refactor
$ patch -p1 --dry-run -f -s < ~/Re\:_split_scsi_passthrough_fields_out_of_struct_request_V2.mbox
1 out of 3 hunks FAILED

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 18:52       ` Bart Van Assche
  (?)
@ 2017-01-26 18:57       ` Jens Axboe
  2017-01-26 18:59         ` hch
  -1 siblings, 1 reply; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 18:57 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 11:52 AM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 11:44 -0700, Jens Axboe wrote:
>> I think this may be my bug - does the below help?
> 
> Hello Jens,
> 
> What tree has that patch been generated against? It does not apply
> cleanly on top of Christoph's tree:
> 
> $ git checkout hch-block-pc-refactor
> $ patch -p1 --dry-run -f -s < ~/Re\:_split_scsi_passthrough_fields_out_of_struct_request_V2.mbox
> 1 out of 3 hunks FAILED

It's against my for-4.11/block, which you were running under Christoph's
patches. Maybe he's using an older version? In any case, should be
pretty trivial for you to hand apply. Just ensure that .flags is set to
0 for the common cases, and inherit 'flags' when it is passed in.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 18:57       ` Jens Axboe
@ 2017-01-26 18:59         ` hch
  2017-01-26 19:01             ` Jens Axboe
  0 siblings, 1 reply; 172+ messages in thread
From: hch @ 2017-01-26 18:59 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Bart Van Assche, hch, linux-scsi, linux-raid, dm-devel,
	linux-block, snitzer, j-nomura

On Thu, Jan 26, 2017 at 11:57:36AM -0700, Jens Axboe wrote:
> It's against my for-4.11/block, which you were running under Christoph's
> patches. Maybe he's using an older version? In any case, should be
> pretty trivial for you to hand apply. Just ensure that .flags is set to
> 0 for the common cases, and inherit 'flags' when it is passed in.

No, the flush op cleanups you asked for last round create a conflict
with your patch.  They should be trivial to fix, though.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 18:59         ` hch
@ 2017-01-26 19:01             ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 19:01 UTC (permalink / raw)
  To: hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura,
	Bart Van Assche

On 01/26/2017 11:59 AM, hch@lst.de wrote:
> On Thu, Jan 26, 2017 at 11:57:36AM -0700, Jens Axboe wrote:
>> It's against my for-4.11/block, which you were running under Christoph's
>> patches. Maybe he's using an older version? In any case, should be
>> pretty trivial for you to hand apply. Just ensure that .flags is set to
>> 0 for the common cases, and inherit 'flags' when it is passed in.
> 
> No, the flush op cleanups you asked for last round create a conflict
> with your patch.  They should be trivial to fix, though.

Ah, makes sense. And yes, as I said, should be trivial to hand apply the
hunk that does fail.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-26 19:01             ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 19:01 UTC (permalink / raw)
  To: hch
  Cc: Bart Van Assche, linux-scsi, linux-raid, dm-devel, linux-block,
	snitzer, j-nomura

On 01/26/2017 11:59 AM, hch@lst.de wrote:
> On Thu, Jan 26, 2017 at 11:57:36AM -0700, Jens Axboe wrote:
>> It's against my for-4.11/block, which you were running under Christoph's
>> patches. Maybe he's using an older version? In any case, should be
>> pretty trivial for you to hand apply. Just ensure that .flags is set to
>> 0 for the common cases, and inherit 'flags' when it is passed in.
> 
> No, the flush op cleanups you asked for last round create a conflict
> with your patch.  They should be trivial to fix, though.

Ah, makes sense. And yes, as I said, should be trivial to hand apply the
hunk that does fail.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-26 19:01             ` Jens Axboe
  (?)
@ 2017-01-26 20:47             ` Bart Van Assche
  2017-01-26 20:54                 ` [dm-devel] " Jens Axboe
  -1 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 20:47 UTC (permalink / raw)
  To: Jens Axboe, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura,
	Bart Van Assche

On 01/26/2017 11:01 AM, Jens Axboe wrote:
> On 01/26/2017 11:59 AM, hch@lst.de wrote:
>> On Thu, Jan 26, 2017 at 11:57:36AM -0700, Jens Axboe wrote:
>>> It's against my for-4.11/block, which you were running under Christoph's
>>> patches. Maybe he's using an older version? In any case, should be
>>> pretty trivial for you to hand apply. Just ensure that .flags is set to
>>> 0 for the common cases, and inherit 'flags' when it is passed in.
>>
>> No, the flush op cleanups you asked for last round create a conflict
>> with your patch.  They should be trivial to fix, though.
> 
> Ah, makes sense. And yes, as I said, should be trivial to hand apply the
> hunk that does fail.

Hello Jens and Christoph,

With the below patch applied the test got a little further but did not
pass unfortunately. I tried to analyze the new call stack but it's not yet
clear to me what is going on.
 
The patch I had applied on Christoph's tree:

---
 block/blk-mq-sched.c | 2 +-
 block/blk-mq.c       | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 3bd66e50ec84..7c9318755fab 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -116,7 +116,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 	ctx = blk_mq_get_ctx(q);
 	hctx = blk_mq_map_queue(q, ctx->cpu);
 
-	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
+	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
 
 	if (e) {
 		data->flags |= BLK_MQ_REQ_INTERNAL;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 83640869d9e4..6697626e5d32 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -248,7 +248,7 @@ EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		unsigned int flags)
 {
-	struct blk_mq_alloc_data alloc_data;
+	struct blk_mq_alloc_data alloc_data = { .flags = flags };
 	struct request *rq;
 	int ret;
 
@@ -1369,7 +1369,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
-	struct blk_mq_alloc_data data;
+	struct blk_mq_alloc_data data = { };
 	struct request *rq;
 	unsigned int request_count = 0, srcu_idx;
 	struct blk_plug *plug;
@@ -1491,7 +1491,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
-	struct blk_mq_alloc_data data;
+	struct blk_mq_alloc_data data = { };
 	struct request *rq;
 	blk_qc_t cookie;
 	unsigned int wb_acct;
-- 
2.11.0


The new call trace:

[ 4277.729785] BUG: scheduling while atomic: mount/9209/0x00000004
[ 4277.729824] 2 locks held by mount/9209:
[ 4277.729846]  #0:  (&type->s_umount_key#25/1){+.+.+.}, at: [<ffffffff811ef6fd>] sget_userns+0x2bd/0x500
[ 4277.729881]  #1:  (rcu_read_lock){......}, at: [<ffffffff813261ae>] __blk_mq_run_hw_queue+0xde/0x1c0
[ 4277.729911] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_
ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad 
rdma_cm configfs ib_cm iw_cm msr mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc aesni_intel aes_x86_64 mlx4_core crypto_simd iTCO_wdt
[ 4277.730048]  tg3 iTCO_vendor_support dcdbas glue_helper ptp ipmi_si pcspkr pps_core devlink ipmi_devintf cryptd libphy fjes ipmi_msghandler tpm_tis mei_me tpm_tis_core lpc_ich mfd_core shpchp mei tpm wmi button hid_generic usbhid mgag200 i2c_algo_bit drm_kms_helper sysco
pyarea sysfillrect sysimgblt fb_sys_fops ttm sr_mod cdrom drm ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
[ 4277.730135] CPU: 11 PID: 9209 Comm: mount Not tainted 4.10.0-rc5-dbg+ #2
[ 4277.730159] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
[ 4277.730187] Call Trace:
[ 4277.730212]  dump_stack+0x68/0x93
[ 4277.730236]  __schedule_bug+0x5b/0x80
[ 4277.730259]  __schedule+0x762/0xb00
[ 4277.730281]  schedule+0x38/0x90
[ 4277.730302]  schedule_timeout+0x2fe/0x640
[ 4277.730324]  ? mark_held_locks+0x6f/0xa0
[ 4277.730349]  ? ktime_get+0x74/0x130
[ 4277.730370]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[ 4277.730391]  ? trace_hardirqs_on+0xd/0x10
[ 4277.730418]  ? ktime_get+0x98/0x130
[ 4277.730623]  ? __delayacct_blkio_start+0x1a/0x30
[ 4277.730647]  io_schedule_timeout+0x9f/0x110
[ 4277.730669]  blk_mq_get_tag+0x158/0x260
[ 4277.730691]  ? remove_wait_queue+0x70/0x70
[ 4277.730714]  __blk_mq_alloc_request+0x16/0xe0
[ 4277.730735]  blk_mq_sched_get_request+0x308/0x350
[ 4277.730757]  ? blk_mq_sched_bypass_insert+0x70/0x70
[ 4277.730781]  blk_mq_alloc_request+0x5e/0xb0
[ 4277.730805]  blk_get_request+0x31/0x110
[ 4277.730828]  multipath_clone_and_map+0xcd/0x140 [dm_multipath]
[ 4277.730854]  map_request+0x3c/0x290 [dm_mod]
[ 4277.730885]  dm_mq_queue_rq+0x77/0x100 [dm_mod]
[ 4277.730908]  blk_mq_dispatch_rq_list+0x1ff/0x320
[ 4277.730931]  blk_mq_sched_dispatch_requests+0xa9/0xe0
[ 4277.730955]  __blk_mq_run_hw_queue+0x122/0x1c0
[ 4277.730977]  ? __blk_mq_run_hw_queue+0xde/0x1c0
[ 4277.731000]  blk_mq_run_hw_queue+0x84/0x90
[ 4277.731022]  blk_sq_make_request+0x53c/0xc90
[ 4277.731044]  ? generic_make_request+0xca/0x290
[ 4277.731066]  generic_make_request+0xd7/0x290
[ 4277.731087]  submit_bio+0x5f/0x120
[ 4277.731108]  ? __find_get_block+0x27f/0x300
[ 4277.731129]  submit_bh_wbc+0x14d/0x180
[ 4277.731154]  ? __end_buffer_read_notouch+0x20/0x20
[ 4277.731177]  ll_rw_block+0xa8/0xb0
[ 4277.731203]  __breadahead+0x30/0x40
[ 4277.731232]  __ext4_get_inode_loc+0x3fe/0x4e0
[ 4277.731254]  ext4_iget+0x6b/0xbc0
[ 4277.731277]  ext4_fill_super+0x1c8b/0x33d0
[ 4277.731589]  mount_bdev+0x17b/0x1b0
[ 4277.731613]  ? ext4_calculate_overhead+0x430/0x430
[ 4277.731637]  ext4_mount+0x10/0x20
[ 4277.731659]  mount_fs+0xf/0xa0
[ 4277.731682]  vfs_kern_mount+0x66/0x170
[ 4277.731704]  do_mount+0x19b/0xd70
[ 4277.731726]  ? _copy_from_user+0x7a/0xb0
[ 4277.731748]  ? memdup_user+0x4e/0x80
[ 4277.731771]  SyS_mount+0x7e/0xd0
[ 4277.731793]  entry_SYSCALL_64_fastpath+0x18/0xad
[ 4277.731814] RIP: 0033:0x7fe575771afa
[ 4277.731835] RSP: 002b:00007fffd2261248 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
[ 4277.731858] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fe575771afa
[ 4277.731880] RDX: 0000556f0b7b1010 RSI: 0000556f0b7af1d0 RDI: 0000556f0b7afed0
[ 4277.731901] RBP: 0000556f0b7af060 R08: 0000000000000000 R09: 0000000000000020
[ 4277.731923] R10: 00000000c0ed0000 R11: 0000000000000246 R12: 00007fe575c731a4

^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 20:47             ` [dm-devel] " Bart Van Assche
@ 2017-01-26 20:54                 ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 20:54 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-raid, linux-scsi, snitzer, linux-block, dm-devel, j-nomura

On 01/26/2017 01:47 PM, Bart Van Assche wrote:
> On 01/26/2017 11:01 AM, Jens Axboe wrote:
>> On 01/26/2017 11:59 AM, hch@lst.de wrote:
>>> On Thu, Jan 26, 2017 at 11:57:36AM -0700, Jens Axboe wrote:
>>>> It's against my for-4.11/block, which you were running under Christoph's
>>>> patches. Maybe he's using an older version? In any case, should be
>>>> pretty trivial for you to hand apply. Just ensure that .flags is set to
>>>> 0 for the common cases, and inherit 'flags' when it is passed in.
>>>
>>> No, the flush op cleanups you asked for last round create a conflict
>>> with your patch.  They should be trivial to fix, though.
>>
>> Ah, makes sense. And yes, as I said, should be trivial to hand apply the
>> hunk that does fail.
> 
> Hello Jens and Christoph,
> 
> With the below patch applied the test got a little further but did not
> pass unfortunately. I tried to analyze the new call stack but it's not yet
> clear to me what is going on.
>  
> The patch I had applied on Christoph's tree:
> 
> ---
>  block/blk-mq-sched.c | 2 +-
>  block/blk-mq.c       | 6 +++---
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 3bd66e50ec84..7c9318755fab 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -116,7 +116,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>  	ctx = blk_mq_get_ctx(q);
>  	hctx = blk_mq_map_queue(q, ctx->cpu);
>  
> -	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
> +	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
>  
>  	if (e) {
>  		data->flags |= BLK_MQ_REQ_INTERNAL;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 83640869d9e4..6697626e5d32 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -248,7 +248,7 @@ EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
>  struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
>  		unsigned int flags)
>  {
> -	struct blk_mq_alloc_data alloc_data;
> +	struct blk_mq_alloc_data alloc_data = { .flags = flags };
>  	struct request *rq;
>  	int ret;
>  
> @@ -1369,7 +1369,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  {
>  	const int is_sync = op_is_sync(bio->bi_opf);
>  	const int is_flush_fua = op_is_flush(bio->bi_opf);
> -	struct blk_mq_alloc_data data;
> +	struct blk_mq_alloc_data data = { };
>  	struct request *rq;
>  	unsigned int request_count = 0, srcu_idx;
>  	struct blk_plug *plug;
> @@ -1491,7 +1491,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
>  	const int is_flush_fua = op_is_flush(bio->bi_opf);
>  	struct blk_plug *plug;
>  	unsigned int request_count = 0;
> -	struct blk_mq_alloc_data data;
> +	struct blk_mq_alloc_data data = { };
>  	struct request *rq;
>  	blk_qc_t cookie;
>  	unsigned int wb_acct;

Looks correct to me. Your call path has blk_get_request() in it, I don't have
that in my tree. Is it passing in the right mask?

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 20:54                 ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 20:54 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 01:47 PM, Bart Van Assche wrote:
> On 01/26/2017 11:01 AM, Jens Axboe wrote:
>> On 01/26/2017 11:59 AM, hch@lst.de wrote:
>>> On Thu, Jan 26, 2017 at 11:57:36AM -0700, Jens Axboe wrote:
>>>> It's against my for-4.11/block, which you were running under Christoph's
>>>> patches. Maybe he's using an older version? In any case, should be
>>>> pretty trivial for you to hand apply. Just ensure that .flags is set to
>>>> 0 for the common cases, and inherit 'flags' when it is passed in.
>>>
>>> No, the flush op cleanups you asked for last round create a conflict
>>> with your patch.  They should be trivial to fix, though.
>>
>> Ah, makes sense. And yes, as I said, should be trivial to hand apply the
>> hunk that does fail.
> 
> Hello Jens and Christoph,
> 
> With the below patch applied the test got a little further but did not
> pass unfortunately. I tried to analyze the new call stack but it's not yet
> clear to me what is going on.
>  
> The patch I had applied on Christoph's tree:
> 
> ---
>  block/blk-mq-sched.c | 2 +-
>  block/blk-mq.c       | 6 +++---
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 3bd66e50ec84..7c9318755fab 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -116,7 +116,7 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>  	ctx = blk_mq_get_ctx(q);
>  	hctx = blk_mq_map_queue(q, ctx->cpu);
>  
> -	blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
> +	blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
>  
>  	if (e) {
>  		data->flags |= BLK_MQ_REQ_INTERNAL;
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 83640869d9e4..6697626e5d32 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -248,7 +248,7 @@ EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
>  struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
>  		unsigned int flags)
>  {
> -	struct blk_mq_alloc_data alloc_data;
> +	struct blk_mq_alloc_data alloc_data = { .flags = flags };
>  	struct request *rq;
>  	int ret;
>  
> @@ -1369,7 +1369,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  {
>  	const int is_sync = op_is_sync(bio->bi_opf);
>  	const int is_flush_fua = op_is_flush(bio->bi_opf);
> -	struct blk_mq_alloc_data data;
> +	struct blk_mq_alloc_data data = { };
>  	struct request *rq;
>  	unsigned int request_count = 0, srcu_idx;
>  	struct blk_plug *plug;
> @@ -1491,7 +1491,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
>  	const int is_flush_fua = op_is_flush(bio->bi_opf);
>  	struct blk_plug *plug;
>  	unsigned int request_count = 0;
> -	struct blk_mq_alloc_data data;
> +	struct blk_mq_alloc_data data = { };
>  	struct request *rq;
>  	blk_qc_t cookie;
>  	unsigned int wb_acct;

Looks correct to me. Your call path has blk_get_request() in it, I don't have
that in my tree. Is it passing in the right mask?

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-26 20:54                 ` [dm-devel] " Jens Axboe
@ 2017-01-26 21:01                   ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 21:01 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 13:54 -0700, Jens Axboe wrote:
> Your call path has blk_get_request() in it, I don't have
> that in my tree. Is it passing in the right mask?

Hello Jens,

There is only one blk_get_request() call in drivers/md/dm-mpath.c
and it looks as follows:

 	clone = blk_get_request(bdev_get_queue(bdev),
			rq->cmd_flags | REQ_NOMERGE,
			GFP_ATOMIC);

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 21:01                   ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 21:01 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 13:54 -0700, Jens Axboe wrote:
> Your call path has blk_get_request() in it, I don't have
> that in my tree. Is it passing in the right mask?

Hello Jens,

There is only one blk_get_request() call in drivers/md/dm-mpath.c
and it looks as follows:

 	clone = blk_get_request(bdev_get_queue(bdev),
			rq->cmd_flags | REQ_NOMERGE,
			GFP_ATOMIC);

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 21:01                   ` Bart Van Assche
@ 2017-01-26 21:12                     ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 21:12 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 02:01 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 13:54 -0700, Jens Axboe wrote:
>> Your call path has blk_get_request() in it, I don't have
>> that in my tree. Is it passing in the right mask?
> 
> Hello Jens,
> 
> There is only one blk_get_request() call in drivers/md/dm-mpath.c
> and it looks as follows:
> 
>  	clone = blk_get_request(bdev_get_queue(bdev),
> 			rq->cmd_flags | REQ_NOMERGE,
> 			GFP_ATOMIC);

Yeah, I found it in the dm patch. Looks fine to me, since
blk_mq_alloc_request() checks for __GFP_DIRECT_RECLAIM. Weird, it all
looks fine to me. Are you sure you tested with the patch? Either that,
or I'm smoking crack.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 21:12                     ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 21:12 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 02:01 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 13:54 -0700, Jens Axboe wrote:
>> Your call path has blk_get_request() in it, I don't have
>> that in my tree. Is it passing in the right mask?
> 
> Hello Jens,
> 
> There is only one blk_get_request() call in drivers/md/dm-mpath.c
> and it looks as follows:
> 
>  	clone = blk_get_request(bdev_get_queue(bdev),
> 			rq->cmd_flags | REQ_NOMERGE,
> 			GFP_ATOMIC);

Yeah, I found it in the dm patch. Looks fine to me, since
blk_mq_alloc_request() checks for __GFP_DIRECT_RECLAIM. Weird, it all
looks fine to me. Are you sure you tested with the patch? Either that,
or I'm smoking crack.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 21:12                     ` [dm-devel] " Jens Axboe
@ 2017-01-26 21:47                       ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 21:47 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Thu, 2017-01-26 at 14:12 -0700, Jens Axboe wrote:
> On 01/26/2017 02:01 PM, Bart Van Assche wrote:
> > On Thu, 2017-01-26 at 13:54 -0700, Jens Axboe wrote:
> > > Your call path has blk_get_request() in it, I don't have
> > > that in my tree. Is it passing in the right mask?
> > 
> > Hello Jens,
> > 
> > There is only one blk_get_request() call in drivers/md/dm-mpath.c
> > and it looks as follows:
> > 
> >  	clone = blk_get_request(bdev_get_queue(bdev),
> > 			rq->cmd_flags | REQ_NOMERGE,
> > 			GFP_ATOMIC);
> 
> Yeah, I found it in the dm patch. Looks fine to me, since
> blk_mq_alloc_request() checks for __GFP_DIRECT_RECLAIM. Weird, it all
> looks fine to me. Are you sure you tested with the patch? Either that,
> or I'm smoking crack.

Hello Jens,

After I received your e-mail I noticed that there was a local
modification on the test system that was responsible for the schedule-
while-atomic complaint. Sorry for that. Anyway, I undid the merge with
the v4.10-rc5 code and repeated my test. This time the following call
stack appeared:

BUG: unable to handle kernel NULL pointer dereference at 000000000000005c
IP: blk_mq_sched_get_request+0x310/0x350
PGD 34bd9c067 
PUD 346b37067 
PMD 0 

Oops: 0000 [#1] SMP
Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm msr mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp ipmi_ssif kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul mlx4_core crc32c_intel ghash_clmulni_intel pcbc aesni_intel aes_x86_64 tg3 iTCO_wdt crypto_simd dcdbas iTCO_vendor_support ptp glue_helper ipmi_si cryptd ipmi_devintf pps_core fjes devlink ipmi_msghandler pcspkr libphy tpm_tis tpm_tis_core tpm button mei_me lpc_ich wmi mei mfd_core shpchp hid_generic usbhid mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm sr_mod drm cdrom ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
CPU: 0 PID: 9231 Comm: fio Not tainted 4.10.0-rc4-dbg+ #1
Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
task: ffff88034c8c3140 task.stack: ffffc90005698000
RIP: 0010:blk_mq_sched_get_request+0x310/0x350
RSP: 0018:ffffc9000569bac8 EFLAGS: 00010246
RAX: ffff88034f430958 RBX: ffff88045ed2cef0 RCX: 0000000000000000
RDX: 000000000000001f RSI: ffff8803507bdcf8 RDI: 000000000000001f
RBP: ffffc9000569bb00 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffffc9000569bb18
R13: 000000000000c801 R14: 0000000000000000 R15: 0000000000000000
FS:  00007f65ca054700(0000) GS:ffff88046f200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000000005c CR3: 000000034b0ed000 CR4: 00000000001406f0
Call Trace:
 blk_mq_alloc_request+0x5e/0xb0
 blk_get_request+0x2f/0x110
 multipath_clone_and_map+0xcd/0x140 [dm_multipath]
 map_request+0x3c/0x290 [dm_mod]
 dm_mq_queue_rq+0x77/0x100 [dm_mod]
 blk_mq_dispatch_rq_list+0x1ff/0x320
 blk_mq_sched_dispatch_requests+0xa9/0xe0
 __blk_mq_run_hw_queue+0x122/0x1c0
 blk_mq_run_hw_queue+0x84/0x90
 blk_mq_flush_plug_list+0x39f/0x480
 blk_flush_plug_list+0xee/0x270
 blk_finish_plug+0x27/0x40
 do_io_submit+0x475/0x900
 SyS_io_submit+0xb/0x10
 entry_SYSCALL_64_fastpath+0x18/0xad
RIP: 0033:0x7f65e4d05787
RSP: 002b:00007f65ca051948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007f65e4d05787
RDX: 00007f65a404f158 RSI: 0000000000000001 RDI: 00007f65f6bfd000
RBP: 0000000000000815 R08: 0000000000000001 R09: 00007f65a404e3e0
R10: 00007f65a4040000 R11: 0000000000000202 R12: 00000000000006d0
R13: 00007f65a404e930 R14: 0000000000001000 R15: 0000000000000830
Code: 67 ff ff ff e9 80 fe ff ff 48 89 df e8 ba c4 fe ff 31 c9 e9 60 ff ff ff 44 89 ee 4c 89 e7 e8 c8 6d ff ff 48 89 c1 49 8b 44 24 18 <48> 63 51 5c 48 8b 80 20 01 00 00 48 8b 80 80 00 00 00 48 89 0c 
RIP: blk_mq_sched_get_request+0x310/0x350 RSP: ffffc9000569bac8
CR2: 000000000000005c

(gdb) list *(blk_mq_sched_get_request+0x310)
0xffffffff8132dcf0 is in blk_mq_sched_get_request (block/blk-mq-sched.c:136).
131                                     rq->rq_flags |= RQF_QUEUED;
132                     } else
133                             rq = __blk_mq_alloc_request(data, op);
134             } else {
135                     rq = __blk_mq_alloc_request(data, op);
136                     data->hctx->tags->rqs[rq->tag] = rq;
137             }
138
139             if (rq) {
140                     if (!op_is_flush(op)) {

(gdb) disas blk_mq_sched_get_request
[ ... ]
   0xffffffff8132dce3 <+771>:   callq  0xffffffff81324ab0 <__blk_mq_alloc_request>
   0xffffffff8132dce8 <+776>:   mov    %rax,%rcx
   0xffffffff8132dceb <+779>:   mov    0x18(%r12),%rax
   0xffffffff8132dcf0 <+784>:   movslq 0x5c(%rcx),%rdx
[ ... ]
(gdb) print &((struct request *)0)->tag
$1 = (int *) 0x5c <irq_stack_union+92>

I think this means that rq == NULL and that a test for rq is missing after the
__blk_mq_alloc_request() call?

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 21:47                       ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 21:47 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 14:12 -0700, Jens Axboe wrote:
> On 01/26/2017 02:01 PM, Bart Van Assche wrote:
> > On Thu, 2017-01-26 at 13:54 -0700, Jens Axboe wrote:
> > > Your call path has blk_get_request() in it, I don't have
> > > that in my tree. Is it passing in the right mask?
> >
> > Hello Jens,
> >
> > There is only one blk_get_request() call in drivers/md/dm-mpath.c
> > and it looks as follows:
> >
> >  	clone = blk_get_request(bdev_get_queue(bdev),
> > 			rq->cmd_flags | REQ_NOMERGE,
> > 			GFP_ATOMIC);
>
> Yeah, I found it in the dm patch. Looks fine to me, since
> blk_mq_alloc_request() checks for __GFP_DIRECT_RECLAIM. Weird, it all
> looks fine to me. Are you sure you tested with the patch? Either that,
> or I'm smoking crack.

Hello Jens,

After I received your e-mail I noticed that there was a local
modification on the test system that was responsible for the schedule-
while-atomic complaint. Sorry for that. Anyway, I undid the merge with
the v4.10-rc5 code and repeated my test. This time the following call
stack appeared:

BUG: unable to handle kernel NULL pointer dereference at 000000000000005c
IP: blk_mq_sched_get_request+0x310/0x350
PGD 34bd9c067
PUD 346b37067
PMD 0

Oops: 0000 [#1] SMP
Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm msr mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp ipmi_ssif kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul mlx4_core crc32c_intel ghash_clmulni_intel pcbc aesni_intel aes_x86_64 tg3 iTCO_wdt crypto_simd dcdbas iTCO_vendor_support ptp glue_helper ipmi_si cryptd ipmi_devintf pps_core fjes devlink ipmi_msghandler pcspkr libphy tpm_tis tpm_tis_core tpm button mei_me lpc_ich wmi mei mfd_core shpchp hid_generic usbhid mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm sr_mod drm cdrom ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
CPU: 0 PID: 9231 Comm: fio Not tainted 4.10.0-rc4-dbg+ #1
Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
task: ffff88034c8c3140 task.stack: ffffc90005698000
RIP: 0010:blk_mq_sched_get_request+0x310/0x350
RSP: 0018:ffffc9000569bac8 EFLAGS: 00010246
RAX: ffff88034f430958 RBX: ffff88045ed2cef0 RCX: 0000000000000000
RDX: 000000000000001f RSI: ffff8803507bdcf8 RDI: 000000000000001f
RBP: ffffc9000569bb00 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: ffffc9000569bb18
R13: 000000000000c801 R14: 0000000000000000 R15: 0000000000000000
FS:  00007f65ca054700(0000) GS:ffff88046f200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000000005c CR3: 000000034b0ed000 CR4: 00000000001406f0
Call Trace:
 blk_mq_alloc_request+0x5e/0xb0
 blk_get_request+0x2f/0x110
 multipath_clone_and_map+0xcd/0x140 [dm_multipath]
 map_request+0x3c/0x290 [dm_mod]
 dm_mq_queue_rq+0x77/0x100 [dm_mod]
 blk_mq_dispatch_rq_list+0x1ff/0x320
 blk_mq_sched_dispatch_requests+0xa9/0xe0
 __blk_mq_run_hw_queue+0x122/0x1c0
 blk_mq_run_hw_queue+0x84/0x90
 blk_mq_flush_plug_list+0x39f/0x480
 blk_flush_plug_list+0xee/0x270
 blk_finish_plug+0x27/0x40
 do_io_submit+0x475/0x900
 SyS_io_submit+0xb/0x10
 entry_SYSCALL_64_fastpath+0x18/0xad
RIP: 0033:0x7f65e4d05787
RSP: 002b:00007f65ca051948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007f65e4d05787
RDX: 00007f65a404f158 RSI: 0000000000000001 RDI: 00007f65f6bfd000
RBP: 0000000000000815 R08: 0000000000000001 R09: 00007f65a404e3e0
R10: 00007f65a4040000 R11: 0000000000000202 R12: 00000000000006d0
R13: 00007f65a404e930 R14: 0000000000001000 R15: 0000000000000830
Code: 67 ff ff ff e9 80 fe ff ff 48 89 df e8 ba c4 fe ff 31 c9 e9 60 ff ff ff 44 89 ee 4c 89 e7 e8 c8 6d ff ff 48 89 c1 49 8b 44 24 18 <48> 63 51 5c 48 8b 80 20 01 00 00 48 8b 80 80 00 00 00 48 89 0c
RIP: blk_mq_sched_get_request+0x310/0x350 RSP: ffffc9000569bac8
CR2: 000000000000005c

(gdb) list *(blk_mq_sched_get_request+0x310)
0xffffffff8132dcf0 is in blk_mq_sched_get_request (block/blk-mq-sched.c:136).
131                                     rq->rq_flags |= RQF_QUEUED;
132                     } else
133                             rq = __blk_mq_alloc_request(data, op);
134             } else {
135                     rq = __blk_mq_alloc_request(data, op);
136                     data->hctx->tags->rqs[rq->tag] = rq;
137             }
138
139             if (rq) {
140                     if (!op_is_flush(op)) {

(gdb) disas blk_mq_sched_get_request
[ ... ]
   0xffffffff8132dce3 <+771>:   callq  0xffffffff81324ab0 <__blk_mq_alloc_request>
   0xffffffff8132dce8 <+776>:   mov    %rax,%rcx
   0xffffffff8132dceb <+779>:   mov    0x18(%r12),%rax
   0xffffffff8132dcf0 <+784>:   movslq 0x5c(%rcx),%rdx
[ ... ]
(gdb) print &((struct request *)0)->tag
$1 = (int *) 0x5c <irq_stack_union+92>

I think this means that rq == NULL and that a test for rq is missing after the
__blk_mq_alloc_request() call?

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 21:47                       ` [dm-devel] " Bart Van Assche
@ 2017-01-26 21:51                         ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 21:51 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 02:47 PM, Bart Van Assche wrote:
> (gdb) list *(blk_mq_sched_get_request+0x310)
> 0xffffffff8132dcf0 is in blk_mq_sched_get_request (block/blk-mq-sched.c:136).
> 131                                     rq->rq_flags |= RQF_QUEUED;
> 132                     } else
> 133                             rq = __blk_mq_alloc_request(data, op);
> 134             } else {
> 135                     rq = __blk_mq_alloc_request(data, op);
> 136                     data->hctx->tags->rqs[rq->tag] = rq;
> 137             }
> 138
> 139             if (rq) {
> 140                     if (!op_is_flush(op)) {
> 
> (gdb) disas blk_mq_sched_get_request
> [ ... ]
>    0xffffffff8132dce3 <+771>:   callq  0xffffffff81324ab0 <__blk_mq_alloc_request>
>    0xffffffff8132dce8 <+776>:   mov    %rax,%rcx
>    0xffffffff8132dceb <+779>:   mov    0x18(%r12),%rax
>    0xffffffff8132dcf0 <+784>:   movslq 0x5c(%rcx),%rdx
> [ ... ]
> (gdb) print &((struct request *)0)->tag
> $1 = (int *) 0x5c <irq_stack_union+92>
> 
> I think this means that rq == NULL and that a test for rq is missing after the
> __blk_mq_alloc_request() call?

That is exactly what it means, looks like that one path doesn't handle
that.  You'd have to exhaust the pool with atomic allocs for this to
trigger, we don't do that at all in the normal IO path. So good catch,
must be the dm part that enables this since it does NOWAIT allocations.


diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 3136696f4991..c27613de80c5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 			rq = __blk_mq_alloc_request(data, op);
 	} else {
 		rq = __blk_mq_alloc_request(data, op);
-		data->hctx->tags->rqs[rq->tag] = rq;
+		if (rq)
+			data->hctx->tags->rqs[rq->tag] = rq;
 	}
 
 	if (rq) {

-- 
Jens Axboe

^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 21:51                         ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 21:51 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 02:47 PM, Bart Van Assche wrote:
> (gdb) list *(blk_mq_sched_get_request+0x310)
> 0xffffffff8132dcf0 is in blk_mq_sched_get_request (block/blk-mq-sched.c:136).
> 131                                     rq->rq_flags |= RQF_QUEUED;
> 132                     } else
> 133                             rq = __blk_mq_alloc_request(data, op);
> 134             } else {
> 135                     rq = __blk_mq_alloc_request(data, op);
> 136                     data->hctx->tags->rqs[rq->tag] = rq;
> 137             }
> 138
> 139             if (rq) {
> 140                     if (!op_is_flush(op)) {
> 
> (gdb) disas blk_mq_sched_get_request
> [ ... ]
>    0xffffffff8132dce3 <+771>:   callq  0xffffffff81324ab0 <__blk_mq_alloc_request>
>    0xffffffff8132dce8 <+776>:   mov    %rax,%rcx
>    0xffffffff8132dceb <+779>:   mov    0x18(%r12),%rax
>    0xffffffff8132dcf0 <+784>:   movslq 0x5c(%rcx),%rdx
> [ ... ]
> (gdb) print &((struct request *)0)->tag
> $1 = (int *) 0x5c <irq_stack_union+92>
> 
> I think this means that rq == NULL and that a test for rq is missing after the
> __blk_mq_alloc_request() call?

That is exactly what it means, looks like that one path doesn't handle
that.  You'd have to exhaust the pool with atomic allocs for this to
trigger, we don't do that at all in the normal IO path. So good catch,
must be the dm part that enables this since it does NOWAIT allocations.


diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 3136696f4991..c27613de80c5 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
 			rq = __blk_mq_alloc_request(data, op);
 	} else {
 		rq = __blk_mq_alloc_request(data, op);
-		data->hctx->tags->rqs[rq->tag] = rq;
+		if (rq)
+			data->hctx->tags->rqs[rq->tag] = rq;
 	}
 
 	if (rq) {

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: [PATCH 01/18] block: add a op_is_flush helper
  2017-01-25 17:25 ` [PATCH 01/18] block: add a op_is_flush helper Christoph Hellwig
  2017-01-26  2:58   ` Martin K. Petersen
@ 2017-01-26 22:38   ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 22:38 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> This centralizes the checks for bios that need to go into the flush
> state machine.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-26 21:51                         ` [dm-devel] " Jens Axboe
@ 2017-01-26 23:14                           ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:14 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 14:51 -0700, Jens Axboe wrote:
> That is exactly what it means, looks like that one path doesn't handle
> that.  You'd have to exhaust the pool with atomic allocs for this to
> trigger, we don't do that at all in the normal IO path. So good catch,
> must be the dm part that enables this since it does NOWAIT allocations.
> 
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 3136696f4991..c27613de80c5 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>  			rq = __blk_mq_alloc_request(data, op);
>  	} else {
>  		rq = __blk_mq_alloc_request(data, op);
> -		data->hctx->tags->rqs[rq->tag] = rq;
> +		if (rq)
> +			data->hctx->tags->rqs[rq->tag] = rq;
>  	}
>  
>  	if (rq) {

Hello Jens,

With these two patches applied the scheduling-while-atomic complaint and
the oops are gone. However, some tasks get stuck. Is the console output
below enough to figure out what is going on or do you want me to bisect
this? I don't think that any requests got stuck since no pending requests
are shown in /sys/block/*/mq/*/{pending,*/rq_list}.

Thanks,

Bart.

[  663.217074] sysrq: SysRq : Show Blocked State
[  663.217111]   task                        PC stack   pid father
[  663.217237] kworker/10:0    D    0    71      2 0x00000000
[  663.217267] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.217289] Call Trace:
[  663.217313]  __schedule+0x2da/0xb00
[  663.217337]  ? bit_wait+0x50/0x50
[  663.217360]  schedule+0x38/0x90
[  663.217383]  schedule_timeout+0x2fe/0x640
[  663.217406]  ? mark_held_locks+0x6f/0xa0
[  663.217430]  ? ktime_get+0x74/0x130
[  663.217452]  ? bit_wait+0x50/0x50
[  663.217473]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.217497]  ? trace_hardirqs_on+0xd/0x10
[  663.217520]  ? ktime_get+0x98/0x130
[  663.217542]  ? __delayacct_blkio_start+0x1a/0x30
[  663.217564]  ? bit_wait+0x50/0x50
[  663.217586]  io_schedule_timeout+0x9f/0x110
[  663.217609]  bit_wait_io+0x16/0x60
[  663.217637]  __wait_on_bit+0x53/0x80
[  663.217659]  ? bit_wait+0x50/0x50
[  663.217680]  out_of_line_wait_on_bit+0x6e/0x80
[  663.217703]  ? prepare_to_wait_event+0x170/0x170
[  663.217727]  sync_mapping_buffers+0x22f/0x390
[  663.217750]  __generic_file_fsync+0x4d/0x90
[  663.217772]  ext4_sync_file+0x2b4/0x540
[  663.217793]  vfs_fsync_range+0x46/0xa0
[  663.217814]  dio_complete+0x181/0x1b0
[  663.217835]  dio_aio_complete_work+0x17/0x20
[  663.217856]  process_one_work+0x208/0x6a0
[  663.217878]  ? process_one_work+0x18d/0x6a0
[  663.217899]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.217919]  worker_thread+0x49/0x4a0
[  663.217941]  kthread+0x107/0x140
[  663.217962]  ? process_one_work+0x6a0/0x6a0
[  663.217982]  ? kthread_create_on_node+0x40/0x40
[  663.218003]  ? acpi_ps_create_op+0x1c0/0x35a
[  663.218025]  ? acpi_ps_alloc_op+0x3f/0x80
[  663.218045]  ? acpi_os_acquire_object+0x28/0x2a
[  663.218068]  ret_from_fork+0x2e/0x40
[  663.218112] kworker/u24:8   D    0   141      2 0x00000000
[  663.218139] Workqueue: writeback wb_workfn (flush-254:0)
[  663.218160] Call Trace:
[  663.218182]  __schedule+0x2da/0xb00
[  663.218209]  schedule+0x38/0x90
[  663.218233]  schedule_timeout+0x2fe/0x640
[  663.218263]  ? ktime_get+0x74/0x130
[  663.218284]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.218307]  ? trace_hardirqs_on+0xd/0x10
[  663.218329]  ? ktime_get+0x98/0x130
[  663.218352]  ? __delayacct_blkio_start+0x1a/0x30
[  663.218448]  io_schedule_timeout+0x9f/0x110
[  663.218475]  blk_mq_get_tag+0x158/0x260
[  663.218499]  ? remove_wait_queue+0x70/0x70
[  663.218525]  __blk_mq_alloc_request+0x16/0xe0
[  663.218548]  blk_mq_sched_get_request+0x279/0x370
[  663.218571]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.218596]  ? generic_make_request+0xca/0x290
[  663.218619]  blk_sq_make_request+0x111/0xc90
[  663.218642]  ? blk_queue_enter+0x2d/0x280
[  663.218665]  ? generic_make_request+0xca/0x290
[  663.218688]  generic_make_request+0xd7/0x290
[  663.218712]  ? _raw_spin_unlock_irqrestore+0x31/0x50
[  663.218737]  submit_bio+0x5f/0x120
[  663.218761]  submit_bh_wbc+0x14d/0x180
[  663.218783]  __block_write_full_page+0x193/0x3f0
[  663.218805]  ? I_BDEV+0x10/0x10
[  663.218826]  ? I_BDEV+0x10/0x10
[  663.218847]  block_write_full_page+0xd0/0x120
[  663.218870]  blkdev_writepage+0x13/0x20
[  663.218892]  __writepage+0x11/0x40
[  663.218914]  write_cache_pages+0x216/0x640
[  663.218935]  ? wb_position_ratio+0x1f0/0x1f0
[  663.218957]  generic_writepages+0x41/0x60
[  663.218979]  blkdev_writepages+0x2a/0x30
[  663.219000]  do_writepages+0x1c/0x30
[  663.219022]  __writeback_single_inode+0x57/0x720
[  663.219042]  ? _raw_spin_unlock+0x22/0x30
[  663.219064]  writeback_sb_inodes+0x2f4/0x6a0
[  663.219086]  __writeback_inodes_wb+0x8d/0xc0
[  663.219107]  wb_writeback+0x33c/0x530
[  663.219127]  ? mark_held_locks+0x6f/0xa0
[  663.219149]  wb_workfn+0x356/0x630
[  663.219170]  process_one_work+0x208/0x6a0
[  663.219191]  ? process_one_work+0x18d/0x6a0
[  663.219212]  worker_thread+0x49/0x4a0
[  663.219233]  kthread+0x107/0x140
[  663.219253]  ? process_one_work+0x6a0/0x6a0
[  663.219275]  ? kthread_create_on_node+0x40/0x40
[  663.219299]  ret_from_fork+0x2e/0x40
[  663.219323] kworker/10:1    D    0   155      2 0x00000000
[  663.219349] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.219371] Call Trace:
[  663.219393]  __schedule+0x2da/0xb00
[  663.219421]  ? bit_wait+0x50/0x50
[  663.219443]  schedule+0x38/0x90
[  663.219465]  schedule_timeout+0x2fe/0x640
[  663.219487]  ? mark_held_locks+0x6f/0xa0
[  663.219509]  ? ktime_get+0x74/0x130
[  663.219531]  ? bit_wait+0x50/0x50
[  663.219553]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.219576]  ? trace_hardirqs_on+0xd/0x10
[  663.219598]  ? ktime_get+0x98/0x130
[  663.219619]  ? __delayacct_blkio_start+0x1a/0x30
[  663.219643]  ? bit_wait+0x50/0x50
[  663.219665]  io_schedule_timeout+0x9f/0x110
[  663.219688]  bit_wait_io+0x16/0x60
[  663.219711]  __wait_on_bit+0x53/0x80
[  663.219734]  ? bit_wait+0x50/0x50
[  663.219756]  out_of_line_wait_on_bit+0x6e/0x80
[  663.219779]  ? prepare_to_wait_event+0x170/0x170
[  663.219803]  sync_mapping_buffers+0x22f/0x390
[  663.219826]  __generic_file_fsync+0x4d/0x90
[  663.219848]  ext4_sync_file+0x2b4/0x540
[  663.219870]  vfs_fsync_range+0x46/0xa0
[  663.219892]  dio_complete+0x181/0x1b0
[  663.219915]  dio_aio_complete_work+0x17/0x20
[  663.219939]  process_one_work+0x208/0x6a0
[  663.219962]  ? process_one_work+0x18d/0x6a0
[  663.219986]  worker_thread+0x49/0x4a0
[  663.220011]  kthread+0x107/0x140
[  663.220038]  ? process_one_work+0x6a0/0x6a0
[  663.220063]  ? kthread_create_on_node+0x40/0x40
[  663.220090]  ret_from_fork+0x2e/0x40
[  663.220121] kworker/4:2     D    0   284      2 0x00000000
[  663.220148] Workqueue: srp_remove srp_remove_work [ib_srp]
[  663.220171] Call Trace:
[  663.220193]  __schedule+0x2da/0xb00
[  663.220214]  schedule+0x38/0x90
[  663.220236]  blk_mq_freeze_queue_wait+0x51/0xa0
[  663.220258]  ? remove_wait_queue+0x70/0x70
[  663.220281]  blk_mq_freeze_queue+0x15/0x20
[  663.220302]  blk_freeze_queue+0x9/0x10
[  663.220324]  blk_cleanup_queue+0xdd/0x290
[  663.220346]  __scsi_remove_device+0x49/0xd0
[  663.220368]  scsi_forget_host+0x5b/0x60
[  663.220390]  scsi_remove_host+0x6c/0x110
[  663.220412]  srp_remove_work+0x8b/0x220 [ib_srp]
[  663.220434]  process_one_work+0x208/0x6a0
[  663.220454]  ? process_one_work+0x18d/0x6a0
[  663.220475]  worker_thread+0x49/0x4a0
[  663.220496]  kthread+0x107/0x140
[  663.220516]  ? process_one_work+0x6a0/0x6a0
[  663.220537]  ? kthread_create_on_node+0x40/0x40
[  663.220560]  ret_from_fork+0x2e/0x40
[  663.220581] kworker/10:2    D    0   285      2 0x00000000
[  663.220605] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.220626] Call Trace:
[  663.220648]  __schedule+0x2da/0xb00
[  663.220669]  ? bit_wait+0x50/0x50
[  663.220690]  schedule+0x38/0x90
[  663.220711]  schedule_timeout+0x2fe/0x640
[  663.220732]  ? mark_held_locks+0x6f/0xa0
[  663.220753]  ? ktime_get+0x74/0x130
[  663.220774]  ? bit_wait+0x50/0x50
[  663.220794]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.220816]  ? trace_hardirqs_on+0xd/0x10
[  663.220837]  ? ktime_get+0x98/0x130
[  663.220858]  ? __delayacct_blkio_start+0x1a/0x30
[  663.220879]  ? bit_wait+0x50/0x50
[  663.220899]  io_schedule_timeout+0x9f/0x110
[  663.220920]  bit_wait_io+0x16/0x60
[  663.220941]  __wait_on_bit+0x53/0x80
[  663.220963]  ? bit_wait+0x50/0x50
[  663.220993]  out_of_line_wait_on_bit+0x6e/0x80
[  663.221015]  ? prepare_to_wait_event+0x170/0x170
[  663.221037]  sync_mapping_buffers+0x22f/0x390
[  663.221059]  __generic_file_fsync+0x4d/0x90
[  663.221081]  ext4_sync_file+0x2b4/0x540
[  663.221103]  vfs_fsync_range+0x46/0xa0
[  663.221124]  dio_complete+0x181/0x1b0
[  663.221146]  dio_aio_complete_work+0x17/0x20
[  663.221170]  process_one_work+0x208/0x6a0
[  663.221191]  ? process_one_work+0x18d/0x6a0
[  663.221214]  worker_thread+0x49/0x4a0
[  663.221237]  kthread+0x107/0x140
[  663.221259]  ? process_one_work+0x6a0/0x6a0
[  663.221281]  ? kthread_create_on_node+0x40/0x40
[  663.221304]  ret_from_fork+0x2e/0x40
[  663.221330] kworker/10:3    D    0   405      2 0x00000000
[  663.221354] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.221379] Call Trace:
[  663.221401]  __schedule+0x2da/0xb00
[  663.221424]  ? bit_wait+0x50/0x50
[  663.221446]  schedule+0x38/0x90
[  663.221470]  schedule_timeout+0x2fe/0x640
[  663.221495]  ? ktime_get+0x74/0x130
[  663.221520]  ? bit_wait+0x50/0x50
[  663.221546]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.221574]  ? trace_hardirqs_on+0xd/0x10
[  663.221600]  ? ktime_get+0x98/0x130
[  663.221622]  ? __delayacct_blkio_start+0x1a/0x30
[  663.221646]  ? bit_wait+0x50/0x50
[  663.221668]  io_schedule_timeout+0x9f/0x110
[  663.221690]  bit_wait_io+0x16/0x60
[  663.221712]  __wait_on_bit+0x53/0x80
[  663.221734]  ? bit_wait+0x50/0x50
[  663.221756]  out_of_line_wait_on_bit+0x6e/0x80
[  663.221778]  ? prepare_to_wait_event+0x170/0x170
[  663.221801]  __sync_dirty_buffer+0xdc/0x130
[  663.221822]  sync_dirty_buffer+0xe/0x10
[  663.221844]  ext4_write_inode+0x121/0x140
[  663.221866]  __writeback_single_inode+0x3ae/0x720
[  663.221887]  ? _raw_spin_unlock+0x22/0x30
[  663.221909]  writeback_single_inode+0xd0/0x190
[  663.221932]  sync_inode_metadata+0x2f/0x40
[  663.221953]  __generic_file_fsync+0x74/0x90
[  663.221975]  ext4_sync_file+0x2b4/0x540
[  663.221995]  vfs_fsync_range+0x46/0xa0
[  663.222016]  dio_complete+0x181/0x1b0
[  663.222036]  dio_aio_complete_work+0x17/0x20
[  663.222057]  process_one_work+0x208/0x6a0
[  663.222078]  ? process_one_work+0x18d/0x6a0
[  663.222099]  worker_thread+0x49/0x4a0
[  663.222121]  kthread+0x107/0x140
[  663.222141]  ? process_one_work+0x6a0/0x6a0
[  663.222161]  ? kthread_create_on_node+0x40/0x40
[  663.222183]  ret_from_fork+0x2e/0x40
[  663.222277] fio             D    0  9265   8846 0x00000000
[  663.222302] Call Trace:
[  663.222325]  __schedule+0x2da/0xb00
[  663.222347]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.222372]  schedule+0x38/0x90
[  663.222396]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.222423]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.222450]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.222479]  ? ___slab_alloc+0x178/0x520
[  663.222503]  call_rwsem_down_write_failed+0x17/0x30
[  663.222528]  down_write+0x5a/0x70
[  663.222553]  ? ext4_file_write_iter+0x45/0x360
[  663.222576]  ext4_file_write_iter+0x45/0x360
[  663.222600]  ? __sb_start_write+0xde/0x200
[  663.222621]  ? aio_write+0x14e/0x160
[  663.222643]  aio_write+0xd1/0x160
[  663.222795]  ? __might_fault+0x3e/0x90
[  663.222823]  do_io_submit+0x37d/0x900
[  663.222848]  ? do_io_submit+0x1ac/0x900
[  663.222871]  SyS_io_submit+0xb/0x10
[  663.222893]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.222915] RIP: 0033:0x7fc269d02787
[  663.222937] RSP: 002b:00007fc249e6f948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.222961] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.222984] RDX: 00007fc24004f190 RSI: 0000000000000001 RDI: 00007fc27bbfc000
[  663.223006] RBP: 0000000000000038 R08: 0000000000000001 R09: 00007fc24004cfe0
[  663.223029] R10: 00007fc240039000 R11: 0000000000000202 R12: 00007fc24f052000
[  663.223051] R13: 00007fc24004f360 R14: 0000000000000000 R15: 0000000000000001
[  663.223074] fio             D    0  9266   8846 0x00000000
[  663.223286] Call Trace:
[  663.223311]  __schedule+0x2da/0xb00
[  663.223333]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.223356]  schedule+0x38/0x90
[  663.223379]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.223402]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.223424]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.223447]  ? ___slab_alloc+0x178/0x520
[  663.223470]  call_rwsem_down_write_failed+0x17/0x30
[  663.223493]  down_write+0x5a/0x70
[  663.223514]  ? ext4_file_write_iter+0x45/0x360
[  663.223536]  ext4_file_write_iter+0x45/0x360
[  663.223558]  ? __sb_start_write+0xde/0x200
[  663.223579]  ? aio_write+0x14e/0x160
[  663.223599]  aio_write+0xd1/0x160
[  663.223621]  ? __might_fault+0x3e/0x90
[  663.223642]  do_io_submit+0x37d/0x900
[  663.223663]  ? do_io_submit+0x1ac/0x900
[  663.223684]  SyS_io_submit+0xb/0x10
[  663.223705]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.223725] RIP: 0033:0x7fc269d02787
[  663.223747] RSP: 002b:00007fc24a670948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.223769] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.223790] RDX: 00007fc23804f188 RSI: 0000000000000001 RDI: 00007fc27bc08000
[  663.223811] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc23804d2a0
[  663.223833] R10: 00007fc238050000 R11: 0000000000000202 R12: 0000000000000020
[  663.223855] R13: 00007fc23804ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.223877] fio             D    0  9267   8846 0x00000000
[  663.224191] Call Trace:
[  663.224215]  __schedule+0x2da/0xb00
[  663.224236]  schedule+0x38/0x90
[  663.224258]  schedule_timeout+0x2fe/0x640
[  663.224280]  ? mark_held_locks+0x6f/0xa0
[  663.224303]  ? ktime_get+0x74/0x130
[  663.224325]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.224346]  ? trace_hardirqs_on+0xd/0x10
[  663.224368]  ? ktime_get+0x98/0x130
[  663.224389]  ? __delayacct_blkio_start+0x1a/0x30
[  663.224411]  io_schedule_timeout+0x9f/0x110
[  663.224433]  blk_mq_get_tag+0x158/0x260
[  663.224454]  ? remove_wait_queue+0x70/0x70
[  663.224476]  __blk_mq_alloc_request+0x16/0xe0
[  663.224521]  blk_mq_sched_get_request+0x279/0x370
[  663.224544]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.224567]  ? generic_make_request+0xca/0x290
[  663.224592]  blk_sq_make_request+0x111/0xc90
[  663.224620]  ? blk_queue_enter+0x2d/0x280
[  663.224643]  ? generic_make_request+0xca/0x290
[  663.224666]  generic_make_request+0xd7/0x290
[  663.224690]  submit_bio+0x5f/0x120
[  663.224711]  ? trace_hardirqs_on+0xd/0x10
[  663.224733]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.224758]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.224781]  ? _raw_spin_unlock+0x22/0x30
[  663.224806]  ? ext4_get_block_trans+0xe0/0xe0
[  663.224829]  __blockdev_direct_IO+0x35/0x40
[  663.224852]  ext4_direct_IO+0x19c/0x7b0
[  663.224874]  generic_file_direct_write+0xa6/0x150
[  663.224897]  __generic_file_write_iter+0xbb/0x1c0
[  663.224920]  ext4_file_write_iter+0x77/0x360
[  663.224946]  ? __sb_start_write+0xde/0x200
[  663.224967]  ? aio_write+0x14e/0x160
[  663.224988]  aio_write+0xd1/0x160
[  663.225009]  ? __might_fault+0x3e/0x90
[  663.225029]  do_io_submit+0x37d/0x900
[  663.225051]  ? do_io_submit+0x1ac/0x900
[  663.225076]  SyS_io_submit+0xb/0x10
[  663.225102]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225128] RIP: 0033:0x7fc269d02787
[  663.225151] RSP: 002b:00007fc24ae71948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.225175] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225205] RDX: 00007fc22804f2c8 RSI: 0000000000000001 RDI: 00007fc27bc0e000
[  663.225232] RBP: 00000000000000b0 R08: 0000000000000001 R09: 00007fc228045fa0
[  663.225259] R10: 00007fc228012000 R11: 0000000000000202 R12: 00007fc24f06e110
[  663.225283] R13: 00007fc22804f360 R14: 0000000000000000 R15: 0000000000000001
[  663.225309] fio             D    0  9268   8846 0x00000000
[  663.225338] Call Trace:
[  663.225366]  __schedule+0x2da/0xb00
[  663.225392]  schedule+0x38/0x90
[  663.225420]  schedule_timeout+0x2fe/0x640
[  663.225446]  ? mark_held_locks+0x6f/0xa0
[  663.225483]  ? ktime_get+0x74/0x130
[  663.225484]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225486]  ? trace_hardirqs_on+0xd/0x10
[  663.225487]  ? ktime_get+0x98/0x130
[  663.225489]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225491]  io_schedule_timeout+0x9f/0x110
[  663.225493]  blk_mq_get_tag+0x158/0x260
[  663.225494]  ? remove_wait_queue+0x70/0x70
[  663.225496]  __blk_mq_alloc_request+0x16/0xe0
[  663.225499]  blk_mq_sched_get_request+0x279/0x370
[  663.225500]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225503]  ? generic_make_request+0xca/0x290
[  663.225505]  blk_sq_make_request+0x111/0xc90
[  663.225507]  ? blk_queue_enter+0x2d/0x280
[  663.225509]  ? generic_make_request+0xca/0x290
[  663.225511]  generic_make_request+0xd7/0x290
[  663.225513]  submit_bio+0x5f/0x120
[  663.225514]  ? trace_hardirqs_on+0xd/0x10
[  663.225516]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225518]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225520]  ? _raw_spin_unlock+0x22/0x30
[  663.225522]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225524]  __blockdev_direct_IO+0x35/0x40
[  663.225526]  ext4_direct_IO+0x19c/0x7b0
[  663.225528]  generic_file_direct_write+0xa6/0x150
[  663.225530]  __generic_file_write_iter+0xbb/0x1c0
[  663.225531]  ext4_file_write_iter+0x77/0x360
[  663.225533]  ? __sb_start_write+0xde/0x200
[  663.225534]  ? aio_write+0x14e/0x160
[  663.225536]  aio_write+0xd1/0x160
[  663.225537]  ? __might_fault+0x3e/0x90
[  663.225539]  do_io_submit+0x37d/0x900
[  663.225540]  ? do_io_submit+0x1ac/0x900
[  663.225542]  SyS_io_submit+0xb/0x10
[  663.225543]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225544] RIP: 0033:0x7fc269d02787
[  663.225545] RSP: 002b:00007fc24f04e948 EFLAGS: 00000212 ORIG_RAX: 00000000000000d1
[  663.225549] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225550] RDX: 00007fc22004f1d0 RSI: 0000000000000001 RDI: 00007fc27bc00000
[  663.225551] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc22004b8e0
[  663.225552] R10: 00007fc220031000 R11: 0000000000000212 R12: 0000000000000020
[  663.225552] R13: 00007fc22004ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.225554] fio             D    0  9269   8846 0x00000000
[  663.225557] Call Trace:
[  663.225559]  __schedule+0x2da/0xb00
[  663.225560]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.225562]  schedule+0x38/0x90
[  663.225564]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.225566]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.225568]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.225570]  ? ___slab_alloc+0x178/0x520
[  663.225573]  call_rwsem_down_write_failed+0x17/0x30
[  663.225575]  down_write+0x5a/0x70
[  663.225576]  ? ext4_file_write_iter+0x45/0x360
[  663.225578]  ext4_file_write_iter+0x45/0x360
[  663.225579]  ? __sb_start_write+0xde/0x200
[  663.225581]  ? aio_write+0x14e/0x160
[  663.225582]  aio_write+0xd1/0x160
[  663.225584]  ? __might_fault+0x3e/0x90
[  663.225585]  do_io_submit+0x37d/0x900
[  663.225586]  ? do_io_submit+0x1ac/0x900
[  663.225588]  SyS_io_submit+0xb/0x10
[  663.225589]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225590] RIP: 0033:0x7fc269d02787
[  663.225591] RSP: 002b:00007fc24e84d948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.225593] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225594] RDX: 00007fc23004f1e8 RSI: 0000000000000001 RDI: 00007fc27bc06000
[  663.225595] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc23004b020
[  663.225595] R10: 00007fc23002e000 R11: 0000000000000202 R12: 0000000000000020
[  663.225596] R13: 00007fc23004ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.225598] fio             D    0  9270   8846 0x00000000
[  663.225600] Call Trace:
[  663.225602]  __schedule+0x2da/0xb00
[  663.225603]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.225605]  schedule+0x38/0x90
[  663.225607]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.225609]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.225611]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.225614]  ? ___slab_alloc+0x178/0x520
[  663.225616]  call_rwsem_down_write_failed+0x17/0x30
[  663.225618]  down_write+0x5a/0x70
[  663.225620]  ? ext4_file_write_iter+0x45/0x360
[  663.225622]  ext4_file_write_iter+0x45/0x360
[  663.225623]  ? __sb_start_write+0xde/0x200
[  663.225624]  ? aio_write+0x14e/0x160
[  663.225626]  aio_write+0xd1/0x160
[  663.225627]  ? __might_fault+0x3e/0x90
[  663.225629]  do_io_submit+0x37d/0x900
[  663.225630]  ? do_io_submit+0x1ac/0x900
[  663.225632]  SyS_io_submit+0xb/0x10
[  663.225633]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225634] RIP: 0033:0x7fc269d02787
[  663.225635] RSP: 002b:00007fc24e04c948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.225636] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225637] RDX: 00007fc21804f298 RSI: 0000000000000001 RDI: 00007fc27bc10000
[  663.225638] RBP: 0000000011bb7409 R08: 0000000000000001 R09: 00007fc2180470e0
[  663.225639] R10: 00007fc218018000 R11: 0000000000000202 R12: 0000000068baf4ba
[  663.225640] R13: 00000000165c8e46 R14: 000000002adf21b1 R15: 00000000169850e2
[  663.225642] fio             D    0  9271   8846 0x00000000
[  663.225644] Call Trace:
[  663.225646]  __schedule+0x2da/0xb00
[  663.225647]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.225649]  schedule+0x38/0x90
[  663.225651]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.225653]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.225656]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.225658]  ? ___slab_alloc+0x178/0x520
[  663.225660]  call_rwsem_down_write_failed+0x17/0x30
[  663.225662]  down_write+0x5a/0x70
[  663.225664]  ? ext4_file_write_iter+0x45/0x360
[  663.225665]  ext4_file_write_iter+0x45/0x360
[  663.225667]  ? __sb_start_write+0xde/0x200
[  663.225668]  ? aio_write+0x14e/0x160
[  663.225669]  aio_write+0xd1/0x160
[  663.225671]  ? __might_fault+0x3e/0x90
[  663.225673]  do_io_submit+0x37d/0x900
[  663.225674]  ? do_io_submit+0x1ac/0x900
[  663.225676]  SyS_io_submit+0xb/0x10
[  663.225677]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225678] RIP: 0033:0x7fc269d02787
[  663.225679] RSP: 002b:00007fc24d84b948 EFLAGS: 00000206 ORIG_RAX: 00000000000000d1
[  663.225681] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225682] RDX: 00007fc21004f210 RSI: 0000000000000001 RDI: 00007fc27bc0c000
[  663.225682] RBP: 0000000000000060 R08: 0000000000000001 R09: 00007fc21004a1e0
[  663.225683] R10: 00007fc210029000 R11: 0000000000000206 R12: 00007fc24f0a6330
[  663.225684] R13: 00007fc21004f360 R14: 0000000000000000 R15: 0000000000000001
[  663.225686] fio             D    0  9272   8846 0x00000000
[  663.225689] Call Trace:
[  663.225691]  __schedule+0x2da/0xb00
[  663.225693]  schedule+0x38/0x90
[  663.225695]  schedule_timeout+0x2fe/0x640
[  663.225696]  ? mark_held_locks+0x6f/0xa0
[  663.225699]  ? ktime_get+0x74/0x130
[  663.225700]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225702]  ? trace_hardirqs_on+0xd/0x10
[  663.225703]  ? ktime_get+0x98/0x130
[  663.225705]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225707]  io_schedule_timeout+0x9f/0x110
[  663.225708]  blk_mq_get_tag+0x158/0x260
[  663.225710]  ? remove_wait_queue+0x70/0x70
[  663.225712]  __blk_mq_alloc_request+0x16/0xe0
[  663.225714]  blk_mq_sched_get_request+0x279/0x370
[  663.225715]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225718]  ? generic_make_request+0xca/0x290
[  663.225720]  blk_sq_make_request+0x111/0xc90
[  663.225722]  ? blk_queue_enter+0x2d/0x280
[  663.225724]  ? generic_make_request+0xca/0x290
[  663.225727]  generic_make_request+0xd7/0x290
[  663.225730]  submit_bio+0x5f/0x120
[  663.225731]  ? trace_hardirqs_on+0xd/0x10
[  663.225733]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225735]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225737]  ? _raw_spin_unlock+0x22/0x30
[  663.225739]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225741]  __blockdev_direct_IO+0x35/0x40
[  663.225743]  ext4_direct_IO+0x19c/0x7b0
[  663.225745]  generic_file_direct_write+0xa6/0x150
[  663.225747]  __generic_file_write_iter+0xbb/0x1c0
[  663.225749]  ext4_file_write_iter+0x77/0x360
[  663.225750]  ? __sb_start_write+0xde/0x200
[  663.225752]  ? aio_write+0x14e/0x160
[  663.225753]  aio_write+0xd1/0x160
[  663.225755]  ? __might_fault+0x3e/0x90
[  663.225756]  do_io_submit+0x37d/0x900
[  663.225758]  ? do_io_submit+0x1ac/0x900
[  663.225759]  SyS_io_submit+0xb/0x10
[  663.225761]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225762] RIP: 0033:0x7fc269d02787
[  663.225763] RSP: 002b:00007fc24d04a948 EFLAGS: 00000206 ORIG_RAX: 00000000000000d1
[  663.225764] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225765] RDX: 00007fc20804f260 RSI: 0000000000000001 RDI: 00007fc27bbfa000
[  663.225766] RBP: 00000000000000b8 R08: 0000000000000001 R09: 00007fc208048520
[  663.225767] R10: 00007fc20801f000 R11: 0000000000000206 R12: 00007fc24f0b43b8
[  663.225768] R13: 00007fc20804f360 R14: 0000000000000000 R15: 0000000000000001
[  663.225770] fio             D    0  9273   8846 0x00000000
[  663.225772] Call Trace:
[  663.225774]  __schedule+0x2da/0xb00
[  663.225776]  schedule+0x38/0x90
[  663.225778]  schedule_timeout+0x2fe/0x640
[  663.225780]  ? mark_held_locks+0x6f/0xa0
[  663.225782]  ? ktime_get+0x74/0x130
[  663.225783]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225784]  ? trace_hardirqs_on+0xd/0x10
[  663.225786]  ? ktime_get+0x98/0x130
[  663.225788]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225790]  io_schedule_timeout+0x9f/0x110
[  663.225791]  blk_mq_get_tag+0x158/0x260
[  663.225793]  ? remove_wait_queue+0x70/0x70
[  663.225795]  __blk_mq_alloc_request+0x16/0xe0
[  663.225797]  blk_mq_sched_get_request+0x279/0x370
[  663.225798]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225800]  ? generic_make_request+0xca/0x290
[  663.225803]  blk_sq_make_request+0x111/0xc90
[  663.225805]  ? blk_queue_enter+0x2d/0x280
[  663.225807]  ? generic_make_request+0xca/0x290
[  663.225809]  generic_make_request+0xd7/0x290
[  663.225811]  submit_bio+0x5f/0x120
[  663.225813]  ? trace_hardirqs_on+0xd/0x10
[  663.225814]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225816]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225818]  ? _raw_spin_unlock+0x22/0x30
[  663.225820]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225822]  __blockdev_direct_IO+0x35/0x40
[  663.225824]  ext4_direct_IO+0x19c/0x7b0
[  663.225826]  generic_file_direct_write+0xa6/0x150
[  663.225828]  __generic_file_write_iter+0xbb/0x1c0
[  663.225829]  ext4_file_write_iter+0x77/0x360
[  663.225831]  ? __sb_start_write+0xde/0x200
[  663.225832]  ? aio_write+0x14e/0x160
[  663.225833]  aio_write+0xd1/0x160
[  663.225835]  ? __might_fault+0x3e/0x90
[  663.225837]  do_io_submit+0x37d/0x900
[  663.225838]  ? do_io_submit+0x1ac/0x900
[  663.225839]  SyS_io_submit+0xb/0x10
[  663.225841]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225842] RIP: 0033:0x7fc269d02787
[  663.225842] RSP: 002b:00007fc24c849948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.225844] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225845] RDX: 00007fc1d004f200 RSI: 0000000000000001 RDI: 00007fc27bbf2000
[  663.225846] RBP: 0000000000000050 R08: 0000000000000001 R09: 00007fc1d004a7a0
[  663.225847] R10: 00007fc1d002b000 R11: 0000000000000202 R12: 00007fc24f0c2440
[  663.225848] R13: 00007fc1d004f360 R14: 0000000000000000 R15: 0000000000000001
[  663.225849] fio             D    0  9274   8846 0x00000000
[  663.225851] Call Trace:
[  663.225853]  __schedule+0x2da/0xb00
[  663.225855]  schedule+0x38/0x90
[  663.225857]  schedule_timeout+0x2fe/0x640
[  663.225858]  ? mark_held_locks+0x6f/0xa0
[  663.225860]  ? ktime_get+0x74/0x130
[  663.225861]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225863]  ? trace_hardirqs_on+0xd/0x10
[  663.225864]  ? ktime_get+0x98/0x130
[  663.225866]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225868]  io_schedule_timeout+0x9f/0x110
[  663.225869]  blk_mq_get_tag+0x158/0x260
[  663.225871]  ? remove_wait_queue+0x70/0x70
[  663.225873]  __blk_mq_alloc_request+0x16/0xe0
[  663.225875]  blk_mq_sched_get_request+0x279/0x370
[  663.225876]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225878]  ? generic_make_request+0xca/0x290
[  663.225880]  blk_sq_make_request+0x111/0xc90
[  663.225882]  ? blk_queue_enter+0x2d/0x280
[  663.225885]  ? generic_make_request+0xca/0x290
[  663.225887]  generic_make_request+0xd7/0x290
[  663.225889]  submit_bio+0x5f/0x120
[  663.225890]  ? trace_hardirqs_on+0xd/0x10
[  663.225892]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225894]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225895]  ? _raw_spin_unlock+0x22/0x30
[  663.225898]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225901]  __blockdev_direct_IO+0x35/0x40
[  663.225903]  ext4_direct_IO+0x19c/0x7b0
[  663.225905]  generic_file_direct_write+0xa6/0x150
[  663.225907]  __generic_file_write_iter+0xbb/0x1c0
[  663.225908]  ext4_file_write_iter+0x77/0x360
[  663.225910]  ? __sb_start_write+0xde/0x200
[  663.225911]  ? aio_write+0x14e/0x160
[  663.225913]  aio_write+0xd1/0x160
[  663.225915]  ? __might_fault+0x3e/0x90
[  663.225916]  do_io_submit+0x37d/0x900
[  663.225918]  ? do_io_submit+0x1ac/0x900
[  663.225920]  SyS_io_submit+0xb/0x10
[  663.225921]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225922] RIP: 0033:0x7fc269d02787
[  663.225923] RSP: 002b:00007fc24c048948 EFLAGS: 00000206 ORIG_RAX: 00000000000000d1
[  663.225925] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.225926] RDX: 00007fc20004f1f0 RSI: 0000000000000001 RDI: 00007fc27bc02000
[  663.225927] RBP: 000000001d36c979 R08: 0000000000000001 R09: 00007fc20004ad60
[  663.225927] R10: 00007fc20002d000 R11: 0000000000000206 R12: 00000000d322e5e6
[  663.225928] R13: 000000002b3b7b00 R14: 00000000573a44d8 R15: 0000000001e412a3
[  663.225930] fio             D    0  9275   8846 0x00000000
[  663.225932] Call Trace:
[  663.225935]  __schedule+0x2da/0xb00
[  663.225936]  schedule+0x38/0x90
[  663.225939]  schedule_timeout+0x2fe/0x640
[  663.225940]  ? mark_held_locks+0x6f/0xa0
[  663.225942]  ? ktime_get+0x74/0x130
[  663.225943]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225945]  ? trace_hardirqs_on+0xd/0x10
[  663.225946]  ? ktime_get+0x98/0x130
[  663.225948]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225949]  io_schedule_timeout+0x9f/0x110
[  663.225951]  blk_mq_get_tag+0x158/0x260
[  663.225952]  ? remove_wait_queue+0x70/0x70
[  663.225954]  __blk_mq_alloc_request+0x16/0xe0
[  663.225956]  blk_mq_sched_get_request+0x279/0x370
[  663.225958]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225960]  ? generic_make_request+0xca/0x290
[  663.225963]  blk_sq_make_request+0x111/0xc90
[  663.225965]  ? blk_queue_enter+0x2d/0x280
[  663.225967]  ? generic_make_request+0xca/0x290
[  663.225969]  generic_make_request+0xd7/0x290
[  663.225971]  submit_bio+0x5f/0x120
[  663.225973]  ? trace_hardirqs_on+0xd/0x10
[  663.225974]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225976]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225977]  ? _raw_spin_unlock+0x22/0x30
[  663.225980]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225981]  __blockdev_direct_IO+0x35/0x40
[  663.225983]  ext4_direct_IO+0x19c/0x7b0
[  663.225985]  generic_file_direct_write+0xa6/0x150
[  663.225987]  __generic_file_write_iter+0xbb/0x1c0
[  663.225988]  ext4_file_write_iter+0x77/0x360
[  663.225990]  ? __sb_start_write+0xde/0x200
[  663.225991]  ? aio_write+0x14e/0x160
[  663.225992]  aio_write+0xd1/0x160
[  663.225994]  ? __might_fault+0x3e/0x90
[  663.225995]  do_io_submit+0x37d/0x900
[  663.225997]  ? do_io_submit+0x1ac/0x900
[  663.225998]  SyS_io_submit+0xb/0x10
[  663.226000]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226000] RIP: 0033:0x7fc269d02787
[  663.226001] RSP: 002b:00007fc24b847948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.226003] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.226004] RDX: 00007fc1f804f1c0 RSI: 0000000000000001 RDI: 00007fc27bbf8000
[  663.226005] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc1f804bea0
[  663.226006] R10: 00007fc1f8033000 R11: 0000000000000202 R12: 0000000000000020
[  663.226007] R13: 00007fc1f804ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.226009] fio             D    0  9276   8846 0x00000000
[  663.226011] Call Trace:
[  663.226013]  __schedule+0x2da/0xb00
[  663.226014]  schedule+0x38/0x90
[  663.226016]  schedule_timeout+0x2fe/0x640
[  663.226018]  ? mark_held_locks+0x6f/0xa0
[  663.226019]  ? ktime_get+0x74/0x130
[  663.226021]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226022]  ? trace_hardirqs_on+0xd/0x10
[  663.226024]  ? ktime_get+0x98/0x130
[  663.226025]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226027]  io_schedule_timeout+0x9f/0x110
[  663.226028]  blk_mq_get_tag+0x158/0x260
[  663.226030]  ? remove_wait_queue+0x70/0x70
[  663.226032]  __blk_mq_alloc_request+0x16/0xe0
[  663.226033]  blk_mq_sched_get_request+0x279/0x370
[  663.226035]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226038]  ? generic_make_request+0xca/0x290
[  663.226040]  blk_sq_make_request+0x111/0xc90
[  663.226042]  ? blk_queue_enter+0x2d/0x280
[  663.226044]  ? generic_make_request+0xca/0x290
[  663.226046]  generic_make_request+0xd7/0x290
[  663.226048]  submit_bio+0x5f/0x120
[  663.226049]  ? trace_hardirqs_on+0xd/0x10
[  663.226051]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226053]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.226054]  ? _raw_spin_unlock+0x22/0x30
[  663.226057]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226058]  __blockdev_direct_IO+0x35/0x40
[  663.226060]  ext4_direct_IO+0x19c/0x7b0
[  663.226062]  generic_file_direct_write+0xa6/0x150
[  663.226064]  __generic_file_write_iter+0xbb/0x1c0
[  663.226066]  ext4_file_write_iter+0x77/0x360
[  663.226068]  ? __sb_start_write+0xde/0x200
[  663.226070]  ? aio_write+0x14e/0x160
[  663.226072]  aio_write+0xd1/0x160
[  663.226074]  ? __might_fault+0x3e/0x90
[  663.226076]  do_io_submit+0x37d/0x900
[  663.226077]  ? do_io_submit+0x1ac/0x900
[  663.226079]  SyS_io_submit+0xb/0x10
[  663.226080]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226081] RIP: 0033:0x7fc269d02787
[  663.226082] RSP: 002b:00007fc248e6d948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.226084] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.226085] RDX: 00007fc1f004f158 RSI: 0000000000000001 RDI: 00007fc27bbfe000
[  663.226086] RBP: 0000000000000815 R08: 0000000000000001 R09: 00007fc1f004e3e0
[  663.226087] R10: 00007fc1f0040000 R11: 0000000000000202 R12: 00000000000006d0
[  663.226088] R13: 00007fc1f004e930 R14: 0000000000001000 R15: 0000000000000830
[  663.226089] fio             D    0  9277   8846 0x00000000
[  663.226091] Call Trace:
[  663.226094]  __schedule+0x2da/0xb00
[  663.226095]  schedule+0x38/0x90
[  663.226097]  schedule_timeout+0x2fe/0x640
[  663.226099]  ? mark_held_locks+0x6f/0xa0
[  663.226101]  ? ktime_get+0x74/0x130
[  663.226102]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226103]  ? trace_hardirqs_on+0xd/0x10
[  663.226105]  ? ktime_get+0x98/0x130
[  663.226107]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226109]  io_schedule_timeout+0x9f/0x110
[  663.226110]  blk_mq_get_tag+0x158/0x260
[  663.226112]  ? remove_wait_queue+0x70/0x70
[  663.226114]  __blk_mq_alloc_request+0x16/0xe0
[  663.226115]  blk_mq_sched_get_request+0x279/0x370
[  663.226117]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226119]  ? generic_make_request+0xca/0x290
[  663.226121]  blk_sq_make_request+0x111/0xc90
[  663.226123]  ? blk_queue_enter+0x2d/0x280
[  663.226125]  ? generic_make_request+0xca/0x290
[  663.226127]  generic_make_request+0xd7/0x290
[  663.226130]  submit_bio+0x5f/0x120
[  663.226131]  ? trace_hardirqs_on+0xd/0x10
[  663.226133]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226135]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.226136]  ? _raw_spin_unlock+0x22/0x30
[  663.226138]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226140]  __blockdev_direct_IO+0x35/0x40
[  663.226142]  ext4_direct_IO+0x19c/0x7b0
[  663.226144]  generic_file_direct_write+0xa6/0x150
[  663.226146]  __generic_file_write_iter+0xbb/0x1c0
[  663.226148]  ext4_file_write_iter+0x77/0x360
[  663.226149]  ? __sb_start_write+0xde/0x200
[  663.226150]  ? aio_write+0x14e/0x160
[  663.226152]  aio_write+0xd1/0x160
[  663.226153]  ? __might_fault+0x3e/0x90
[  663.226155]  do_io_submit+0x37d/0x900
[  663.226156]  ? do_io_submit+0x1ac/0x900
[  663.226157]  SyS_io_submit+0xb/0x10
[  663.226159]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226160] RIP: 0033:0x7fc269d02787
[  663.226160] RSP: 002b:00007fc24866c948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.226162] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.226163] RDX: 00007fc1d804f1e8 RSI: 0000000000000001 RDI: 00007fc27bbf6000
[  663.226164] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc1d804b020
[  663.226165] R10: 00007fc1d802e000 R11: 0000000000000202 R12: 0000000000000020
[  663.226166] R13: 00007fc1d804ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.226167] fio             D    0  9278   8846 0x00000000
[  663.226169] Call Trace:
[  663.226171]  __schedule+0x2da/0xb00
[  663.226172]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.226174]  schedule+0x38/0x90
[  663.226176]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226178]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226181]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226183]  ? ___slab_alloc+0x178/0x520
[  663.226186]  call_rwsem_down_write_failed+0x17/0x30
[  663.226188]  down_write+0x5a/0x70
[  663.226190]  ? ext4_file_write_iter+0x45/0x360
[  663.226191]  ext4_file_write_iter+0x45/0x360
[  663.226193]  ? __sb_start_write+0xde/0x200
[  663.226194]  ? aio_write+0x14e/0x160
[  663.226195]  aio_write+0xd1/0x160
[  663.226197]  ? __might_fault+0x3e/0x90
[  663.226198]  do_io_submit+0x37d/0x900
[  663.226201]  ? do_io_submit+0x1ac/0x900
[  663.226202]  SyS_io_submit+0xb/0x10
[  663.226204]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226205] RIP: 0033:0x7fc269d02787
[  663.226206] RSP: 002b:00007fc247e6b948 EFLAGS: 00000206 ORIG_RAX: 00000000000000d1
[  663.226208] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.226209] RDX: 00007fc23c04f288 RSI: 0000000000000001 RDI: 00007fc27bc0a000
[  663.226210] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc23c0476a0
[  663.226211] R10: 00007fc23c01a000 R11: 0000000000000206 R12: 0000000000000020
[  663.226212] R13: 00007fc23c04ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.226214] fio             D    0  9279   8846 0x00000000
[  663.226216] Call Trace:
[  663.226218]  __schedule+0x2da/0xb00
[  663.226220]  schedule+0x38/0x90
[  663.226222]  schedule_timeout+0x2fe/0x640
[  663.226223]  ? mark_held_locks+0x6f/0xa0
[  663.226225]  ? ktime_get+0x74/0x130
[  663.226227]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226228]  ? trace_hardirqs_on+0xd/0x10
[  663.226230]  ? ktime_get+0x98/0x130
[  663.226231]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226233]  io_schedule_timeout+0x9f/0x110
[  663.226235]  blk_mq_get_tag+0x158/0x260
[  663.226237]  ? remove_wait_queue+0x70/0x70
[  663.226239]  __blk_mq_alloc_request+0x16/0xe0
[  663.226241]  blk_mq_sched_get_request+0x279/0x370
[  663.226243]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226245]  ? generic_make_request+0xca/0x290
[  663.226247]  blk_sq_make_request+0x111/0xc90
[  663.226249]  ? blk_queue_enter+0x2d/0x280
[  663.226251]  ? generic_make_request+0xca/0x290
[  663.226253]  generic_make_request+0xd7/0x290
[  663.226255]  submit_bio+0x5f/0x120
[  663.226257]  ? trace_hardirqs_on+0xd/0x10
[  663.226258]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226261]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226263]  __blockdev_direct_IO+0x35/0x40
[  663.226265]  ext4_direct_IO+0x19c/0x7b0
[  663.226267]  generic_file_direct_write+0xa6/0x150
[  663.226269]  __generic_file_write_iter+0xbb/0x1c0
[  663.226270]  ext4_file_write_iter+0x77/0x360
[  663.226272]  ? __sb_start_write+0xde/0x200
[  663.226274]  ? aio_write+0x14e/0x160
[  663.226275]  aio_write+0xd1/0x160
[  663.226276]  ? __might_fault+0x3e/0x90
[  663.226278]  do_io_submit+0x37d/0x900
[  663.226279]  ? do_io_submit+0x1ac/0x900
[  663.226281]  SyS_io_submit+0xb/0x10
[  663.226282]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226283] RIP: 0033:0x7fc269d02787
[  663.226284] RSP: 002b:00007fc24766a948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.226285] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.226286] RDX: 00007fc1e804f248 RSI: 0000000000000001 RDI: 00007fc27bbf4000
[  663.226287] RBP: 0000000000000088 R08: 0000000000000001 R09: 00007fc1e8048da0
[  663.226288] R10: 00007fc1e8022000 R11: 0000000000000202 R12: 00007fc24f116770
[  663.226289] R13: 00007fc1e804f360 R14: 0000000000000000 R15: 0000000000000001
[  663.226291] fio             D    0  9280   8846 0x00000000
[  663.226293] Call Trace:
[  663.226295]  __schedule+0x2da/0xb00
[  663.226297]  schedule+0x38/0x90
[  663.226298]  schedule_timeout+0x2fe/0x640
[  663.226300]  ? mark_held_locks+0x6f/0xa0
[  663.226302]  ? ktime_get+0x74/0x130
[  663.226303]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226305]  ? trace_hardirqs_on+0xd/0x10
[  663.226307]  ? ktime_get+0x98/0x130
[  663.226309]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226310]  io_schedule_timeout+0x9f/0x110
[  663.226312]  blk_mq_get_tag+0x158/0x260
[  663.226313]  ? remove_wait_queue+0x70/0x70
[  663.226315]  __blk_mq_alloc_request+0x16/0xe0
[  663.226317]  blk_mq_sched_get_request+0x279/0x370
[  663.226319]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226321]  ? generic_make_request+0xca/0x290
[  663.226323]  blk_sq_make_request+0x111/0xc90
[  663.226325]  ? blk_queue_enter+0x2d/0x280
[  663.226327]  ? generic_make_request+0xca/0x290
[  663.226329]  generic_make_request+0xd7/0x290
[  663.226331]  submit_bio+0x5f/0x120
[  663.226333]  ? trace_hardirqs_on+0xd/0x10
[  663.226334]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226337]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.226338]  ? _raw_spin_unlock+0x22/0x30
[  663.226341]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226343]  __blockdev_direct_IO+0x35/0x40
[  663.226345]  ext4_direct_IO+0x19c/0x7b0
[  663.226347]  generic_file_direct_write+0xa6/0x150
[  663.226349]  __generic_file_write_iter+0xbb/0x1c0
[  663.226350]  ext4_file_write_iter+0x77/0x360
[  663.226352]  ? __sb_start_write+0xde/0x200
[  663.226354]  ? aio_write+0x14e/0x160
[  663.226355]  aio_write+0xd1/0x160
[  663.226357]  ? __might_fault+0x3e/0x90
[  663.226358]  do_io_submit+0x37d/0x900
[  663.226360]  ? do_io_submit+0x1ac/0x900
[  663.226362]  SyS_io_submit+0xb/0x10
[  663.226363]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226364] RIP: 0033:0x7fc269d02787
[  663.226365] RSP: 002b:00007fc246e69948 EFLAGS: 00000202 ORIG_RAX: 00000000000000d1
[  663.226367] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d02787
[  663.226368] RDX: 00007fc1e004f188 RSI: 0000000000000001 RDI: 00007fc27bc04000
[  663.226369] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc1e004d2a0
[  663.226370] R10: 00007fc1e0050000 R11: 0000000000000202 R12: 0000000000000020
[  663.226371] R13: 00007fc1e004ffe0 R14: 0000000000001000 R15: 0000000000000080
[  663.226374] kworker/10:4    D    0  9296      2 0x00000000
[  663.226377] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226378] Call Trace:
[  663.226381]  __schedule+0x2da/0xb00
[  663.226383]  ? bit_wait+0x50/0x50
[  663.226384]  schedule+0x38/0x90
[  663.226386]  schedule_timeout+0x2fe/0x640
[  663.226388]  ? mark_held_locks+0x6f/0xa0
[  663.226390]  ? ktime_get+0x74/0x130
[  663.226392]  ? bit_wait+0x50/0x50
[  663.226393]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226395]  ? trace_hardirqs_on+0xd/0x10
[  663.226396]  ? ktime_get+0x98/0x130
[  663.226398]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226400]  ? bit_wait+0x50/0x50
[  663.226401]  io_schedule_timeout+0x9f/0x110
[  663.226404]  bit_wait_io+0x16/0x60
[  663.226406]  __wait_on_bit+0x53/0x80
[  663.226407]  ? bit_wait+0x50/0x50
[  663.226409]  out_of_line_wait_on_bit+0x6e/0x80
[  663.226411]  ? prepare_to_wait_event+0x170/0x170
[  663.226413]  sync_mapping_buffers+0x22f/0x390
[  663.226415]  __generic_file_fsync+0x4d/0x90
[  663.226418]  ext4_sync_file+0x2b4/0x540
[  663.226420]  vfs_fsync_range+0x46/0xa0
[  663.226421]  dio_complete+0x181/0x1b0
[  663.226423]  dio_aio_complete_work+0x17/0x20
[  663.226424]  process_one_work+0x208/0x6a0
[  663.226425]  ? process_one_work+0x18d/0x6a0
[  663.226427]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226429]  worker_thread+0x49/0x4a0
[  663.226431]  kthread+0x107/0x140
[  663.226432]  ? process_one_work+0x6a0/0x6a0
[  663.226434]  ? kthread_create_on_node+0x40/0x40
[  663.226436]  ret_from_fork+0x2e/0x40
[  663.226438] kworker/10:5    D    0  9297      2 0x00000000
[  663.226441] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226442] Call Trace:
[  663.226444]  __schedule+0x2da/0xb00
[  663.226446]  schedule+0x38/0x90
[  663.226448]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226450]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226452]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226454]  call_rwsem_down_write_failed+0x17/0x30
[  663.226457]  down_write+0x5a/0x70
[  663.226458]  ? __generic_file_fsync+0x43/0x90
[  663.226460]  __generic_file_fsync+0x43/0x90
[  663.226462]  ext4_sync_file+0x2b4/0x540
[  663.226463]  vfs_fsync_range+0x46/0xa0
[  663.226465]  dio_complete+0x181/0x1b0
[  663.226466]  dio_aio_complete_work+0x17/0x20
[  663.226468]  process_one_work+0x208/0x6a0
[  663.226470]  ? process_one_work+0x18d/0x6a0
[  663.226472]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226473]  worker_thread+0x49/0x4a0
[  663.226475]  kthread+0x107/0x140
[  663.226477]  ? process_one_work+0x6a0/0x6a0
[  663.226479]  ? kthread_create_on_node+0x40/0x40
[  663.226480]  ret_from_fork+0x2e/0x40
[  663.226482] kworker/10:6    D    0  9298      2 0x00000000
[  663.226485] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226486] Call Trace:
[  663.226488]  __schedule+0x2da/0xb00
[  663.226490]  schedule+0x38/0x90
[  663.226492]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226494]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226496]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226498]  call_rwsem_down_write_failed+0x17/0x30
[  663.226500]  down_write+0x5a/0x70
[  663.226503]  ? __generic_file_fsync+0x43/0x90
[  663.226504]  __generic_file_fsync+0x43/0x90
[  663.226506]  ext4_sync_file+0x2b4/0x540
[  663.226508]  vfs_fsync_range+0x46/0xa0
[  663.226509]  dio_complete+0x181/0x1b0
[  663.226510]  dio_aio_complete_work+0x17/0x20
[  663.226512]  process_one_work+0x208/0x6a0
[  663.226513]  ? process_one_work+0x18d/0x6a0
[  663.226515]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226517]  worker_thread+0x49/0x4a0
[  663.226519]  kthread+0x107/0x140
[  663.226521]  ? process_one_work+0x6a0/0x6a0
[  663.226522]  ? kthread_create_on_node+0x40/0x40
[  663.226524]  ret_from_fork+0x2e/0x40
[  663.226525] kworker/10:7    D    0  9299      2 0x00000000
[  663.226528] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226529] Call Trace:
[  663.226531]  __schedule+0x2da/0xb00
[  663.226533]  schedule+0x38/0x90
[  663.226535]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226537]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226539]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226541]  call_rwsem_down_write_failed+0x17/0x30
[  663.226543]  down_write+0x5a/0x70
[  663.226545]  ? __generic_file_fsync+0x43/0x90
[  663.226546]  __generic_file_fsync+0x43/0x90
[  663.226548]  ext4_sync_file+0x2b4/0x540
[  663.226549]  vfs_fsync_range+0x46/0xa0
[  663.226550]  dio_complete+0x181/0x1b0
[  663.226552]  dio_aio_complete_work+0x17/0x20
[  663.226553]  process_one_work+0x208/0x6a0
[  663.226554]  ? process_one_work+0x18d/0x6a0
[  663.226556]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226557]  worker_thread+0x49/0x4a0
[  663.226559]  kthread+0x107/0x140
[  663.226560]  ? process_one_work+0x6a0/0x6a0
[  663.226562]  ? kthread_create_on_node+0x40/0x40
[  663.226563]  ret_from_fork+0x2e/0x40
[  663.226565] kworker/10:8    D    0  9300      2 0x00000000
[  663.226568] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226569] Call Trace:
[  663.226571]  __schedule+0x2da/0xb00
[  663.226573]  schedule+0x38/0x90
[  663.226575]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226577]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226578]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226581]  call_rwsem_down_write_failed+0x17/0x30
[  663.226583]  down_write+0x5a/0x70
[  663.226584]  ? __generic_file_fsync+0x43/0x90
[  663.226586]  __generic_file_fsync+0x43/0x90
[  663.226588]  ext4_sync_file+0x2b4/0x540
[  663.226589]  vfs_fsync_range+0x46/0xa0
[  663.226591]  dio_complete+0x181/0x1b0
[  663.226592]  dio_aio_complete_work+0x17/0x20
[  663.226594]  process_one_work+0x208/0x6a0
[  663.226595]  ? process_one_work+0x18d/0x6a0
[  663.226597]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226599]  worker_thread+0x49/0x4a0
[  663.226601]  kthread+0x107/0x140
[  663.226602]  ? process_one_work+0x6a0/0x6a0
[  663.226604]  ? kthread_create_on_node+0x40/0x40
[  663.226606]  ret_from_fork+0x2e/0x40
[  663.226607] kworker/10:9    D    0  9301      2 0x00000000
[  663.226610] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226611] Call Trace:
[  663.226613]  __schedule+0x2da/0xb00
[  663.226615]  schedule+0x38/0x90
[  663.226617]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226619]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226621]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226624]  call_rwsem_down_write_failed+0x17/0x30
[  663.226626]  down_write+0x5a/0x70
[  663.226628]  ? __generic_file_fsync+0x43/0x90
[  663.226629]  __generic_file_fsync+0x43/0x90
[  663.226632]  ext4_sync_file+0x2b4/0x540
[  663.226633]  vfs_fsync_range+0x46/0xa0
[  663.226635]  dio_complete+0x181/0x1b0
[  663.226636]  dio_aio_complete_work+0x17/0x20
[  663.226638]  process_one_work+0x208/0x6a0
[  663.226639]  ? process_one_work+0x18d/0x6a0
[  663.226641]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226642]  worker_thread+0x49/0x4a0
[  663.226644]  kthread+0x107/0x140
[  663.226646]  ? process_one_work+0x6a0/0x6a0
[  663.226647]  ? kthread_create_on_node+0x40/0x40
[  663.226649]  ret_from_fork+0x2e/0x40
[  663.226650] kworker/10:10   D    0  9302      2 0x00000000
[  663.226653] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226654] Call Trace:
[  663.226656]  __schedule+0x2da/0xb00
[  663.226658]  schedule+0x38/0x90
[  663.226660]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226662]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226664]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226667]  call_rwsem_down_write_failed+0x17/0x30
[  663.226669]  down_write+0x5a/0x70
[  663.226671]  ? __generic_file_fsync+0x43/0x90
[  663.226673]  __generic_file_fsync+0x43/0x90
[  663.226674]  ext4_sync_file+0x2b4/0x540
[  663.226676]  vfs_fsync_range+0x46/0xa0
[  663.226677]  dio_complete+0x181/0x1b0
[  663.226678]  dio_aio_complete_work+0x17/0x20
[  663.226680]  process_one_work+0x208/0x6a0
[  663.226681]  ? process_one_work+0x18d/0x6a0
[  663.226683]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226684]  worker_thread+0x49/0x4a0
[  663.226686]  kthread+0x107/0x140
[  663.226687]  ? process_one_work+0x6a0/0x6a0
[  663.226688]  ? kthread_create_on_node+0x40/0x40
[  663.226690]  ret_from_fork+0x2e/0x40
[  663.226692] kworker/10:11   D    0  9303      2 0x00000000
[  663.226695] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226696] Call Trace:
[  663.226698]  __schedule+0x2da/0xb00
[  663.226700]  schedule+0x38/0x90
[  663.226702]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226704]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226706]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226708]  call_rwsem_down_write_failed+0x17/0x30
[  663.226710]  down_write+0x5a/0x70
[  663.226712]  ? __generic_file_fsync+0x43/0x90
[  663.226713]  __generic_file_fsync+0x43/0x90
[  663.226715]  ext4_sync_file+0x2b4/0x540
[  663.226717]  vfs_fsync_range+0x46/0xa0
[  663.226718]  dio_complete+0x181/0x1b0
[  663.226719]  dio_aio_complete_work+0x17/0x20
[  663.226721]  process_one_work+0x208/0x6a0
[  663.226722]  ? process_one_work+0x18d/0x6a0
[  663.226724]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226726]  worker_thread+0x49/0x4a0
[  663.226728]  kthread+0x107/0x140
[  663.226729]  ? process_one_work+0x6a0/0x6a0
[  663.226731]  ? kthread_create_on_node+0x40/0x40
[  663.226732]  ret_from_fork+0x2e/0x40
[  663.226734] kworker/10:12   D    0  9304      2 0x00000000
[  663.226737] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226738] Call Trace:
[  663.226740]  __schedule+0x2da/0xb00
[  663.226742]  schedule+0x38/0x90
[  663.226744]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226746]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226748]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226751]  call_rwsem_down_write_failed+0x17/0x30
[  663.226753]  down_write+0x5a/0x70
[  663.226755]  ? __generic_file_fsync+0x43/0x90
[  663.226757]  __generic_file_fsync+0x43/0x90
[  663.226758]  ext4_sync_file+0x2b4/0x540
[  663.226760]  vfs_fsync_range+0x46/0xa0
[  663.226761]  dio_complete+0x181/0x1b0
[  663.226763]  dio_aio_complete_work+0x17/0x20
[  663.226764]  process_one_work+0x208/0x6a0
[  663.226765]  ? process_one_work+0x18d/0x6a0
[  663.226767]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226769]  worker_thread+0x49/0x4a0
[  663.226771]  kthread+0x107/0x140
[  663.226772]  ? process_one_work+0x6a0/0x6a0
[  663.226773]  ? kthread_create_on_node+0x40/0x40
[  663.226775]  ret_from_fork+0x2e/0x40
[  663.226777] kworker/10:13   D    0  9305      2 0x00000000
[  663.226779] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226781] Call Trace:
[  663.226783]  __schedule+0x2da/0xb00
[  663.226785]  ? bit_wait+0x50/0x50
[  663.226787]  schedule+0x38/0x90
[  663.226789]  schedule_timeout+0x2fe/0x640
[  663.226790]  ? mark_held_locks+0x6f/0xa0
[  663.226792]  ? ktime_get+0x74/0x130
[  663.226794]  ? bit_wait+0x50/0x50
[  663.226795]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226796]  ? trace_hardirqs_on+0xd/0x10
[  663.226798]  ? ktime_get+0x98/0x130
[  663.226800]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226801]  ? bit_wait+0x50/0x50
[  663.226803]  io_schedule_timeout+0x9f/0x110
[  663.226805]  bit_wait_io+0x16/0x60
[  663.226808]  __wait_on_bit+0x53/0x80
[  663.226810]  ? bit_wait+0x50/0x50
[  663.226811]  out_of_line_wait_on_bit+0x6e/0x80
[  663.226813]  ? prepare_to_wait_event+0x170/0x170
[  663.226816]  sync_mapping_buffers+0x22f/0x390
[  663.226818]  __generic_file_fsync+0x4d/0x90
[  663.226820]  ext4_sync_file+0x2b4/0x540
[  663.226822]  vfs_fsync_range+0x46/0xa0
[  663.226823]  dio_complete+0x181/0x1b0
[  663.226824]  dio_aio_complete_work+0x17/0x20
[  663.226826]  process_one_work+0x208/0x6a0
[  663.226827]  ? process_one_work+0x18d/0x6a0
[  663.226829]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226830]  worker_thread+0x49/0x4a0
[  663.226832]  kthread+0x107/0x140
[  663.226834]  ? process_one_work+0x6a0/0x6a0
[  663.226835]  ? kthread_create_on_node+0x40/0x40
[  663.226837]  ret_from_fork+0x2e/0x40
[  663.226838] kworker/10:14   D    0  9306      2 0x00000000
[  663.226841] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226842] Call Trace:
[  663.226844]  __schedule+0x2da/0xb00
[  663.226846]  schedule+0x38/0x90
[  663.226848]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226851]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226853]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226855]  ? trace_hardirqs_on+0xd/0x10
[  663.226857]  call_rwsem_down_write_failed+0x17/0x30
[  663.226859]  down_write+0x5a/0x70
[  663.226861]  ? __generic_file_fsync+0x43/0x90
[  663.226863]  __generic_file_fsync+0x43/0x90
[  663.226864]  ext4_sync_file+0x2b4/0x540
[  663.226866]  vfs_fsync_range+0x46/0xa0
[  663.226867]  dio_complete+0x181/0x1b0
[  663.226869]  dio_aio_complete_work+0x17/0x20
[  663.226870]  process_one_work+0x208/0x6a0
[  663.226872]  ? process_one_work+0x18d/0x6a0
[  663.226874]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226875]  worker_thread+0x49/0x4a0
[  663.226877]  kthread+0x107/0x140
[  663.226878]  ? process_one_work+0x6a0/0x6a0
[  663.226880]  ? kthread_create_on_node+0x40/0x40
[  663.226881]  ret_from_fork+0x2e/0x40
[  663.226883] kworker/10:15   D    0  9307      2 0x00000000
[  663.226885] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226886] Call Trace:
[  663.226888]  __schedule+0x2da/0xb00
[  663.226890]  schedule+0x38/0x90
[  663.226892]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226894]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226896]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226898]  call_rwsem_down_write_failed+0x17/0x30
[  663.226900]  down_write+0x5a/0x70
[  663.226901]  ? __generic_file_fsync+0x43/0x90
[  663.226903]  __generic_file_fsync+0x43/0x90
[  663.226905]  ext4_sync_file+0x2b4/0x540
[  663.226906]  vfs_fsync_range+0x46/0xa0
[  663.226907]  dio_complete+0x181/0x1b0
[  663.226909]  dio_aio_complete_work+0x17/0x20
[  663.226910]  process_one_work+0x208/0x6a0
[  663.226912]  ? process_one_work+0x18d/0x6a0
[  663.226914]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226916]  worker_thread+0x49/0x4a0
[  663.226917]  kthread+0x107/0x140
[  663.226918]  ? process_one_work+0x6a0/0x6a0
[  663.226920]  ? kthread_create_on_node+0x40/0x40
[  663.226921]  ret_from_fork+0x2e/0x40
[  663.226923] kworker/10:16   D    0  9308      2 0x00000000
[  663.226926] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226927] Call Trace:
[  663.226929]  __schedule+0x2da/0xb00
[  663.226931]  schedule+0x38/0x90
[  663.226933]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226935]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226937]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226939]  call_rwsem_down_write_failed+0x17/0x30
[  663.226941]  down_write+0x5a/0x70
[  663.226943]  ? __generic_file_fsync+0x43/0x90
[  663.226945]  __generic_file_fsync+0x43/0x90
[  663.226946]  ext4_sync_file+0x2b4/0x540
[  663.226948]  vfs_fsync_range+0x46/0xa0
[  663.226949]  dio_complete+0x181/0x1b0
[  663.226951]  dio_aio_complete_work+0x17/0x20
[  663.226952]  process_one_work+0x208/0x6a0
[  663.226954]  ? process_one_work+0x18d/0x6a0
[  663.226956]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226957]  worker_thread+0x49/0x4a0
[  663.226959]  kthread+0x107/0x140
[  663.226960]  ? process_one_work+0x6a0/0x6a0
[  663.226962]  ? kthread_create_on_node+0x40/0x40
[  663.226964]  ret_from_fork+0x2e/0x40
[  663.226965] kworker/10:17   D    0  9309      2 0x00000000
[  663.226968] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226969] Call Trace:
[  663.226971]  __schedule+0x2da/0xb00
[  663.226973]  schedule+0x38/0x90
[  663.226975]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226978]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226980]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226982]  call_rwsem_down_write_failed+0x17/0x30
[  663.226984]  down_write+0x5a/0x70
[  663.226986]  ? __generic_file_fsync+0x43/0x90
[  663.226988]  __generic_file_fsync+0x43/0x90
[  663.226989]  ext4_sync_file+0x2b4/0x540
[  663.226992]  vfs_fsync_range+0x46/0xa0
[  663.226993]  dio_complete+0x181/0x1b0
[  663.226994]  dio_aio_complete_work+0x17/0x20
[  663.226995]  process_one_work+0x208/0x6a0
[  663.226996]  ? process_one_work+0x18d/0x6a0
[  663.226998]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227000]  worker_thread+0x49/0x4a0
[  663.227001]  kthread+0x107/0x140
[  663.227002]  ? process_one_work+0x6a0/0x6a0
[  663.227004]  ? kthread_create_on_node+0x40/0x40
[  663.227005]  ret_from_fork+0x2e/0x40
[  663.227007] kworker/10:18   D    0  9310      2 0x00000000
[  663.227010] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227011] Call Trace:
[  663.227013]  __schedule+0x2da/0xb00
[  663.227015]  schedule+0x38/0x90
[  663.227017]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227019]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227020]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227023]  call_rwsem_down_write_failed+0x17/0x30
[  663.227025]  down_write+0x5a/0x70
[  663.227027]  ? __generic_file_fsync+0x43/0x90
[  663.227028]  __generic_file_fsync+0x43/0x90
[  663.227031]  ext4_sync_file+0x2b4/0x540
[  663.227032]  vfs_fsync_range+0x46/0xa0
[  663.227034]  dio_complete+0x181/0x1b0
[  663.227035]  dio_aio_complete_work+0x17/0x20
[  663.227036]  process_one_work+0x208/0x6a0
[  663.227037]  ? process_one_work+0x18d/0x6a0
[  663.227039]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227041]  worker_thread+0x49/0x4a0
[  663.227043]  kthread+0x107/0x140
[  663.227044]  ? process_one_work+0x6a0/0x6a0
[  663.227046]  ? kthread_create_on_node+0x40/0x40
[  663.227047]  ret_from_fork+0x2e/0x40
[  663.227049] kworker/10:19   D    0  9311      2 0x00000000
[  663.227052] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227053] Call Trace:
[  663.227055]  __schedule+0x2da/0xb00
[  663.227057]  schedule+0x38/0x90
[  663.227059]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227061]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227063]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227066]  call_rwsem_down_write_failed+0x17/0x30
[  663.227068]  down_write+0x5a/0x70
[  663.227070]  ? __generic_file_fsync+0x43/0x90
[  663.227072]  __generic_file_fsync+0x43/0x90
[  663.227074]  ext4_sync_file+0x2b4/0x540
[  663.227075]  vfs_fsync_range+0x46/0xa0
[  663.227077]  dio_complete+0x181/0x1b0
[  663.227078]  dio_aio_complete_work+0x17/0x20
[  663.227079]  process_one_work+0x208/0x6a0
[  663.227080]  ? process_one_work+0x18d/0x6a0
[  663.227082]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227083]  worker_thread+0x49/0x4a0
[  663.227085]  kthread+0x107/0x140
[  663.227087]  ? process_one_work+0x6a0/0x6a0
[  663.227088]  ? kthread_create_on_node+0x40/0x40
[  663.227090]  ret_from_fork+0x2e/0x40
[  663.227092] kworker/10:20   D    0  9312      2 0x00000000
[  663.227094] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227095] Call Trace:
[  663.227098]  __schedule+0x2da/0xb00
[  663.227099]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.227101]  schedule+0x38/0x90
[  663.227103]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227105]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227106]  ? trace_hardirqs_on+0xd/0x10
[  663.227108]  call_rwsem_down_write_failed+0x17/0x30
[  663.227111]  down_write+0x5a/0x70
[  663.227112]  ? __generic_file_fsync+0x43/0x90
[  663.227114]  __generic_file_fsync+0x43/0x90
[  663.227115]  ext4_sync_file+0x2b4/0x540
[  663.227117]  vfs_fsync_range+0x46/0xa0
[  663.227118]  dio_complete+0x181/0x1b0
[  663.227119]  dio_aio_complete_work+0x17/0x20
[  663.227121]  process_one_work+0x208/0x6a0
[  663.227122]  ? process_one_work+0x18d/0x6a0
[  663.227123]  worker_thread+0x49/0x4a0
[  663.227125]  kthread+0x107/0x140
[  663.227126]  ? process_one_work+0x6a0/0x6a0
[  663.227128]  ? kthread_create_on_node+0x40/0x40
[  663.227129]  ret_from_fork+0x2e/0x40
[  663.227131] kworker/10:21   D    0  9313      2 0x00000000
[  663.227134] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227135] Call Trace:
[  663.227137]  __schedule+0x2da/0xb00
[  663.227139]  schedule+0x38/0x90
[  663.227141]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227143]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227145]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227147]  call_rwsem_down_write_failed+0x17/0x30
[  663.227149]  down_write+0x5a/0x70
[  663.227151]  ? __generic_file_fsync+0x43/0x90
[  663.227153]  __generic_file_fsync+0x43/0x90
[  663.227155]  ext4_sync_file+0x2b4/0x540
[  663.227156]  vfs_fsync_range+0x46/0xa0
[  663.227157]  dio_complete+0x181/0x1b0
[  663.227159]  dio_aio_complete_work+0x17/0x20
[  663.227160]  process_one_work+0x208/0x6a0
[  663.227161]  ? process_one_work+0x18d/0x6a0
[  663.227163]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227165]  worker_thread+0x49/0x4a0
[  663.227167]  kthread+0x107/0x140
[  663.227168]  ? process_one_work+0x6a0/0x6a0
[  663.227170]  ? kthread_create_on_node+0x40/0x40
[  663.227171]  ret_from_fork+0x2e/0x40
[  663.227173] kworker/10:22   D    0  9314      2 0x00000000
[  663.227176] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227177] Call Trace:
[  663.227179]  __schedule+0x2da/0xb00
[  663.227181]  schedule+0x38/0x90
[  663.227183]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227185]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227187]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227190]  call_rwsem_down_write_failed+0x17/0x30
[  663.227192]  down_write+0x5a/0x70
[  663.227194]  ? __generic_file_fsync+0x43/0x90
[  663.227195]  __generic_file_fsync+0x43/0x90
[  663.227197]  ext4_sync_file+0x2b4/0x540
[  663.227199]  vfs_fsync_range+0x46/0xa0
[  663.227200]  dio_complete+0x181/0x1b0
[  663.227202]  dio_aio_complete_work+0x17/0x20
[  663.227203]  process_one_work+0x208/0x6a0
[  663.227204]  ? process_one_work+0x18d/0x6a0
[  663.227206]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227207]  worker_thread+0x49/0x4a0
[  663.227209]  kthread+0x107/0x140
[  663.227210]  ? process_one_work+0x6a0/0x6a0
[  663.227212]  ? kthread_create_on_node+0x40/0x40
[  663.227213]  ret_from_fork+0x2e/0x40
[  663.227215] kworker/10:23   D    0  9315      2 0x00000000
[  663.227218] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227219] Call Trace:
[  663.227221]  __schedule+0x2da/0xb00
[  663.227223]  schedule+0x38/0x90
[  663.227225]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227226]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227228]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227231]  call_rwsem_down_write_failed+0x17/0x30
[  663.227233]  down_write+0x5a/0x70
[  663.227234]  ? __generic_file_fsync+0x43/0x90
[  663.227236]  __generic_file_fsync+0x43/0x90
[  663.227237]  ext4_sync_file+0x2b4/0x540
[  663.227239]  vfs_fsync_range+0x46/0xa0
[  663.227240]  dio_complete+0x181/0x1b0
[  663.227241]  dio_aio_complete_work+0x17/0x20
[  663.227242]  process_one_work+0x208/0x6a0
[  663.227243]  ? process_one_work+0x18d/0x6a0
[  663.227246]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227247]  worker_thread+0x49/0x4a0
[  663.227249]  kthread+0x107/0x140
[  663.227250]  ? process_one_work+0x6a0/0x6a0
[  663.227252]  ? kthread_create_on_node+0x40/0x40
[  663.227253]  ret_from_fork+0x2e/0x40
[  663.227255] kworker/10:24   D    0  9316      2 0x00000000
[  663.227257] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227258] Call Trace:
[  663.227260]  __schedule+0x2da/0xb00
[  663.227262]  schedule+0x38/0x90
[  663.227264]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227265]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227267]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227269]  ? trace_hardirqs_on+0xd/0x10
[  663.227271]  call_rwsem_down_write_failed+0x17/0x30
[  663.227273]  down_write+0x5a/0x70
[  663.227274]  ? __generic_file_fsync+0x43/0x90
[  663.227276]  __generic_file_fsync+0x43/0x90
[  663.227278]  ext4_sync_file+0x2b4/0x540
[  663.227280]  vfs_fsync_range+0x46/0xa0
[  663.227281]  dio_complete+0x181/0x1b0
[  663.227282]  dio_aio_complete_work+0x17/0x20
[  663.227284]  process_one_work+0x208/0x6a0
[  663.227285]  ? process_one_work+0x18d/0x6a0
[  663.227287]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227288]  worker_thread+0x49/0x4a0
[  663.227290]  kthread+0x107/0x140
[  663.227291]  ? process_one_work+0x6a0/0x6a0
[  663.227293]  ? kthread_create_on_node+0x40/0x40
[  663.227295]  ret_from_fork+0x2e/0x40
[  663.227296] kworker/10:25   D    0  9317      2 0x00000000
[  663.227299] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227300] Call Trace:
[  663.227302]  __schedule+0x2da/0xb00
[  663.227304]  schedule+0x38/0x90
[  663.227306]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227309]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227311]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227314]  call_rwsem_down_write_failed+0x17/0x30
[  663.227316]  down_write+0x5a/0x70
[  663.227318]  ? __generic_file_fsync+0x43/0x90
[  663.227320]  __generic_file_fsync+0x43/0x90
[  663.227321]  ext4_sync_file+0x2b4/0x540
[  663.227323]  vfs_fsync_range+0x46/0xa0
[  663.227324]  dio_complete+0x181/0x1b0
[  663.227326]  dio_aio_complete_work+0x17/0x20
[  663.227327]  process_one_work+0x208/0x6a0
[  663.227328]  ? process_one_work+0x18d/0x6a0
[  663.227330]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227332]  worker_thread+0x49/0x4a0
[  663.227334]  kthread+0x107/0x140
[  663.227335]  ? process_one_work+0x6a0/0x6a0
[  663.227337]  ? kthread_create_on_node+0x40/0x40
[  663.227338]  ret_from_fork+0x2e/0x40
[  663.227340] kworker/10:26   D    0  9318      2 0x00000000
[  663.227342] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227343] Call Trace:
[  663.227345]  __schedule+0x2da/0xb00
[  663.227347]  schedule+0x38/0x90
[  663.227349]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227351]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227353]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227356]  call_rwsem_down_write_failed+0x17/0x30
[  663.227358]  down_write+0x5a/0x70
[  663.227359]  ? __generic_file_fsync+0x43/0x90
[  663.227361]  __generic_file_fsync+0x43/0x90
[  663.227363]  ext4_sync_file+0x2b4/0x540
[  663.227365]  vfs_fsync_range+0x46/0xa0
[  663.227366]  dio_complete+0x181/0x1b0
[  663.227367]  dio_aio_complete_work+0x17/0x20
[  663.227368]  process_one_work+0x208/0x6a0
[  663.227369]  ? process_one_work+0x18d/0x6a0
[  663.227371]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227373]  worker_thread+0x49/0x4a0
[  663.227374]  kthread+0x107/0x140
[  663.227376]  ? process_one_work+0x6a0/0x6a0
[  663.227377]  ? kthread_create_on_node+0x40/0x40
[  663.227378]  ret_from_fork+0x2e/0x40
[  663.227380] kworker/10:27   D    0  9319      2 0x00000000
[  663.227383] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227383] Call Trace:
[  663.227385]  __schedule+0x2da/0xb00
[  663.227387]  schedule+0x38/0x90
[  663.227389]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227391]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227393]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227395]  call_rwsem_down_write_failed+0x17/0x30
[  663.227397]  down_write+0x5a/0x70
[  663.227400]  ? __generic_file_fsync+0x43/0x90
[  663.227401]  __generic_file_fsync+0x43/0x90
[  663.227403]  ext4_sync_file+0x2b4/0x540
[  663.227404]  vfs_fsync_range+0x46/0xa0
[  663.227406]  dio_complete+0x181/0x1b0
[  663.227407]  dio_aio_complete_work+0x17/0x20
[  663.227409]  process_one_work+0x208/0x6a0
[  663.227410]  ? process_one_work+0x18d/0x6a0
[  663.227412]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227414]  worker_thread+0x49/0x4a0
[  663.227416]  kthread+0x107/0x140
[  663.227417]  ? process_one_work+0x6a0/0x6a0
[  663.227419]  ? kthread_create_on_node+0x40/0x40
[  663.227420]  ret_from_fork+0x2e/0x40
[  663.227422] kworker/10:28   D    0  9320      2 0x00000000
[  663.227425] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227426] Call Trace:
[  663.227428]  __schedule+0x2da/0xb00
[  663.227430]  schedule+0x38/0x90
[  663.227432]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227435]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227437]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227440]  call_rwsem_down_write_failed+0x17/0x30
[  663.227442]  down_write+0x5a/0x70
[  663.227443]  ? __generic_file_fsync+0x43/0x90
[  663.227445]  __generic_file_fsync+0x43/0x90
[  663.227447]  ext4_sync_file+0x2b4/0x540
[  663.227448]  vfs_fsync_range+0x46/0xa0
[  663.227449]  dio_complete+0x181/0x1b0
[  663.227451]  dio_aio_complete_work+0x17/0x20
[  663.227452]  process_one_work+0x208/0x6a0
[  663.227454]  ? process_one_work+0x18d/0x6a0
[  663.227455]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227457]  worker_thread+0x49/0x4a0
[  663.227459]  kthread+0x107/0x140
[  663.227460]  ? process_one_work+0x6a0/0x6a0
[  663.227462]  ? kthread_create_on_node+0x40/0x40
[  663.227463]  ret_from_fork+0x2e/0x40
[  663.227466] kworker/10:29   D    0  9321      2 0x00000000
[  663.227468] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227469] Call Trace:
[  663.227471]  __schedule+0x2da/0xb00
[  663.227473]  schedule+0x38/0x90
[  663.227475]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227477]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227479]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227481]  call_rwsem_down_write_failed+0x17/0x30
[  663.227483]  down_write+0x5a/0x70
[  663.227485]  ? __generic_file_fsync+0x43/0x90
[  663.227486]  __generic_file_fsync+0x43/0x90
[  663.227488]  ext4_sync_file+0x2b4/0x540
[  663.227489]  vfs_fsync_range+0x46/0xa0
[  663.227490]  dio_complete+0x181/0x1b0
[  663.227492]  dio_aio_complete_work+0x17/0x20
[  663.227493]  process_one_work+0x208/0x6a0
[  663.227494]  ? process_one_work+0x18d/0x6a0
[  663.227496]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227497]  worker_thread+0x49/0x4a0
[  663.227499]  kthread+0x107/0x140
[  663.227501]  ? process_one_work+0x6a0/0x6a0
[  663.227502]  ? kthread_create_on_node+0x40/0x40
[  663.227504]  ret_from_fork+0x2e/0x40
[  663.227505] kworker/10:30   D    0  9322      2 0x00000000
[  663.227508] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227508] Call Trace:
[  663.227510]  __schedule+0x2da/0xb00
[  663.227512]  schedule+0x38/0x90
[  663.227514]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227516]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227518]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227520]  ? trace_hardirqs_on+0xd/0x10
[  663.227523]  call_rwsem_down_write_failed+0x17/0x30
[  663.227525]  down_write+0x5a/0x70
[  663.227527]  ? __generic_file_fsync+0x43/0x90
[  663.227528]  __generic_file_fsync+0x43/0x90
[  663.227530]  ext4_sync_file+0x2b4/0x540
[  663.227532]  vfs_fsync_range+0x46/0xa0
[  663.227533]  dio_complete+0x181/0x1b0
[  663.227535]  dio_aio_complete_work+0x17/0x20
[  663.227536]  process_one_work+0x208/0x6a0
[  663.227537]  ? process_one_work+0x18d/0x6a0
[  663.227539]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227540]  worker_thread+0x49/0x4a0
[  663.227542]  kthread+0x107/0x140
[  663.227544]  ? process_one_work+0x6a0/0x6a0
[  663.227545]  ? kthread_create_on_node+0x40/0x40
[  663.227547]  ret_from_fork+0x2e/0x40
[  663.227549] kworker/10:31   D    0  9323      2 0x00000000
[  663.227551] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227552] Call Trace:
[  663.227555]  __schedule+0x2da/0xb00
[  663.227556]  schedule+0x38/0x90
[  663.227559]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227561]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227563]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227565]  call_rwsem_down_write_failed+0x17/0x30
[  663.227568]  down_write+0x5a/0x70
[  663.227569]  ? __generic_file_fsync+0x43/0x90
[  663.227571]  __generic_file_fsync+0x43/0x90
[  663.227572]  ext4_sync_file+0x2b4/0x540
[  663.227574]  vfs_fsync_range+0x46/0xa0
[  663.227575]  dio_complete+0x181/0x1b0
[  663.227577]  dio_aio_complete_work+0x17/0x20
[  663.227578]  process_one_work+0x208/0x6a0
[  663.227579]  ? process_one_work+0x18d/0x6a0
[  663.227581]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227582]  worker_thread+0x49/0x4a0
[  663.227584]  kthread+0x107/0x140
[  663.227586]  ? process_one_work+0x6a0/0x6a0
[  663.227587]  ? kthread_create_on_node+0x40/0x40
[  663.227589]  ret_from_fork+0x2e/0x40
[  663.227591] kworker/10:32   D    0  9324      2 0x00000000
[  663.227593] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227594] Call Trace:
[  663.227596]  __schedule+0x2da/0xb00
[  663.227598]  schedule+0x38/0x90
[  663.227601]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227603]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227604]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227607]  call_rwsem_down_write_failed+0x17/0x30
[  663.227609]  down_write+0x5a/0x70
[  663.227610]  ? __generic_file_fsync+0x43/0x90
[  663.227612]  __generic_file_fsync+0x43/0x90
[  663.227614]  ext4_sync_file+0x2b4/0x540
[  663.227616]  vfs_fsync_range+0x46/0xa0
[  663.227617]  dio_complete+0x181/0x1b0
[  663.227618]  dio_aio_complete_work+0x17/0x20
[  663.227619]  process_one_work+0x208/0x6a0
[  663.227620]  ? process_one_work+0x18d/0x6a0
[  663.227622]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227624]  worker_thread+0x49/0x4a0
[  663.227625]  kthread+0x107/0x140
[  663.227626]  ? process_one_work+0x6a0/0x6a0
[  663.227628]  ? kthread_create_on_node+0x40/0x40
[  663.227629]  ret_from_fork+0x2e/0x40
[  663.227631] kworker/10:33   D    0  9325      2 0x00000000
[  663.227634] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227635] Call Trace:
[  663.227637]  __schedule+0x2da/0xb00
[  663.227639]  schedule+0x38/0x90
[  663.227641]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227643]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227645]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227647]  call_rwsem_down_write_failed+0x17/0x30
[  663.227649]  down_write+0x5a/0x70
[  663.227651]  ? __generic_file_fsync+0x43/0x90
[  663.227653]  __generic_file_fsync+0x43/0x90
[  663.227654]  ext4_sync_file+0x2b4/0x540
[  663.227656]  vfs_fsync_range+0x46/0xa0
[  663.227657]  dio_complete+0x181/0x1b0
[  663.227659]  dio_aio_complete_work+0x17/0x20
[  663.227660]  process_one_work+0x208/0x6a0
[  663.227661]  ? process_one_work+0x18d/0x6a0
[  663.227663]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227665]  worker_thread+0x49/0x4a0
[  663.227667]  kthread+0x107/0x140
[  663.227668]  ? process_one_work+0x6a0/0x6a0
[  663.227670]  ? kthread_create_on_node+0x40/0x40
[  663.227672]  ret_from_fork+0x2e/0x40
[  663.227673] kworker/10:34   D    0  9326      2 0x00000000
[  663.227676] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227677] Call Trace:
[  663.227679]  __schedule+0x2da/0xb00
[  663.227681]  schedule+0x38/0x90
[  663.227683]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227685]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227687]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227690]  call_rwsem_down_write_failed+0x17/0x30
[  663.227692]  down_write+0x5a/0x70
[  663.227693]  ? __generic_file_fsync+0x43/0x90
[  663.227695]  __generic_file_fsync+0x43/0x90
[  663.227697]  ext4_sync_file+0x2b4/0x540
[  663.227698]  vfs_fsync_range+0x46/0xa0
[  663.227700]  dio_complete+0x181/0x1b0
[  663.227701]  dio_aio_complete_work+0x17/0x20
[  663.227702]  process_one_work+0x208/0x6a0
[  663.227703]  ? process_one_work+0x18d/0x6a0
[  663.227705]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227707]  worker_thread+0x49/0x4a0
[  663.227709]  kthread+0x107/0x140
[  663.227710]  ? process_one_work+0x6a0/0x6a0
[  663.227711]  ? kthread_create_on_node+0x40/0x40
[  663.227713]  ret_from_fork+0x2e/0x40
[  663.227715] kworker/10:35   D    0  9327      2 0x00000000
[  663.227717] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227718] Call Trace:
[  663.227720]  __schedule+0x2da/0xb00
[  663.227722]  schedule+0x38/0x90
[  663.227724]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227726]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227728]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227730]  call_rwsem_down_write_failed+0x17/0x30
[  663.227733]  down_write+0x5a/0x70
[  663.227734]  ? __generic_file_fsync+0x43/0x90
[  663.227736]  __generic_file_fsync+0x43/0x90
[  663.227738]  ext4_sync_file+0x2b4/0x540
[  663.227739]  vfs_fsync_range+0x46/0xa0
[  663.227741]  dio_complete+0x181/0x1b0
[  663.227742]  dio_aio_complete_work+0x17/0x20
[  663.227743]  process_one_work+0x208/0x6a0
[  663.227745]  ? process_one_work+0x18d/0x6a0
[  663.227747]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227748]  worker_thread+0x49/0x4a0
[  663.227750]  kthread+0x107/0x140
[  663.227751]  ? process_one_work+0x6a0/0x6a0
[  663.227753]  ? kthread_create_on_node+0x40/0x40
[  663.227754]  ret_from_fork+0x2e/0x40
[  663.227756] kworker/10:36   D    0  9328      2 0x00000000
[  663.227759] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227760] Call Trace:
[  663.227762]  __schedule+0x2da/0xb00
[  663.227764]  schedule+0x38/0x90
[  663.227767]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227769]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227771]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227773]  call_rwsem_down_write_failed+0x17/0x30
[  663.227775]  down_write+0x5a/0x70
[  663.227777]  ? __generic_file_fsync+0x43/0x90
[  663.227779]  __generic_file_fsync+0x43/0x90
[  663.227780]  ext4_sync_file+0x2b4/0x540
[  663.227782]  vfs_fsync_range+0x46/0xa0
[  663.227783]  dio_complete+0x181/0x1b0
[  663.227785]  dio_aio_complete_work+0x17/0x20
[  663.227786]  process_one_work+0x208/0x6a0
[  663.227787]  ? process_one_work+0x18d/0x6a0
[  663.227789]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227790]  worker_thread+0x49/0x4a0
[  663.227792]  kthread+0x107/0x140
[  663.227794]  ? process_one_work+0x6a0/0x6a0
[  663.227795]  ? kthread_create_on_node+0x40/0x40
[  663.227797]  ret_from_fork+0x2e/0x40
[  663.227799] kworker/10:37   D    0  9329      2 0x00000000
[  663.227802] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227803] Call Trace:
[  663.227805]  __schedule+0x2da/0xb00
[  663.227807]  schedule+0x38/0x90
[  663.227809]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227810]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227812]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227815]  call_rwsem_down_write_failed+0x17/0x30
[  663.227817]  down_write+0x5a/0x70
[  663.227818]  ? __generic_file_fsync+0x43/0x90
[  663.227820]  __generic_file_fsync+0x43/0x90
[  663.227821]  ext4_sync_file+0x2b4/0x540
[  663.227823]  vfs_fsync_range+0x46/0xa0
[  663.227824]  dio_complete+0x181/0x1b0
[  663.227825]  dio_aio_complete_work+0x17/0x20
[  663.227826]  process_one_work+0x208/0x6a0
[  663.227828]  ? process_one_work+0x18d/0x6a0
[  663.227829]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227831]  worker_thread+0x49/0x4a0
[  663.227833]  kthread+0x107/0x140
[  663.227834]  ? process_one_work+0x6a0/0x6a0
[  663.227836]  ? kthread_create_on_node+0x40/0x40
[  663.227837]  ret_from_fork+0x2e/0x40
[  663.227838] kworker/10:38   D    0  9330      2 0x00000000
[  663.227841] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227842] Call Trace:
[  663.227844]  __schedule+0x2da/0xb00
[  663.227845]  schedule+0x38/0x90
[  663.227847]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227849]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227851]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227853]  call_rwsem_down_write_failed+0x17/0x30
[  663.227855]  down_write+0x5a/0x70
[  663.227857]  ? __generic_file_fsync+0x43/0x90
[  663.227858]  __generic_file_fsync+0x43/0x90
[  663.227860]  ext4_sync_file+0x2b4/0x540
[  663.227861]  vfs_fsync_range+0x46/0xa0
[  663.227863]  dio_complete+0x181/0x1b0
[  663.227864]  dio_aio_complete_work+0x17/0x20
[  663.227865]  process_one_work+0x208/0x6a0
[  663.227866]  ? process_one_work+0x18d/0x6a0
[  663.227868]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227869]  worker_thread+0x49/0x4a0
[  663.227871]  kthread+0x107/0x140
[  663.227872]  ? process_one_work+0x6a0/0x6a0
[  663.227874]  ? kthread_create_on_node+0x40/0x40
[  663.227876]  ret_from_fork+0x2e/0x40
[  663.227877] kworker/10:39   D    0  9331      2 0x00000000
[  663.227880] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227881] Call Trace:
[  663.227883]  __schedule+0x2da/0xb00
[  663.227885]  schedule+0x38/0x90
[  663.227887]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227889]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227891]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227894]  call_rwsem_down_write_failed+0x17/0x30
[  663.227896]  down_write+0x5a/0x70
[  663.227897]  ? __generic_file_fsync+0x43/0x90
[  663.227899]  __generic_file_fsync+0x43/0x90
[  663.227901]  ext4_sync_file+0x2b4/0x540
[  663.227902]  vfs_fsync_range+0x46/0xa0
[  663.227904]  dio_complete+0x181/0x1b0
[  663.227905]  dio_aio_complete_work+0x17/0x20
[  663.227907]  process_one_work+0x208/0x6a0
[  663.227908]  ? process_one_work+0x18d/0x6a0
[  663.227910]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227911]  worker_thread+0x49/0x4a0
[  663.227913]  kthread+0x107/0x140
[  663.227915]  ? process_one_work+0x6a0/0x6a0
[  663.227916]  ? kthread_create_on_node+0x40/0x40
[  663.227918]  ret_from_fork+0x2e/0x40
[  663.227920] kworker/10:40   D    0  9332      2 0x00000000
[  663.227922] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227923] Call Trace:
[  663.227925]  __schedule+0x2da/0xb00
[  663.227927]  schedule+0x38/0x90
[  663.227929]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227931]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227933]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227936]  call_rwsem_down_write_failed+0x17/0x30
[  663.227938]  down_write+0x5a/0x70
[  663.227939]  ? __generic_file_fsync+0x43/0x90
[  663.227941]  __generic_file_fsync+0x43/0x90
[  663.227942]  ext4_sync_file+0x2b4/0x540
[  663.227944]  vfs_fsync_range+0x46/0xa0
[  663.227945]  dio_complete+0x181/0x1b0
[  663.227947]  dio_aio_complete_work+0x17/0x20
[  663.227948]  process_one_work+0x208/0x6a0
[  663.227949]  ? process_one_work+0x18d/0x6a0
[  663.227951]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227953]  worker_thread+0x49/0x4a0
[  663.227954]  kthread+0x107/0x140
[  663.227956]  ? process_one_work+0x6a0/0x6a0
[  663.227957]  ? kthread_create_on_node+0x40/0x40
[  663.227959]  ret_from_fork+0x2e/0x40
[  663.227960] kworker/10:41   D    0  9333      2 0x00000000
[  663.227963] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227964] Call Trace:
[  663.227966]  __schedule+0x2da/0xb00
[  663.227968]  schedule+0x38/0x90
[  663.227969]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227971]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227973]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227976]  call_rwsem_down_write_failed+0x17/0x30
[  663.227978]  down_write+0x5a/0x70
[  663.227979]  ? __generic_file_fsync+0x43/0x90
[  663.227981]  __generic_file_fsync+0x43/0x90
[  663.227982]  ext4_sync_file+0x2b4/0x540
[  663.227983]  vfs_fsync_range+0x46/0xa0
[  663.227985]  dio_complete+0x181/0x1b0
[  663.227986]  dio_aio_complete_work+0x17/0x20
[  663.227987]  process_one_work+0x208/0x6a0
[  663.227988]  ? process_one_work+0x18d/0x6a0
[  663.227990]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227991]  worker_thread+0x49/0x4a0
[  663.227993]  kthread+0x107/0x140
[  663.227994]  ? process_one_work+0x6a0/0x6a0
[  663.227996]  ? kthread_create_on_node+0x40/0x40
[  663.227997]  ret_from_fork+0x2e/0x40
[  663.227999] kworker/10:42   D    0  9334      2 0x00000000
[  663.228001] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228002] Call Trace:
[  663.228004]  __schedule+0x2da/0xb00
[  663.228006]  schedule+0x38/0x90
[  663.228008]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228011]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228013]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228015]  call_rwsem_down_write_failed+0x17/0x30
[  663.228017]  down_write+0x5a/0x70
[  663.228019]  ? __generic_file_fsync+0x43/0x90
[  663.228020]  __generic_file_fsync+0x43/0x90
[  663.228022]  ext4_sync_file+0x2b4/0x540
[  663.228024]  vfs_fsync_range+0x46/0xa0
[  663.228025]  dio_complete+0x181/0x1b0
[  663.228027]  dio_aio_complete_work+0x17/0x20
[  663.228028]  process_one_work+0x208/0x6a0
[  663.228029]  ? process_one_work+0x18d/0x6a0
[  663.228031]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228033]  worker_thread+0x49/0x4a0
[  663.228035]  kthread+0x107/0x140
[  663.228036]  ? process_one_work+0x6a0/0x6a0
[  663.228038]  ? kthread_create_on_node+0x40/0x40
[  663.228039]  ret_from_fork+0x2e/0x40
[  663.228042] kworker/10:43   D    0  9335      2 0x00000000
[  663.228044] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228045] Call Trace:
[  663.228048]  __schedule+0x2da/0xb00
[  663.228050]  schedule+0x38/0x90
[  663.228052]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228054]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228056]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228058]  call_rwsem_down_write_failed+0x17/0x30
[  663.228060]  down_write+0x5a/0x70
[  663.228062]  ? __generic_file_fsync+0x43/0x90
[  663.228064]  __generic_file_fsync+0x43/0x90
[  663.228065]  ext4_sync_file+0x2b4/0x540
[  663.228067]  vfs_fsync_range+0x46/0xa0
[  663.228068]  dio_complete+0x181/0x1b0
[  663.228069]  dio_aio_complete_work+0x17/0x20
[  663.228071]  process_one_work+0x208/0x6a0
[  663.228072]  ? process_one_work+0x18d/0x6a0
[  663.228074]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228075]  worker_thread+0x49/0x4a0
[  663.228077]  kthread+0x107/0x140
[  663.228079]  ? process_one_work+0x6a0/0x6a0
[  663.228081]  ? kthread_create_on_node+0x40/0x40
[  663.228082]  ret_from_fork+0x2e/0x40
[  663.228084] kworker/10:44   D    0  9336      2 0x00000000
[  663.228087] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228088] Call Trace:
[  663.228090]  __schedule+0x2da/0xb00
[  663.228092]  schedule+0x38/0x90
[  663.228093]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228095]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228097]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228100]  call_rwsem_down_write_failed+0x17/0x30
[  663.228101]  down_write+0x5a/0x70
[  663.228103]  ? __generic_file_fsync+0x43/0x90
[  663.228104]  __generic_file_fsync+0x43/0x90
[  663.228106]  ext4_sync_file+0x2b4/0x540
[  663.228108]  vfs_fsync_range+0x46/0xa0
[  663.228109]  dio_complete+0x181/0x1b0
[  663.228110]  dio_aio_complete_work+0x17/0x20
[  663.228112]  process_one_work+0x208/0x6a0
[  663.228114]  ? process_one_work+0x18d/0x6a0
[  663.228115]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228117]  worker_thread+0x49/0x4a0
[  663.228118]  kthread+0x107/0x140
[  663.228120]  ? process_one_work+0x6a0/0x6a0
[  663.228122]  ? kthread_create_on_node+0x40/0x40
[  663.228123]  ret_from_fork+0x2e/0x40
[  663.228125] kworker/10:45   D    0  9337      2 0x00000000
[  663.228127] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228128] Call Trace:
[  663.228130]  __schedule+0x2da/0xb00
[  663.228132]  schedule+0x38/0x90
[  663.228134]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228136]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228138]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228141]  call_rwsem_down_write_failed+0x17/0x30
[  663.228143]  down_write+0x5a/0x70
[  663.228145]  ? __generic_file_fsync+0x43/0x90
[  663.228146]  __generic_file_fsync+0x43/0x90
[  663.228148]  ext4_sync_file+0x2b4/0x540
[  663.228149]  vfs_fsync_range+0x46/0xa0
[  663.228151]  dio_complete+0x181/0x1b0
[  663.228152]  dio_aio_complete_work+0x17/0x20
[  663.228153]  process_one_work+0x208/0x6a0
[  663.228155]  ? process_one_work+0x18d/0x6a0
[  663.228157]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228158]  worker_thread+0x49/0x4a0
[  663.228160]  kthread+0x107/0x140
[  663.228162]  ? process_one_work+0x6a0/0x6a0
[  663.228164]  ? kthread_create_on_node+0x40/0x40
[  663.228166]  ? ___slab_alloc+0x4db/0x520
[  663.228168]  ? mempool_alloc_slab+0x10/0x20
[  663.228169]  ret_from_fork+0x2e/0x40
[  663.228171] kworker/10:46   D    0  9338      2 0x00000000
[  663.228174] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228175] Call Trace:
[  663.228177]  __schedule+0x2da/0xb00
[  663.228179]  schedule+0x38/0x90
[  663.228181]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228183]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228185]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228188]  call_rwsem_down_write_failed+0x17/0x30
[  663.228190]  down_write+0x5a/0x70
[  663.228192]  ? __generic_file_fsync+0x43/0x90
[  663.228193]  __generic_file_fsync+0x43/0x90
[  663.228195]  ext4_sync_file+0x2b4/0x540
[  663.228196]  vfs_fsync_range+0x46/0xa0
[  663.228198]  dio_complete+0x181/0x1b0
[  663.228199]  dio_aio_complete_work+0x17/0x20
[  663.228200]  process_one_work+0x208/0x6a0
[  663.228202]  ? process_one_work+0x18d/0x6a0
[  663.228204]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228206]  worker_thread+0x49/0x4a0
[  663.228207]  kthread+0x107/0x140
[  663.228208]  ? process_one_work+0x6a0/0x6a0
[  663.228210]  ? kthread_create_on_node+0x40/0x40
[  663.228212]  ? ___slab_alloc+0x4db/0x520
[  663.228214]  ? mempool_alloc_slab+0x10/0x20
[  663.228215]  ret_from_fork+0x2e/0x40
[  663.228217] kworker/10:47   D    0  9339      2 0x00000000
[  663.228219] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228220] Call Trace:
[  663.228222]  __schedule+0x2da/0xb00
[  663.228224]  schedule+0x38/0x90
[  663.228226]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228228]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228230]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228232]  call_rwsem_down_write_failed+0x17/0x30
[  663.228234]  down_write+0x5a/0x70
[  663.228236]  ? __generic_file_fsync+0x43/0x90
[  663.228238]  __generic_file_fsync+0x43/0x90
[  663.228239]  ext4_sync_file+0x2b4/0x540
[  663.228241]  vfs_fsync_range+0x46/0xa0
[  663.228243]  dio_complete+0x181/0x1b0
[  663.228244]  dio_aio_complete_work+0x17/0x20
[  663.228245]  process_one_work+0x208/0x6a0
[  663.228246]  ? process_one_work+0x18d/0x6a0
[  663.228248]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228249]  worker_thread+0x49/0x4a0
[  663.228251]  kthread+0x107/0x140
[  663.228253]  ? process_one_work+0x6a0/0x6a0
[  663.228254]  ? kthread_create_on_node+0x40/0x40
[  663.228256]  ret_from_fork+0x2e/0x40
[  663.228258] kworker/10:48   D    0  9340      2 0x00000000
[  663.228261] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228262] Call Trace:
[  663.228264]  __schedule+0x2da/0xb00
[  663.228266]  schedule+0x38/0x90
[  663.228268]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228270]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228272]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228274]  call_rwsem_down_write_failed+0x17/0x30
[  663.228276]  down_write+0x5a/0x70
[  663.228278]  ? __generic_file_fsync+0x43/0x90
[  663.228280]  __generic_file_fsync+0x43/0x90
[  663.228282]  ext4_sync_file+0x2b4/0x540
[  663.228283]  vfs_fsync_range+0x46/0xa0
[  663.228285]  dio_complete+0x181/0x1b0
[  663.228286]  dio_aio_complete_work+0x17/0x20
[  663.228287]  process_one_work+0x208/0x6a0
[  663.228289]  ? process_one_work+0x18d/0x6a0
[  663.228291]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228292]  worker_thread+0x49/0x4a0
[  663.228294]  kthread+0x107/0x140
[  663.228295]  ? process_one_work+0x6a0/0x6a0
[  663.228297]  ? kthread_create_on_node+0x40/0x40
[  663.228298]  ret_from_fork+0x2e/0x40
[  663.228300] kworker/10:49   D    0  9341      2 0x00000000
[  663.228303] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228304] Call Trace:
[  663.228306]  __schedule+0x2da/0xb00
[  663.228308]  schedule+0x38/0x90
[  663.228310]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228312]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228313]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228316]  call_rwsem_down_write_failed+0x17/0x30
[  663.228318]  down_write+0x5a/0x70
[  663.228319]  ? __generic_file_fsync+0x43/0x90
[  663.228321]  __generic_file_fsync+0x43/0x90
[  663.228323]  ext4_sync_file+0x2b4/0x540
[  663.228324]  vfs_fsync_range+0x46/0xa0
[  663.228326]  dio_complete+0x181/0x1b0
[  663.228327]  dio_aio_complete_work+0x17/0x20
[  663.228328]  process_one_work+0x208/0x6a0
[  663.228329]  ? process_one_work+0x18d/0x6a0
[  663.228331]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228332]  worker_thread+0x49/0x4a0
[  663.228334]  kthread+0x107/0x140
[  663.228335]  ? process_one_work+0x6a0/0x6a0
[  663.228337]  ? kthread_create_on_node+0x40/0x40
[  663.228338]  ret_from_fork+0x2e/0x40
[  663.228340] kworker/10:50   D    0  9342      2 0x00000000
[  663.228342] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228343] Call Trace:
[  663.228345]  __schedule+0x2da/0xb00
[  663.228347]  schedule+0x38/0x90
[  663.228348]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228350]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228352]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228355]  call_rwsem_down_write_failed+0x17/0x30
[  663.228357]  down_write+0x5a/0x70
[  663.228359]  ? __generic_file_fsync+0x43/0x90
[  663.228360]  __generic_file_fsync+0x43/0x90
[  663.228362]  ext4_sync_file+0x2b4/0x540
[  663.228363]  vfs_fsync_range+0x46/0xa0
[  663.228364]  dio_complete+0x181/0x1b0
[  663.228366]  dio_aio_complete_work+0x17/0x20
[  663.228367]  process_one_work+0x208/0x6a0
[  663.228368]  ? process_one_work+0x18d/0x6a0
[  663.228370]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228372]  worker_thread+0x49/0x4a0
[  663.228373]  kthread+0x107/0x140
[  663.228375]  ? process_one_work+0x6a0/0x6a0
[  663.228376]  ? kthread_create_on_node+0x40/0x40
[  663.228378]  ret_from_fork+0x2e/0x40
[  663.228379] kworker/10:51   D    0  9343      2 0x00000000
[  663.228382] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228383] Call Trace:
[  663.228385]  __schedule+0x2da/0xb00
[  663.228387]  schedule+0x38/0x90
[  663.228390]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228392]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228394]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228396]  call_rwsem_down_write_failed+0x17/0x30
[  663.228399]  down_write+0x5a/0x70
[  663.228400]  ? __generic_file_fsync+0x43/0x90
[  663.228402]  __generic_file_fsync+0x43/0x90
[  663.228404]  ext4_sync_file+0x2b4/0x540
[  663.228405]  vfs_fsync_range+0x46/0xa0
[  663.228407]  dio_complete+0x181/0x1b0
[  663.228408]  dio_aio_complete_work+0x17/0x20
[  663.228410]  process_one_work+0x208/0x6a0
[  663.228411]  ? process_one_work+0x18d/0x6a0
[  663.228413]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228414]  worker_thread+0x49/0x4a0
[  663.228416]  kthread+0x107/0x140
[  663.228417]  ? process_one_work+0x6a0/0x6a0
[  663.228419]  ? kthread_create_on_node+0x40/0x40
[  663.228422]  ? kmem_cache_alloc+0x230/0x2c0
[  663.228424]  ? __slab_alloc+0x3e/0x70
[  663.228426]  ? ___slab_alloc+0x4db/0x520
[  663.228427]  ret_from_fork+0x2e/0x40
[  663.228429] kworker/10:52   D    0  9344      2 0x00000000
[  663.228432] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228433] Call Trace:
[  663.228435]  __schedule+0x2da/0xb00
[  663.228436]  schedule+0x38/0x90
[  663.228438]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228440]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228442]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228445]  call_rwsem_down_write_failed+0x17/0x30
[  663.228447]  down_write+0x5a/0x70
[  663.228449]  ? __generic_file_fsync+0x43/0x90
[  663.228450]  __generic_file_fsync+0x43/0x90
[  663.228452]  ext4_sync_file+0x2b4/0x540
[  663.228454]  vfs_fsync_range+0x46/0xa0
[  663.228455]  dio_complete+0x181/0x1b0
[  663.228456]  dio_aio_complete_work+0x17/0x20
[  663.228458]  process_one_work+0x208/0x6a0
[  663.228459]  ? process_one_work+0x18d/0x6a0
[  663.228460]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228462]  worker_thread+0x49/0x4a0
[  663.228464]  kthread+0x107/0x140
[  663.228465]  ? process_one_work+0x6a0/0x6a0
[  663.228466]  ? kthread_create_on_node+0x40/0x40
[  663.228468]  ? ___slab_alloc+0x4db/0x520
[  663.228470]  ? mempool_alloc_slab+0x10/0x20
[  663.228471]  ret_from_fork+0x2e/0x40
[  663.228473] kworker/10:53   D    0  9345      2 0x00000000
[  663.228475] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228476] Call Trace:
[  663.228478]  __schedule+0x2da/0xb00
[  663.228480]  schedule+0x38/0x90
[  663.228482]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228484]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228486]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228488]  ? trace_hardirqs_on+0xd/0x10
[  663.228491]  call_rwsem_down_write_failed+0x17/0x30
[  663.228493]  down_write+0x5a/0x70
[  663.228495]  ? __generic_file_fsync+0x43/0x90
[  663.228497]  __generic_file_fsync+0x43/0x90
[  663.228498]  ext4_sync_file+0x2b4/0x540
[  663.228500]  vfs_fsync_range+0x46/0xa0
[  663.228501]  dio_complete+0x181/0x1b0
[  663.228503]  dio_aio_complete_work+0x17/0x20
[  663.228504]  process_one_work+0x208/0x6a0
[  663.228505]  ? process_one_work+0x18d/0x6a0
[  663.228507]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228508]  worker_thread+0x49/0x4a0
[  663.228510]  kthread+0x107/0x140
[  663.228511]  ? process_one_work+0x6a0/0x6a0
[  663.228513]  ? kthread_create_on_node+0x40/0x40
[  663.228515]  ret_from_fork+0x2e/0x40
[  663.228517] kworker/10:54   D    0  9346      2 0x00000000
[  663.228520] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228521] Call Trace:
[  663.228523]  __schedule+0x2da/0xb00
[  663.228525]  schedule+0x38/0x90
[  663.228527]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228529]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228531]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228533]  ? trace_hardirqs_on+0xd/0x10
[  663.228535]  call_rwsem_down_write_failed+0x17/0x30
[  663.228537]  down_write+0x5a/0x70
[  663.228539]  ? __generic_file_fsync+0x43/0x90
[  663.228540]  __generic_file_fsync+0x43/0x90
[  663.228542]  ext4_sync_file+0x2b4/0x540
[  663.228544]  vfs_fsync_range+0x46/0xa0
[  663.228545]  dio_complete+0x181/0x1b0
[  663.228546]  dio_aio_complete_work+0x17/0x20
[  663.228548]  process_one_work+0x208/0x6a0
[  663.228549]  ? process_one_work+0x18d/0x6a0
[  663.228551]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228552]  worker_thread+0x49/0x4a0
[  663.228554]  kthread+0x107/0x140
[  663.228555]  ? process_one_work+0x6a0/0x6a0
[  663.228557]  ? kthread_create_on_node+0x40/0x40
[  663.228559]  ret_from_fork+0x2e/0x40
[  663.228561] kworker/10:55   D    0  9347      2 0x00000000
[  663.228564] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228565] Call Trace:
[  663.228567]  __schedule+0x2da/0xb00
[  663.228568]  schedule+0x38/0x90
[  663.228570]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228572]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228574]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228576]  call_rwsem_down_write_failed+0x17/0x30
[  663.228578]  down_write+0x5a/0x70
[  663.228580]  ? __generic_file_fsync+0x43/0x90
[  663.228581]  __generic_file_fsync+0x43/0x90
[  663.228583]  ext4_sync_file+0x2b4/0x540
[  663.228584]  vfs_fsync_range+0x46/0xa0
[  663.228586]  dio_complete+0x181/0x1b0
[  663.228587]  dio_aio_complete_work+0x17/0x20
[  663.228589]  process_one_work+0x208/0x6a0
[  663.228590]  ? process_one_work+0x18d/0x6a0
[  663.228592]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228593]  worker_thread+0x49/0x4a0
[  663.228595]  kthread+0x107/0x140
[  663.228597]  ? process_one_work+0x6a0/0x6a0
[  663.228598]  ? kthread_create_on_node+0x40/0x40
[  663.228600]  ret_from_fork+0x2e/0x40
[  663.228602] kworker/10:56   D    0  9348      2 0x00000000
[  663.228604] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228605] Call Trace:
[  663.228607]  __schedule+0x2da/0xb00
[  663.228609]  schedule+0x38/0x90
[  663.228611]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228613]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228615]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228617]  call_rwsem_down_write_failed+0x17/0x30
[  663.228619]  down_write+0x5a/0x70
[  663.228621]  ? __generic_file_fsync+0x43/0x90
[  663.228623]  __generic_file_fsync+0x43/0x90
[  663.228625]  ext4_sync_file+0x2b4/0x540
[  663.228626]  vfs_fsync_range+0x46/0xa0
[  663.228627]  dio_complete+0x181/0x1b0
[  663.228629]  dio_aio_complete_work+0x17/0x20
[  663.228630]  process_one_work+0x208/0x6a0
[  663.228632]  ? process_one_work+0x18d/0x6a0
[  663.228633]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228635]  worker_thread+0x49/0x4a0
[  663.228637]  kthread+0x107/0x140
[  663.228638]  ? process_one_work+0x6a0/0x6a0
[  663.228640]  ? kthread_create_on_node+0x40/0x40
[  663.228641]  ret_from_fork+0x2e/0x40
[  663.228643] kworker/10:57   D    0  9349      2 0x00000000
[  663.228645] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228647] Call Trace:
[  663.228649]  __schedule+0x2da/0xb00
[  663.228651]  schedule+0x38/0x90
[  663.228653]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228655]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228657]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228660]  call_rwsem_down_write_failed+0x17/0x30
[  663.228662]  down_write+0x5a/0x70
[  663.228663]  ? __generic_file_fsync+0x43/0x90
[  663.228665]  __generic_file_fsync+0x43/0x90
[  663.228666]  ext4_sync_file+0x2b4/0x540
[  663.228668]  vfs_fsync_range+0x46/0xa0
[  663.228669]  dio_complete+0x181/0x1b0
[  663.228670]  dio_aio_complete_work+0x17/0x20
[  663.228671]  process_one_work+0x208/0x6a0
[  663.228672]  ? process_one_work+0x18d/0x6a0
[  663.228674]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228675]  worker_thread+0x49/0x4a0
[  663.228677]  kthread+0x107/0x140
[  663.228678]  ? process_one_work+0x6a0/0x6a0
[  663.228680]  ? kthread_create_on_node+0x40/0x40
[  663.228681]  ret_from_fork+0x2e/0x40
[  663.228683] kworker/10:58   D    0  9350      2 0x00000000
[  663.228686] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228687] Call Trace:
[  663.228689]  __schedule+0x2da/0xb00
[  663.228690]  schedule+0x38/0x90
[  663.228692]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228694]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228696]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228698]  call_rwsem_down_write_failed+0x17/0x30
[  663.228701]  down_write+0x5a/0x70
[  663.228702]  ? __generic_file_fsync+0x43/0x90
[  663.228704]  __generic_file_fsync+0x43/0x90
[  663.228706]  ext4_sync_file+0x2b4/0x540
[  663.228707]  vfs_fsync_range+0x46/0xa0
[  663.228709]  dio_complete+0x181/0x1b0
[  663.228710]  dio_aio_complete_work+0x17/0x20
[  663.228711]  process_one_work+0x208/0x6a0
[  663.228712]  ? process_one_work+0x18d/0x6a0
[  663.228714]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228716]  worker_thread+0x49/0x4a0
[  663.228718]  kthread+0x107/0x140
[  663.228719]  ? process_one_work+0x6a0/0x6a0
[  663.228721]  ? kthread_create_on_node+0x40/0x40
[  663.228722]  ret_from_fork+0x2e/0x40
[  663.228724] kworker/10:59   D    0  9351      2 0x00000000
[  663.228727] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228728] Call Trace:
[  663.228730]  __schedule+0x2da/0xb00
[  663.228732]  schedule+0x38/0x90
[  663.228734]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228736]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228738]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228740]  call_rwsem_down_write_failed+0x17/0x30
[  663.228742]  down_write+0x5a/0x70
[  663.228744]  ? __generic_file_fsync+0x43/0x90
[  663.228746]  __generic_file_fsync+0x43/0x90
[  663.228748]  ext4_sync_file+0x2b4/0x540
[  663.228750]  vfs_fsync_range+0x46/0xa0
[  663.228751]  dio_complete+0x181/0x1b0
[  663.228752]  dio_aio_complete_work+0x17/0x20
[  663.228754]  process_one_work+0x208/0x6a0
[  663.228755]  ? process_one_work+0x18d/0x6a0
[  663.228757]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228758]  worker_thread+0x49/0x4a0
[  663.228760]  kthread+0x107/0x140
[  663.228761]  ? process_one_work+0x6a0/0x6a0
[  663.228763]  ? kthread_create_on_node+0x40/0x40
[  663.228764]  ret_from_fork+0x2e/0x40
[  663.228766] kworker/10:60   D    0  9352      2 0x00000000
[  663.228769] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228770] Call Trace:
[  663.228772]  __schedule+0x2da/0xb00
[  663.228774]  schedule+0x38/0x90
[  663.228776]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228778]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228780]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228782]  call_rwsem_down_write_failed+0x17/0x30
[  663.228784]  down_write+0x5a/0x70
[  663.228786]  ? __generic_file_fsync+0x43/0x90
[  663.228788]  __generic_file_fsync+0x43/0x90
[  663.228790]  ext4_sync_file+0x2b4/0x540
[  663.228791]  vfs_fsync_range+0x46/0xa0
[  663.228792]  dio_complete+0x181/0x1b0
[  663.228794]  dio_aio_complete_work+0x17/0x20
[  663.228795]  process_one_work+0x208/0x6a0
[  663.228796]  ? process_one_work+0x18d/0x6a0
[  663.228798]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228799]  worker_thread+0x49/0x4a0
[  663.228801]  kthread+0x107/0x140
[  663.228802]  ? process_one_work+0x6a0/0x6a0
[  663.228804]  ? kthread_create_on_node+0x40/0x40
[  663.228805]  ret_from_fork+0x2e/0x40
[  663.228806] kworker/10:61   D    0  9353      2 0x00000000
[  663.228809] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228811] Call Trace:
[  663.228813]  __schedule+0x2da/0xb00
[  663.228814]  schedule+0x38/0x90
[  663.228816]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228818]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228820]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228823]  call_rwsem_down_write_failed+0x17/0x30
[  663.228825]  down_write+0x5a/0x70
[  663.228826]  ? __generic_file_fsync+0x43/0x90
[  663.228828]  __generic_file_fsync+0x43/0x90
[  663.228830]  ext4_sync_file+0x2b4/0x540
[  663.228831]  vfs_fsync_range+0x46/0xa0
[  663.228832]  dio_complete+0x181/0x1b0
[  663.228834]  dio_aio_complete_work+0x17/0x20
[  663.228835]  process_one_work+0x208/0x6a0
[  663.228836]  ? process_one_work+0x18d/0x6a0
[  663.228838]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228840]  worker_thread+0x49/0x4a0
[  663.228842]  kthread+0x107/0x140
[  663.228843]  ? process_one_work+0x6a0/0x6a0
[  663.228845]  ? kthread_create_on_node+0x40/0x40
[  663.228847]  ret_from_fork+0x2e/0x40
[  663.228848] kworker/10:62   D    0  9354      2 0x00000000
[  663.228851] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228852] Call Trace:
[  663.228854]  __schedule+0x2da/0xb00
[  663.228856]  schedule+0x38/0x90
[  663.228858]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228860]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228862]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228865]  call_rwsem_down_write_failed+0x17/0x30
[  663.228867]  down_write+0x5a/0x70
[  663.228868]  ? __generic_file_fsync+0x43/0x90
[  663.228870]  __generic_file_fsync+0x43/0x90
[  663.228872]  ext4_sync_file+0x2b4/0x540
[  663.228873]  vfs_fsync_range+0x46/0xa0
[  663.228876]  dio_complete+0x181/0x1b0
[  663.228877]  dio_aio_complete_work+0x17/0x20
[  663.228879]  process_one_work+0x208/0x6a0
[  663.228880]  ? process_one_work+0x18d/0x6a0
[  663.228882]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228883]  worker_thread+0x49/0x4a0
[  663.228885]  kthread+0x107/0x140
[  663.228886]  ? process_one_work+0x6a0/0x6a0
[  663.228888]  ? kthread_create_on_node+0x40/0x40
[  663.228889]  ret_from_fork+0x2e/0x40
[  663.228891] kworker/10:63   D    0  9355      2 0x00000000
[  663.228893] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228894] Call Trace:
[  663.228896]  __schedule+0x2da/0xb00
[  663.228898]  schedule+0x38/0x90
[  663.228900]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228902]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228904]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228905]  ? trace_hardirqs_on+0xd/0x10
[  663.228907]  call_rwsem_down_write_failed+0x17/0x30
[  663.228909]  down_write+0x5a/0x70
[  663.228911]  ? __generic_file_fsync+0x43/0x90
[  663.228913]  __generic_file_fsync+0x43/0x90
[  663.228914]  ext4_sync_file+0x2b4/0x540
[  663.228916]  vfs_fsync_range+0x46/0xa0
[  663.228917]  dio_complete+0x181/0x1b0
[  663.228918]  dio_aio_complete_work+0x17/0x20
[  663.228920]  process_one_work+0x208/0x6a0
[  663.228921]  ? process_one_work+0x18d/0x6a0
[  663.228923]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228924]  worker_thread+0x49/0x4a0
[  663.228926]  kthread+0x107/0x140
[  663.228927]  ? process_one_work+0x6a0/0x6a0
[  663.228929]  ? kthread_create_on_node+0x40/0x40
[  663.228931]  ret_from_fork+0x2e/0x40
[  663.228933] kworker/10:64   D    0  9356      2 0x00000000
[  663.228935] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228936] Call Trace:
[  663.228938]  __schedule+0x2da/0xb00
[  663.228940]  schedule+0x38/0x90
[  663.228943]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228945]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228947]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228949]  call_rwsem_down_write_failed+0x17/0x30
[  663.228951]  down_write+0x5a/0x70
[  663.228953]  ? __generic_file_fsync+0x43/0x90
[  663.228955]  __generic_file_fsync+0x43/0x90
[  663.228956]  ext4_sync_file+0x2b4/0x540
[  663.228958]  vfs_fsync_range+0x46/0xa0
[  663.228959]  dio_complete+0x181/0x1b0
[  663.228961]  dio_aio_complete_work+0x17/0x20
[  663.228962]  process_one_work+0x208/0x6a0
[  663.228963]  ? process_one_work+0x18d/0x6a0
[  663.228965]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228967]  worker_thread+0x49/0x4a0
[  663.228969]  kthread+0x107/0x140
[  663.228970]  ? process_one_work+0x6a0/0x6a0
[  663.228972]  ? kthread_create_on_node+0x40/0x40
[  663.228974]  ret_from_fork+0x2e/0x40
[  663.228976] kworker/10:65   D    0  9357      2 0x00000000
[  663.228978] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228979] Call Trace:
[  663.228981]  __schedule+0x2da/0xb00
[  663.228983]  schedule+0x38/0x90
[  663.228985]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228987]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228989]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228992]  call_rwsem_down_write_failed+0x17/0x30
[  663.228994]  down_write+0x5a/0x70
[  663.228995]  ? __generic_file_fsync+0x43/0x90
[  663.228997]  __generic_file_fsync+0x43/0x90
[  663.228998]  ext4_sync_file+0x2b4/0x540
[  663.229000]  vfs_fsync_range+0x46/0xa0
[  663.229001]  dio_complete+0x181/0x1b0
[  663.229002]  dio_aio_complete_work+0x17/0x20
[  663.229003]  process_one_work+0x208/0x6a0
[  663.229004]  ? process_one_work+0x18d/0x6a0
[  663.229007]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229008]  worker_thread+0x49/0x4a0
[  663.229010]  kthread+0x107/0x140
[  663.229011]  ? process_one_work+0x6a0/0x6a0
[  663.229013]  ? kthread_create_on_node+0x40/0x40
[  663.229014]  ret_from_fork+0x2e/0x40
[  663.229016] kworker/10:66   D    0  9358      2 0x00000000
[  663.229018] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229019] Call Trace:
[  663.229021]  __schedule+0x2da/0xb00
[  663.229023]  schedule+0x38/0x90
[  663.229025]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229027]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229029]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229032]  call_rwsem_down_write_failed+0x17/0x30
[  663.229034]  down_write+0x5a/0x70
[  663.229036]  ? __generic_file_fsync+0x43/0x90
[  663.229038]  __generic_file_fsync+0x43/0x90
[  663.229040]  ext4_sync_file+0x2b4/0x540
[  663.229041]  vfs_fsync_range+0x46/0xa0
[  663.229043]  dio_complete+0x181/0x1b0
[  663.229044]  dio_aio_complete_work+0x17/0x20
[  663.229045]  process_one_work+0x208/0x6a0
[  663.229047]  ? process_one_work+0x18d/0x6a0
[  663.229049]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229050]  worker_thread+0x49/0x4a0
[  663.229052]  kthread+0x107/0x140
[  663.229053]  ? process_one_work+0x6a0/0x6a0
[  663.229055]  ? kthread_create_on_node+0x40/0x40
[  663.229057]  ret_from_fork+0x2e/0x40
[  663.229058] kworker/10:67   D    0  9359      2 0x00000000
[  663.229061] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229062] Call Trace:
[  663.229064]  __schedule+0x2da/0xb00
[  663.229066]  schedule+0x38/0x90
[  663.229068]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229070]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229072]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229074]  ? trace_hardirqs_on+0xd/0x10
[  663.229076]  call_rwsem_down_write_failed+0x17/0x30
[  663.229079]  down_write+0x5a/0x70
[  663.229081]  ? __generic_file_fsync+0x43/0x90
[  663.229083]  __generic_file_fsync+0x43/0x90
[  663.229085]  ext4_sync_file+0x2b4/0x540
[  663.229086]  vfs_fsync_range+0x46/0xa0
[  663.229087]  dio_complete+0x181/0x1b0
[  663.229089]  dio_aio_complete_work+0x17/0x20
[  663.229090]  process_one_work+0x208/0x6a0
[  663.229092]  ? process_one_work+0x18d/0x6a0
[  663.229094]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229095]  worker_thread+0x49/0x4a0
[  663.229097]  kthread+0x107/0x140
[  663.229098]  ? process_one_work+0x6a0/0x6a0
[  663.229099]  ? kthread_create_on_node+0x40/0x40
[  663.229101]  ret_from_fork+0x2e/0x40
[  663.229103] kworker/10:68   D    0  9360      2 0x00000000
[  663.229105] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229106] Call Trace:
[  663.229108]  __schedule+0x2da/0xb00
[  663.229110]  schedule+0x38/0x90
[  663.229112]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229113]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229115]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229118]  call_rwsem_down_write_failed+0x17/0x30
[  663.229120]  down_write+0x5a/0x70
[  663.229121]  ? __generic_file_fsync+0x43/0x90
[  663.229123]  __generic_file_fsync+0x43/0x90
[  663.229124]  ext4_sync_file+0x2b4/0x540
[  663.229126]  vfs_fsync_range+0x46/0xa0
[  663.229127]  dio_complete+0x181/0x1b0
[  663.229128]  dio_aio_complete_work+0x17/0x20
[  663.229130]  process_one_work+0x208/0x6a0
[  663.229131]  ? process_one_work+0x18d/0x6a0
[  663.229132]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229134]  worker_thread+0x49/0x4a0
[  663.229136]  kthread+0x107/0x140
[  663.229137]  ? process_one_work+0x6a0/0x6a0
[  663.229138]  ? kthread_create_on_node+0x40/0x40
[  663.229140]  ret_from_fork+0x2e/0x40
[  663.229141] kworker/10:69   D    0  9361      2 0x00000000
[  663.229144] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229145] Call Trace:
[  663.229147]  __schedule+0x2da/0xb00
[  663.229148]  schedule+0x38/0x90
[  663.229150]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229152]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229154]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229156]  ? trace_hardirqs_on+0xd/0x10
[  663.229158]  call_rwsem_down_write_failed+0x17/0x30
[  663.229161]  down_write+0x5a/0x70
[  663.229163]  ? __generic_file_fsync+0x43/0x90
[  663.229164]  __generic_file_fsync+0x43/0x90
[  663.229166]  ext4_sync_file+0x2b4/0x540
[  663.229167]  vfs_fsync_range+0x46/0xa0
[  663.229169]  dio_complete+0x181/0x1b0
[  663.229170]  dio_aio_complete_work+0x17/0x20
[  663.229172]  process_one_work+0x208/0x6a0
[  663.229173]  ? process_one_work+0x18d/0x6a0
[  663.229175]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229176]  worker_thread+0x49/0x4a0
[  663.229178]  kthread+0x107/0x140
[  663.229179]  ? process_one_work+0x6a0/0x6a0
[  663.229181]  ? kthread_create_on_node+0x40/0x40
[  663.229182]  ret_from_fork+0x2e/0x40
[  663.229184] kworker/10:70   D    0  9362      2 0x00000000
[  663.229187] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229188] Call Trace:
[  663.229190]  __schedule+0x2da/0xb00
[  663.229192]  schedule+0x38/0x90
[  663.229194]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229197]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229199]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229200]  ? trace_hardirqs_on+0xd/0x10
[  663.229202]  call_rwsem_down_write_failed+0x17/0x30
[  663.229205]  down_write+0x5a/0x70
[  663.229206]  ? __generic_file_fsync+0x43/0x90
[  663.229208]  __generic_file_fsync+0x43/0x90
[  663.229210]  ext4_sync_file+0x2b4/0x540
[  663.229211]  vfs_fsync_range+0x46/0xa0
[  663.229213]  dio_complete+0x181/0x1b0
[  663.229214]  dio_aio_complete_work+0x17/0x20
[  663.229215]  process_one_work+0x208/0x6a0
[  663.229216]  ? process_one_work+0x18d/0x6a0
[  663.229218]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229219]  worker_thread+0x49/0x4a0
[  663.229221]  kthread+0x107/0x140
[  663.229223]  ? process_one_work+0x6a0/0x6a0
[  663.229224]  ? kthread_create_on_node+0x40/0x40
[  663.229225]  ret_from_fork+0x2e/0x40
[  663.229227] kworker/10:71   D    0  9363      2 0x00000000
[  663.229230] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229231] Call Trace:
[  663.229233]  __schedule+0x2da/0xb00
[  663.229235]  schedule+0x38/0x90
[  663.229237]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229239]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229241]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229243]  ? trace_hardirqs_on+0xd/0x10
[  663.229245]  call_rwsem_down_write_failed+0x17/0x30
[  663.229247]  down_write+0x5a/0x70
[  663.229248]  ? __generic_file_fsync+0x43/0x90
[  663.229250]  __generic_file_fsync+0x43/0x90
[  663.229251]  ext4_sync_file+0x2b4/0x540
[  663.229253]  vfs_fsync_range+0x46/0xa0
[  663.229254]  dio_complete+0x181/0x1b0
[  663.229256]  dio_aio_complete_work+0x17/0x20
[  663.229257]  process_one_work+0x208/0x6a0
[  663.229259]  ? process_one_work+0x18d/0x6a0
[  663.229260]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229262]  worker_thread+0x49/0x4a0
[  663.229264]  kthread+0x107/0x140
[  663.229265]  ? process_one_work+0x6a0/0x6a0
[  663.229266]  ? kthread_create_on_node+0x40/0x40
[  663.229268]  ret_from_fork+0x2e/0x40
[  663.229269] kworker/10:72   D    0  9364      2 0x00000000
[  663.229272] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229273] Call Trace:
[  663.229275]  __schedule+0x2da/0xb00
[  663.229277]  schedule+0x38/0x90
[  663.229279]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229281]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229283]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229285]  ? trace_hardirqs_on+0xd/0x10
[  663.229287]  call_rwsem_down_write_failed+0x17/0x30
[  663.229289]  down_write+0x5a/0x70
[  663.229291]  ? __generic_file_fsync+0x43/0x90
[  663.229292]  __generic_file_fsync+0x43/0x90
[  663.229294]  ext4_sync_file+0x2b4/0x540
[  663.229296]  vfs_fsync_range+0x46/0xa0
[  663.229297]  dio_complete+0x181/0x1b0
[  663.229298]  dio_aio_complete_work+0x17/0x20
[  663.229299]  process_one_work+0x208/0x6a0
[  663.229301]  ? process_one_work+0x18d/0x6a0
[  663.229303]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229304]  worker_thread+0x49/0x4a0
[  663.229306]  kthread+0x107/0x140
[  663.229308]  ? process_one_work+0x6a0/0x6a0
[  663.229310]  ? kthread_create_on_node+0x40/0x40
[  663.229311]  ret_from_fork+0x2e/0x40
[  663.229313] kworker/10:73   D    0  9365      2 0x00000000
[  663.229316] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229317] Call Trace:
[  663.229319]  __schedule+0x2da/0xb00
[  663.229321]  schedule+0x38/0x90
[  663.229323]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229325]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229327]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229328]  ? trace_hardirqs_on+0xd/0x10
[  663.229330]  call_rwsem_down_write_failed+0x17/0x30
[  663.229333]  down_write+0x5a/0x70
[  663.229334]  ? __generic_file_fsync+0x43/0x90
[  663.229336]  __generic_file_fsync+0x43/0x90
[  663.229338]  ext4_sync_file+0x2b4/0x540
[  663.229340]  vfs_fsync_range+0x46/0xa0
[  663.229341]  dio_complete+0x181/0x1b0
[  663.229342]  dio_aio_complete_work+0x17/0x20
[  663.229343]  process_one_work+0x208/0x6a0
[  663.229345]  ? process_one_work+0x18d/0x6a0
[  663.229347]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229348]  worker_thread+0x49/0x4a0
[  663.229350]  kthread+0x107/0x140
[  663.229353]  ? process_one_work+0x6a0/0x6a0
[  663.229354]  ? kthread_create_on_node+0x40/0x40
[  663.229356]  ret_from_fork+0x2e/0x40
[  663.229357] kworker/10:74   D    0  9366      2 0x00000000
[  663.229360] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229361] Call Trace:
[  663.229363]  __schedule+0x2da/0xb00
[  663.229365]  schedule+0x38/0x90
[  663.229366]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229369]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229371]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229373]  call_rwsem_down_write_failed+0x17/0x30
[  663.229375]  down_write+0x5a/0x70
[  663.229377]  ? __generic_file_fsync+0x43/0x90
[  663.229378]  __generic_file_fsync+0x43/0x90
[  663.229380]  ext4_sync_file+0x2b4/0x540
[  663.229381]  vfs_fsync_range+0x46/0xa0
[  663.229383]  dio_complete+0x181/0x1b0
[  663.229384]  dio_aio_complete_work+0x17/0x20
[  663.229385]  process_one_work+0x208/0x6a0
[  663.229387]  ? process_one_work+0x18d/0x6a0
[  663.229388]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229390]  worker_thread+0x49/0x4a0
[  663.229392]  kthread+0x107/0x140
[  663.229393]  ? process_one_work+0x6a0/0x6a0
[  663.229395]  ? kthread_create_on_node+0x40/0x40
[  663.229396]  ret_from_fork+0x2e/0x40
[  663.229398] kworker/10:75   D    0  9367      2 0x00000000
[  663.229401] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229403] Call Trace:
[  663.229405]  __schedule+0x2da/0xb00
[  663.229407]  schedule+0x38/0x90
[  663.229409]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229411]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229413]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229415]  call_rwsem_down_write_failed+0x17/0x30
[  663.229417]  down_write+0x5a/0x70
[  663.229419]  ? __generic_file_fsync+0x43/0x90
[  663.229421]  __generic_file_fsync+0x43/0x90
[  663.229423]  ext4_sync_file+0x2b4/0x540
[  663.229424]  vfs_fsync_range+0x46/0xa0
[  663.229425]  dio_complete+0x181/0x1b0
[  663.229427]  dio_aio_complete_work+0x17/0x20
[  663.229428]  process_one_work+0x208/0x6a0
[  663.229429]  ? process_one_work+0x18d/0x6a0
[  663.229431]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229433]  worker_thread+0x49/0x4a0
[  663.229435]  kthread+0x107/0x140
[  663.229436]  ? process_one_work+0x6a0/0x6a0
[  663.229438]  ? kthread_create_on_node+0x40/0x40
[  663.229439]  ret_from_fork+0x2e/0x40
[  663.229441] kworker/10:76   D    0  9368      2 0x00000000
[  663.229443] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229444] Call Trace:
[  663.229446]  __schedule+0x2da/0xb00
[  663.229448]  schedule+0x38/0x90
[  663.229450]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229453]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229455]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229457]  call_rwsem_down_write_failed+0x17/0x30
[  663.229459]  down_write+0x5a/0x70
[  663.229461]  ? __generic_file_fsync+0x43/0x90
[  663.229462]  __generic_file_fsync+0x43/0x90
[  663.229464]  ext4_sync_file+0x2b4/0x540
[  663.229465]  vfs_fsync_range+0x46/0xa0
[  663.229467]  dio_complete+0x181/0x1b0
[  663.229468]  dio_aio_complete_work+0x17/0x20
[  663.229470]  process_one_work+0x208/0x6a0
[  663.229471]  ? process_one_work+0x18d/0x6a0
[  663.229473]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229474]  worker_thread+0x49/0x4a0
[  663.229476]  kthread+0x107/0x140
[  663.229477]  ? process_one_work+0x6a0/0x6a0
[  663.229479]  ? kthread_create_on_node+0x40/0x40
[  663.229481]  ret_from_fork+0x2e/0x40
[  663.229482] kworker/10:77   D    0  9369      2 0x00000000
[  663.229485] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229486] Call Trace:
[  663.229488]  __schedule+0x2da/0xb00
[  663.229490]  schedule+0x38/0x90
[  663.229491]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229493]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229495]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229498]  call_rwsem_down_write_failed+0x17/0x30
[  663.229499]  down_write+0x5a/0x70
[  663.229502]  ? __generic_file_fsync+0x43/0x90
[  663.229503]  __generic_file_fsync+0x43/0x90
[  663.229505]  ext4_sync_file+0x2b4/0x540
[  663.229506]  vfs_fsync_range+0x46/0xa0
[  663.229507]  dio_complete+0x181/0x1b0
[  663.229509]  dio_aio_complete_work+0x17/0x20
[  663.229510]  process_one_work+0x208/0x6a0
[  663.229511]  ? process_one_work+0x18d/0x6a0
[  663.229513]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229514]  worker_thread+0x49/0x4a0
[  663.229516]  kthread+0x107/0x140
[  663.229517]  ? process_one_work+0x6a0/0x6a0
[  663.229519]  ? kthread_create_on_node+0x40/0x40
[  663.229520]  ret_from_fork+0x2e/0x40
[  663.229522] kworker/10:78   D    0  9370      2 0x00000000
[  663.229524] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229525] Call Trace:
[  663.229527]  __schedule+0x2da/0xb00
[  663.229529]  schedule+0x38/0x90
[  663.229530]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229532]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229535]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229536]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.229539]  call_rwsem_down_write_failed+0x17/0x30
[  663.229541]  down_write+0x5a/0x70
[  663.229543]  ? __generic_file_fsync+0x43/0x90
[  663.229545]  __generic_file_fsync+0x43/0x90
[  663.229546]  ext4_sync_file+0x2b4/0x540
[  663.229548]  vfs_fsync_range+0x46/0xa0
[  663.229549]  dio_complete+0x181/0x1b0
[  663.229551]  dio_aio_complete_work+0x17/0x20
[  663.229552]  process_one_work+0x208/0x6a0
[  663.229553]  ? process_one_work+0x18d/0x6a0
[  663.229555]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229556]  worker_thread+0x49/0x4a0
[  663.229558]  kthread+0x107/0x140
[  663.229560]  ? process_one_work+0x6a0/0x6a0
[  663.229561]  ? kthread_create_on_node+0x40/0x40
[  663.229563]  ret_from_fork+0x2e/0x40
[  663.229565] kworker/10:79   D    0  9371      2 0x00000000
[  663.229568] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229569] Call Trace:
[  663.229571]  __schedule+0x2da/0xb00
[  663.229573]  schedule+0x38/0x90
[  663.229575]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229577]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229579]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229582]  call_rwsem_down_write_failed+0x17/0x30
[  663.229584]  down_write+0x5a/0x70
[  663.229586]  ? __generic_file_fsync+0x43/0x90
[  663.229587]  __generic_file_fsync+0x43/0x90
[  663.229589]  ext4_sync_file+0x2b4/0x540
[  663.229591]  vfs_fsync_range+0x46/0xa0
[  663.229592]  dio_complete+0x181/0x1b0
[  663.229593]  dio_aio_complete_work+0x17/0x20
[  663.229595]  process_one_work+0x208/0x6a0
[  663.229596]  ? process_one_work+0x18d/0x6a0
[  663.229597]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229599]  worker_thread+0x49/0x4a0
[  663.229601]  kthread+0x107/0x140
[  663.229602]  ? process_one_work+0x6a0/0x6a0
[  663.229604]  ? kthread_create_on_node+0x40/0x40
[  663.229605]  ret_from_fork+0x2e/0x40
[  663.229607] kworker/10:80   D    0  9372      2 0x00000000
[  663.229609] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229610] Call Trace:
[  663.229612]  __schedule+0x2da/0xb00
[  663.229614]  schedule+0x38/0x90
[  663.229616]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229618]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229620]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229622]  call_rwsem_down_write_failed+0x17/0x30
[  663.229624]  down_write+0x5a/0x70
[  663.229626]  ? __generic_file_fsync+0x43/0x90
[  663.229628]  __generic_file_fsync+0x43/0x90
[  663.229629]  ext4_sync_file+0x2b4/0x540
[  663.229631]  vfs_fsync_range+0x46/0xa0
[  663.229633]  dio_complete+0x181/0x1b0
[  663.229634]  dio_aio_complete_work+0x17/0x20
[  663.229635]  process_one_work+0x208/0x6a0
[  663.229636]  ? process_one_work+0x18d/0x6a0
[  663.229638]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229639]  worker_thread+0x49/0x4a0
[  663.229641]  kthread+0x107/0x140
[  663.229643]  ? process_one_work+0x6a0/0x6a0
[  663.229644]  ? kthread_create_on_node+0x40/0x40
[  663.229646]  ret_from_fork+0x2e/0x40
[  663.229648] kworker/10:81   D    0  9373      2 0x00000000
[  663.229650] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229651] Call Trace:
[  663.229653]  __schedule+0x2da/0xb00
[  663.229655]  schedule+0x38/0x90
[  663.229657]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229659]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229661]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229664]  call_rwsem_down_write_failed+0x17/0x30
[  663.229666]  down_write+0x5a/0x70
[  663.229669]  ? __generic_file_fsync+0x43/0x90
[  663.229671]  __generic_file_fsync+0x43/0x90
[  663.229672]  ext4_sync_file+0x2b4/0x540
[  663.229674]  vfs_fsync_range+0x46/0xa0
[  663.229675]  dio_complete+0x181/0x1b0
[  663.229677]  dio_aio_complete_work+0x17/0x20
[  663.229678]  process_one_work+0x208/0x6a0
[  663.229679]  ? process_one_work+0x18d/0x6a0
[  663.229681]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229683]  worker_thread+0x49/0x4a0
[  663.229685]  kthread+0x107/0x140
[  663.229686]  ? process_one_work+0x6a0/0x6a0
[  663.229688]  ? kthread_create_on_node+0x40/0x40
[  663.229689]  ret_from_fork+0x2e/0x40
[  663.229691] kworker/10:82   D    0  9374      2 0x00000000
[  663.229693] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229694] Call Trace:
[  663.229697]  __schedule+0x2da/0xb00
[  663.229699]  schedule+0x38/0x90
[  663.229701]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229703]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229705]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229708]  call_rwsem_down_write_failed+0x17/0x30
[  663.229710]  down_write+0x5a/0x70
[  663.229711]  ? __generic_file_fsync+0x43/0x90
[  663.229713]  __generic_file_fsync+0x43/0x90
[  663.229715]  ext4_sync_file+0x2b4/0x540
[  663.229716]  vfs_fsync_range+0x46/0xa0
[  663.229717]  dio_complete+0x181/0x1b0
[  663.229719]  dio_aio_complete_work+0x17/0x20
[  663.229720]  process_one_work+0x208/0x6a0
[  663.229721]  ? process_one_work+0x18d/0x6a0
[  663.229723]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229724]  worker_thread+0x49/0x4a0
[  663.229726]  kthread+0x107/0x140
[  663.229727]  ? process_one_work+0x6a0/0x6a0
[  663.229729]  ? kthread_create_on_node+0x40/0x40
[  663.229731]  ret_from_fork+0x2e/0x40
[  663.229732] kworker/10:83   D    0  9375      2 0x00000000
[  663.229735] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229736] Call Trace:
[  663.229738]  __schedule+0x2da/0xb00
[  663.229739]  schedule+0x38/0x90
[  663.229741]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229743]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229745]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229747]  call_rwsem_down_write_failed+0x17/0x30
[  663.229749]  down_write+0x5a/0x70
[  663.229751]  ? __generic_file_fsync+0x43/0x90
[  663.229752]  __generic_file_fsync+0x43/0x90
[  663.229754]  ext4_sync_file+0x2b4/0x540
[  663.229755]  vfs_fsync_range+0x46/0xa0
[  663.229757]  dio_complete+0x181/0x1b0
[  663.229759]  dio_aio_complete_work+0x17/0x20
[  663.229760]  process_one_work+0x208/0x6a0
[  663.229762]  ? process_one_work+0x18d/0x6a0
[  663.229763]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229765]  worker_thread+0x49/0x4a0
[  663.229767]  kthread+0x107/0x140
[  663.229768]  ? process_one_work+0x6a0/0x6a0
[  663.229770]  ? kthread_create_on_node+0x40/0x40
[  663.229771]  ret_from_fork+0x2e/0x40
[  663.229773] kworker/10:84   D    0  9376      2 0x00000000
[  663.229775] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229777] Call Trace:
[  663.229779]  __schedule+0x2da/0xb00
[  663.229780]  schedule+0x38/0x90
[  663.229783]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229785]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229787]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229789]  call_rwsem_down_write_failed+0x17/0x30
[  663.229791]  down_write+0x5a/0x70
[  663.229793]  ? __generic_file_fsync+0x43/0x90
[  663.229795]  __generic_file_fsync+0x43/0x90
[  663.229797]  ext4_sync_file+0x2b4/0x540
[  663.229798]  vfs_fsync_range+0x46/0xa0
[  663.229800]  dio_complete+0x181/0x1b0
[  663.229801]  dio_aio_complete_work+0x17/0x20
[  663.229803]  process_one_work+0x208/0x6a0
[  663.229804]  ? process_one_work+0x18d/0x6a0
[  663.229806]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229807]  worker_thread+0x49/0x4a0
[  663.229809]  kthread+0x107/0x140
[  663.229810]  ? process_one_work+0x6a0/0x6a0
[  663.229812]  ? kthread_create_on_node+0x40/0x40
[  663.229814]  ret_from_fork+0x2e/0x40
[  663.229815] kworker/10:85   D    0  9377      2 0x00000000
[  663.229818] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229819] Call Trace:
[  663.229821]  __schedule+0x2da/0xb00
[  663.229823]  schedule+0x38/0x90
[  663.229825]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229828]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229830]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229832]  call_rwsem_down_write_failed+0x17/0x30
[  663.229834]  down_write+0x5a/0x70
[  663.229835]  ? __generic_file_fsync+0x43/0x90
[  663.229837]  __generic_file_fsync+0x43/0x90
[  663.229838]  ext4_sync_file+0x2b4/0x540
[  663.229840]  vfs_fsync_range+0x46/0xa0
[  663.229841]  dio_complete+0x181/0x1b0
[  663.229842]  dio_aio_complete_work+0x17/0x20
[  663.229844]  process_one_work+0x208/0x6a0
[  663.229845]  ? process_one_work+0x18d/0x6a0
[  663.229847]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229849]  worker_thread+0x49/0x4a0
[  663.229850]  kthread+0x107/0x140
[  663.229852]  ? process_one_work+0x6a0/0x6a0
[  663.229853]  ? kthread_create_on_node+0x40/0x40
[  663.229854]  ret_from_fork+0x2e/0x40
[  663.229856] kworker/10:86   D    0  9378      2 0x00000000
[  663.229859] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229860] Call Trace:
[  663.229862]  __schedule+0x2da/0xb00
[  663.229864]  schedule+0x38/0x90
[  663.229866]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229868]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229870]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229873]  call_rwsem_down_write_failed+0x17/0x30
[  663.229875]  down_write+0x5a/0x70
[  663.229876]  ? __generic_file_fsync+0x43/0x90
[  663.229878]  __generic_file_fsync+0x43/0x90
[  663.229880]  ext4_sync_file+0x2b4/0x540
[  663.229881]  vfs_fsync_range+0x46/0xa0
[  663.229883]  dio_complete+0x181/0x1b0
[  663.229884]  dio_aio_complete_work+0x17/0x20
[  663.229885]  process_one_work+0x208/0x6a0
[  663.229887]  ? process_one_work+0x18d/0x6a0
[  663.229888]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229890]  worker_thread+0x49/0x4a0
[  663.229893]  kthread+0x107/0x140
[  663.229894]  ? process_one_work+0x6a0/0x6a0
[  663.229896]  ? kthread_create_on_node+0x40/0x40
[  663.229897]  ret_from_fork+0x2e/0x40
[  663.229899] kworker/10:87   D    0  9379      2 0x00000000
[  663.229902] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229903] Call Trace:
[  663.229905]  __schedule+0x2da/0xb00
[  663.229906]  schedule+0x38/0x90
[  663.229908]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229910]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229912]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229915]  call_rwsem_down_write_failed+0x17/0x30
[  663.229917]  down_write+0x5a/0x70
[  663.229919]  ? __generic_file_fsync+0x43/0x90
[  663.229920]  __generic_file_fsync+0x43/0x90
[  663.229922]  ext4_sync_file+0x2b4/0x540
[  663.229924]  vfs_fsync_range+0x46/0xa0
[  663.229926]  dio_complete+0x181/0x1b0
[  663.229927]  dio_aio_complete_work+0x17/0x20
[  663.229928]  process_one_work+0x208/0x6a0
[  663.229929]  ? process_one_work+0x18d/0x6a0
[  663.229931]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229932]  worker_thread+0x49/0x4a0
[  663.229934]  kthread+0x107/0x140
[  663.229936]  ? process_one_work+0x6a0/0x6a0
[  663.229938]  ? kthread_create_on_node+0x40/0x40
[  663.229939]  ret_from_fork+0x2e/0x40
[  663.229941] kworker/10:88   D    0  9380      2 0x00000000
[  663.229943] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229944] Call Trace:
[  663.229946]  __schedule+0x2da/0xb00
[  663.229948]  schedule+0x38/0x90
[  663.229950]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229952]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229954]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229956]  call_rwsem_down_write_failed+0x17/0x30
[  663.229958]  down_write+0x5a/0x70
[  663.229960]  ? __generic_file_fsync+0x43/0x90
[  663.229962]  __generic_file_fsync+0x43/0x90
[  663.229963]  ext4_sync_file+0x2b4/0x540
[  663.229965]  vfs_fsync_range+0x46/0xa0
[  663.229966]  dio_complete+0x181/0x1b0
[  663.229967]  dio_aio_complete_work+0x17/0x20
[  663.229968]  process_one_work+0x208/0x6a0
[  663.229970]  ? process_one_work+0x18d/0x6a0
[  663.229971]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229973]  worker_thread+0x49/0x4a0
[  663.229975]  kthread+0x107/0x140
[  663.229976]  ? process_one_work+0x6a0/0x6a0
[  663.229978]  ? kthread_create_on_node+0x40/0x40
[  663.229979]  ret_from_fork+0x2e/0x40
[  663.229981] kworker/10:89   D    0  9381      2 0x00000000
[  663.229983] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229984] Call Trace:
[  663.229986]  __schedule+0x2da/0xb00
[  663.229988]  schedule+0x38/0x90
[  663.229990]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229992]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229994]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229997]  call_rwsem_down_write_failed+0x17/0x30
[  663.229999]  down_write+0x5a/0x70
[  663.230001]  ? __generic_file_fsync+0x43/0x90
[  663.230002]  __generic_file_fsync+0x43/0x90
[  663.230004]  ext4_sync_file+0x2b4/0x540
[  663.230006]  vfs_fsync_range+0x46/0xa0
[  663.230007]  dio_complete+0x181/0x1b0
[  663.230008]  dio_aio_complete_work+0x17/0x20
[  663.230010]  process_one_work+0x208/0x6a0
[  663.230011]  ? process_one_work+0x18d/0x6a0
[  663.230013]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230015]  worker_thread+0x49/0x4a0
[  663.230016]  kthread+0x107/0x140
[  663.230018]  ? process_one_work+0x6a0/0x6a0
[  663.230019]  ? kthread_create_on_node+0x40/0x40
[  663.230021]  ret_from_fork+0x2e/0x40
[  663.230023] kworker/10:90   D    0  9382      2 0x00000000
[  663.230026] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230027] Call Trace:
[  663.230029]  __schedule+0x2da/0xb00
[  663.230031]  schedule+0x38/0x90
[  663.230033]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230035]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230037]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230039]  call_rwsem_down_write_failed+0x17/0x30
[  663.230041]  down_write+0x5a/0x70
[  663.230042]  ? __generic_file_fsync+0x43/0x90
[  663.230044]  __generic_file_fsync+0x43/0x90
[  663.230046]  ext4_sync_file+0x2b4/0x540
[  663.230047]  vfs_fsync_range+0x46/0xa0
[  663.230049]  dio_complete+0x181/0x1b0
[  663.230050]  dio_aio_complete_work+0x17/0x20
[  663.230051]  process_one_work+0x208/0x6a0
[  663.230053]  ? process_one_work+0x18d/0x6a0
[  663.230054]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230056]  worker_thread+0x49/0x4a0
[  663.230058]  kthread+0x107/0x140
[  663.230059]  ? process_one_work+0x6a0/0x6a0
[  663.230060]  ? kthread_create_on_node+0x40/0x40
[  663.230062]  ret_from_fork+0x2e/0x40
[  663.230063] kworker/10:91   D    0  9383      2 0x00000000
[  663.230066] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230067] Call Trace:
[  663.230069]  __schedule+0x2da/0xb00
[  663.230070]  schedule+0x38/0x90
[  663.230072]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230074]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230076]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230079]  call_rwsem_down_write_failed+0x17/0x30
[  663.230081]  down_write+0x5a/0x70
[  663.230083]  ? __generic_file_fsync+0x43/0x90
[  663.230084]  __generic_file_fsync+0x43/0x90
[  663.230086]  ext4_sync_file+0x2b4/0x540
[  663.230087]  vfs_fsync_range+0x46/0xa0
[  663.230089]  dio_complete+0x181/0x1b0
[  663.230090]  dio_aio_complete_work+0x17/0x20
[  663.230092]  process_one_work+0x208/0x6a0
[  663.230093]  ? process_one_work+0x18d/0x6a0
[  663.230095]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230096]  worker_thread+0x49/0x4a0
[  663.230098]  kthread+0x107/0x140
[  663.230099]  ? process_one_work+0x6a0/0x6a0
[  663.230101]  ? kthread_create_on_node+0x40/0x40
[  663.230102]  ret_from_fork+0x2e/0x40
[  663.230104] kworker/10:92   D    0  9384      2 0x00000000
[  663.230107] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230108] Call Trace:
[  663.230110]  __schedule+0x2da/0xb00
[  663.230112]  schedule+0x38/0x90
[  663.230114]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230116]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230118]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230121]  call_rwsem_down_write_failed+0x17/0x30
[  663.230123]  down_write+0x5a/0x70
[  663.230125]  ? __generic_file_fsync+0x43/0x90
[  663.230126]  __generic_file_fsync+0x43/0x90
[  663.230128]  ext4_sync_file+0x2b4/0x540
[  663.230130]  vfs_fsync_range+0x46/0xa0
[  663.230131]  dio_complete+0x181/0x1b0
[  663.230132]  dio_aio_complete_work+0x17/0x20
[  663.230134]  process_one_work+0x208/0x6a0
[  663.230135]  ? process_one_work+0x18d/0x6a0
[  663.230137]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230138]  worker_thread+0x49/0x4a0
[  663.230140]  kthread+0x107/0x140
[  663.230142]  ? process_one_work+0x6a0/0x6a0
[  663.230143]  ? kthread_create_on_node+0x40/0x40
[  663.230145]  ret_from_fork+0x2e/0x40
[  663.230146] kworker/10:93   D    0  9385      2 0x00000000
[  663.230149] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230150] Call Trace:
[  663.230152]  __schedule+0x2da/0xb00
[  663.230153]  schedule+0x38/0x90
[  663.230155]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230157]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230159]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230161]  call_rwsem_down_write_failed+0x17/0x30
[  663.230163]  down_write+0x5a/0x70
[  663.230165]  ? __generic_file_fsync+0x43/0x90
[  663.230166]  __generic_file_fsync+0x43/0x90
[  663.230168]  ext4_sync_file+0x2b4/0x540
[  663.230169]  vfs_fsync_range+0x46/0xa0
[  663.230171]  dio_complete+0x181/0x1b0
[  663.230172]  dio_aio_complete_work+0x17/0x20
[  663.230173]  process_one_work+0x208/0x6a0
[  663.230174]  ? process_one_work+0x18d/0x6a0
[  663.230176]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230177]  worker_thread+0x49/0x4a0
[  663.230179]  kthread+0x107/0x140
[  663.230180]  ? process_one_work+0x6a0/0x6a0
[  663.230182]  ? kthread_create_on_node+0x40/0x40
[  663.230183]  ret_from_fork+0x2e/0x40
[  663.230185] kworker/10:94   D    0  9386      2 0x00000000
[  663.230187] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230188] Call Trace:
[  663.230190]  __schedule+0x2da/0xb00
[  663.230192]  schedule+0x38/0x90
[  663.230194]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230195]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230197]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230199]  ? trace_hardirqs_on+0xd/0x10
[  663.230201]  call_rwsem_down_write_failed+0x17/0x30
[  663.230203]  down_write+0x5a/0x70
[  663.230205]  ? __generic_file_fsync+0x43/0x90
[  663.230206]  __generic_file_fsync+0x43/0x90
[  663.230208]  ext4_sync_file+0x2b4/0x540
[  663.230209]  vfs_fsync_range+0x46/0xa0
[  663.230211]  dio_complete+0x181/0x1b0
[  663.230212]  dio_aio_complete_work+0x17/0x20
[  663.230214]  process_one_work+0x208/0x6a0
[  663.230215]  ? process_one_work+0x18d/0x6a0
[  663.230217]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230218]  worker_thread+0x49/0x4a0
[  663.230220]  kthread+0x107/0x140
[  663.230221]  ? process_one_work+0x6a0/0x6a0
[  663.230223]  ? kthread_create_on_node+0x40/0x40
[  663.230224]  ret_from_fork+0x2e/0x40
[  663.230226] kworker/10:95   D    0  9387      2 0x00000000
[  663.230230] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230231] Call Trace:
[  663.230233]  __schedule+0x2da/0xb00
[  663.230235]  schedule+0x38/0x90
[  663.230237]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230239]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230241]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230243]  call_rwsem_down_write_failed+0x17/0x30
[  663.230246]  down_write+0x5a/0x70
[  663.230247]  ? __generic_file_fsync+0x43/0x90
[  663.230249]  __generic_file_fsync+0x43/0x90
[  663.230251]  ext4_sync_file+0x2b4/0x540
[  663.230252]  vfs_fsync_range+0x46/0xa0
[  663.230254]  dio_complete+0x181/0x1b0
[  663.230255]  dio_aio_complete_work+0x17/0x20
[  663.230256]  process_one_work+0x208/0x6a0
[  663.230258]  ? process_one_work+0x18d/0x6a0
[  663.230259]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230261]  worker_thread+0x49/0x4a0
[  663.230262]  kthread+0x107/0x140
[  663.230264]  ? process_one_work+0x6a0/0x6a0
[  663.230265]  ? kthread_create_on_node+0x40/0x40
[  663.230267]  ret_from_fork+0x2e/0x40
[  663.230268] kworker/10:96   D    0  9388      2 0x00000000
[  663.230271] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230272] Call Trace:
[  663.230274]  __schedule+0x2da/0xb00
[  663.230276]  schedule+0x38/0x90
[  663.230278]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230280]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230282]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230285]  call_rwsem_down_write_failed+0x17/0x30
[  663.230287]  down_write+0x5a/0x70
[  663.230288]  ? __generic_file_fsync+0x43/0x90
[  663.230290]  __generic_file_fsync+0x43/0x90
[  663.230291]  ext4_sync_file+0x2b4/0x540
[  663.230293]  vfs_fsync_range+0x46/0xa0
[  663.230294]  dio_complete+0x181/0x1b0
[  663.230295]  dio_aio_complete_work+0x17/0x20
[  663.230296]  process_one_work+0x208/0x6a0
[  663.230298]  ? process_one_work+0x18d/0x6a0
[  663.230299]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230301]  worker_thread+0x49/0x4a0
[  663.230303]  kthread+0x107/0x140
[  663.230304]  ? process_one_work+0x6a0/0x6a0
[  663.230305]  ? kthread_create_on_node+0x40/0x40
[  663.230307]  ret_from_fork+0x2e/0x40
[  663.230308] kworker/10:97   D    0  9389      2 0x00000000
[  663.230311] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230312] Call Trace:
[  663.230314]  __schedule+0x2da/0xb00
[  663.230316]  schedule+0x38/0x90
[  663.230317]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230319]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230321]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230324]  call_rwsem_down_write_failed+0x17/0x30
[  663.230326]  down_write+0x5a/0x70
[  663.230327]  ? __generic_file_fsync+0x43/0x90
[  663.230329]  __generic_file_fsync+0x43/0x90
[  663.230331]  ext4_sync_file+0x2b4/0x540
[  663.230333]  vfs_fsync_range+0x46/0xa0
[  663.230335]  dio_complete+0x181/0x1b0
[  663.230336]  dio_aio_complete_work+0x17/0x20
[  663.230337]  process_one_work+0x208/0x6a0
[  663.230338]  ? process_one_work+0x18d/0x6a0
[  663.230340]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230342]  worker_thread+0x49/0x4a0
[  663.230344]  kthread+0x107/0x140
[  663.230345]  ? process_one_work+0x6a0/0x6a0
[  663.230347]  ? kthread_create_on_node+0x40/0x40
[  663.230348]  ret_from_fork+0x2e/0x40
[  663.230350] kworker/10:98   D    0  9390      2 0x00000000
[  663.230353] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230354] Call Trace:
[  663.230356]  __schedule+0x2da/0xb00
[  663.230358]  schedule+0x38/0x90
[  663.230360]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230362]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230364]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230367]  call_rwsem_down_write_failed+0x17/0x30
[  663.230369]  down_write+0x5a/0x70
[  663.230371]  ? __generic_file_fsync+0x43/0x90
[  663.230372]  __generic_file_fsync+0x43/0x90
[  663.230374]  ext4_sync_file+0x2b4/0x540
[  663.230376]  vfs_fsync_range+0x46/0xa0
[  663.230377]  dio_complete+0x181/0x1b0
[  663.230378]  dio_aio_complete_work+0x17/0x20
[  663.230380]  process_one_work+0x208/0x6a0
[  663.230381]  ? process_one_work+0x18d/0x6a0
[  663.230383]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230384]  worker_thread+0x49/0x4a0
[  663.230386]  kthread+0x107/0x140
[  663.230387]  ? process_one_work+0x6a0/0x6a0
[  663.230390]  ? kthread_create_on_node+0x40/0x40
[  663.230391]  ret_from_fork+0x2e/0x40
[  663.230393] kworker/10:99   D    0  9391      2 0x00000000
[  663.230396] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230397] Call Trace:
[  663.230399]  __schedule+0x2da/0xb00
[  663.230401]  schedule+0x38/0x90
[  663.230403]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230405]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230407]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230409]  call_rwsem_down_write_failed+0x17/0x30
[  663.230411]  down_write+0x5a/0x70
[  663.230413]  ? __generic_file_fsync+0x43/0x90
[  663.230414]  __generic_file_fsync+0x43/0x90
[  663.230416]  ext4_sync_file+0x2b4/0x540
[  663.230417]  vfs_fsync_range+0x46/0xa0
[  663.230419]  dio_complete+0x181/0x1b0
[  663.230420]  dio_aio_complete_work+0x17/0x20
[  663.230421]  process_one_work+0x208/0x6a0
[  663.230422]  ? process_one_work+0x18d/0x6a0
[  663.230424]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230425]  worker_thread+0x49/0x4a0
[  663.230427]  kthread+0x107/0x140
[  663.230428]  ? process_one_work+0x6a0/0x6a0
[  663.230431]  ? kthread_create_on_node+0x40/0x40
[  663.230432]  ret_from_fork+0x2e/0x40
[  663.230434] kworker/10:100  D    0  9392      2 0x00000000
[  663.230436] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230437] Call Trace:
[  663.230440]  __schedule+0x2da/0xb00
[  663.230442]  schedule+0x38/0x90
[  663.230445]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230447]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230448]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230451]  call_rwsem_down_write_failed+0x17/0x30
[  663.230453]  down_write+0x5a/0x70
[  663.230455]  ? __generic_file_fsync+0x43/0x90
[  663.230456]  __generic_file_fsync+0x43/0x90
[  663.230458]  ext4_sync_file+0x2b4/0x540
[  663.230460]  vfs_fsync_range+0x46/0xa0
[  663.230461]  dio_complete+0x181/0x1b0
[  663.230463]  dio_aio_complete_work+0x17/0x20
[  663.230464]  process_one_work+0x208/0x6a0
[  663.230465]  ? process_one_work+0x18d/0x6a0
[  663.230467]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230469]  worker_thread+0x49/0x4a0
[  663.230471]  kthread+0x107/0x140
[  663.230473]  ? process_one_work+0x6a0/0x6a0
[  663.230474]  ? kthread_create_on_node+0x40/0x40
[  663.230476]  ret_from_fork+0x2e/0x40
[  663.230478] kworker/10:101  D    0  9393      2 0x00000000
[  663.230480] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230481] Call Trace:
[  663.230483]  __schedule+0x2da/0xb00
[  663.230485]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.230487]  schedule+0x38/0x90
[  663.230489]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230491]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230493]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230494]  ? trace_hardirqs_on+0xd/0x10
[  663.230496]  call_rwsem_down_write_failed+0x17/0x30
[  663.230499]  down_write+0x5a/0x70
[  663.230500]  ? __generic_file_fsync+0x43/0x90
[  663.230502]  __generic_file_fsync+0x43/0x90
[  663.230504]  ext4_sync_file+0x2b4/0x540
[  663.230506]  vfs_fsync_range+0x46/0xa0
[  663.230507]  dio_complete+0x181/0x1b0
[  663.230508]  dio_aio_complete_work+0x17/0x20
[  663.230509]  process_one_work+0x208/0x6a0
[  663.230510]  ? process_one_work+0x18d/0x6a0
[  663.230512]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230514]  worker_thread+0x49/0x4a0
[  663.230515]  kthread+0x107/0x140
[  663.230517]  ? process_one_work+0x6a0/0x6a0
[  663.230518]  ? kthread_create_on_node+0x40/0x40
[  663.230519]  ret_from_fork+0x2e/0x40

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 23:14                           ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:14 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 14:51 -0700, Jens Axboe wrote:
> That is exactly what it means, looks like that one path doesn't handle
> that.  You'd have to exhaust the pool with atomic allocs for this to
> trigger, we don't do that at all in the normal IO path. So good catch,
> must be the dm part that enables this since it does NOWAIT allocations.
>
>
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 3136696f4991..c27613de80c5 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>  			rq = __blk_mq_alloc_request(data, op);
>  	} else {
>  		rq = __blk_mq_alloc_request(data, op);
> -		data->hctx->tags->rqs[rq->tag] = rq;
> +		if (rq)
> +			data->hctx->tags->rqs[rq->tag] = rq;
>  	}
> 
>  	if (rq) {

Hello Jens,

With these two patches applied the scheduling-while-atomic complaint and
the oops are gone. However, some tasks get stuck. Is the console output
below enough to figure out what is going on or do you want me to bisect
this? I don't think that any requests got stuck since no pending requests
are shown in /sys/block/*/mq/*/{pending,*/rq_list}.

Thanks,

Bart.

[  663.217074] sysrq: SysRq : Show Blocked State
[  663.217111]   task                        PC stack   pid father
[  663.217237] kworker/10:0    D    0    71      2 0x00000000
[  663.217267] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.217289] Call Trace:
[  663.217313]  __schedule+0x2da/0xb00
[  663.217337]  ? bit_wait+0x50/0x50
[  663.217360]  schedule+0x38/0x90
[  663.217383]  schedule_timeout+0x2fe/0x640
[  663.217406]  ? mark_held_locks+0x6f/0xa0
[  663.217430]  ? ktime_get+0x74/0x130
[  663.217452]  ? bit_wait+0x50/0x50
[  663.217473]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.217497]  ? trace_hardirqs_on+0xd/0x10
[  663.217520]  ? ktime_get+0x98/0x130
[  663.217542]  ? __delayacct_blkio_start+0x1a/0x30
[  663.217564]  ? bit_wait+0x50/0x50
[  663.217586]  io_schedule_timeout+0x9f/0x110
[  663.217609]  bit_wait_io+0x16/0x60
[  663.217637]  __wait_on_bit+0x53/0x80
[  663.217659]  ? bit_wait+0x50/0x50
[  663.217680]  out_of_line_wait_on_bit+0x6e/0x80
[  663.217703]  ? prepare_to_wait_event+0x170/0x170
[  663.217727]  sync_mapping_buffers+0x22f/0x390
[  663.217750]  __generic_file_fsync+0x4d/0x90
[  663.217772]  ext4_sync_file+0x2b4/0x540
[  663.217793]  vfs_fsync_range+0x46/0xa0
[  663.217814]  dio_complete+0x181/0x1b0
[  663.217835]  dio_aio_complete_work+0x17/0x20
[  663.217856]  process_one_work+0x208/0x6a0
[  663.217878]  ? process_one_work+0x18d/0x6a0
[  663.217899]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.217919]  worker_thread+0x49/0x4a0
[  663.217941]  kthread+0x107/0x140
[  663.217962]  ? process_one_work+0x6a0/0x6a0
[  663.217982]  ? kthread_create_on_node+0x40/0x40
[  663.218003]  ? acpi_ps_create_op+0x1c0/0x35a
[  663.218025]  ? acpi_ps_alloc_op+0x3f/0x80
[  663.218045]  ? acpi_os_acquire_object+0x28/0x2a
[  663.218068]  ret_from_fork+0x2e/0x40
[  663.218112] kworker/u24:8   D    0   141      2 0x00000000
[  663.218139] Workqueue: writeback wb_workfn (flush-254:0)
[  663.218160] Call Trace:
[  663.218182]  __schedule+0x2da/0xb00
[  663.218209]  schedule+0x38/0x90
[  663.218233]  schedule_timeout+0x2fe/0x640
[  663.218263]  ? ktime_get+0x74/0x130
[  663.218284]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.218307]  ? trace_hardirqs_on+0xd/0x10
[  663.218329]  ? ktime_get+0x98/0x130
[  663.218352]  ? __delayacct_blkio_start+0x1a/0x30
[  663.218448]  io_schedule_timeout+0x9f/0x110
[  663.218475]  blk_mq_get_tag+0x158/0x260
[  663.218499]  ? remove_wait_queue+0x70/0x70
[  663.218525]  __blk_mq_alloc_request+0x16/0xe0
[  663.218548]  blk_mq_sched_get_request+0x279/0x370
[  663.218571]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.218596]  ? generic_make_request+0xca/0x290
[  663.218619]  blk_sq_make_request+0x111/0xc90
[  663.218642]  ? blk_queue_enter+0x2d/0x280
[  663.218665]  ? generic_make_request+0xca/0x290
[  663.218688]  generic_make_request+0xd7/0x290
[  663.218712]  ? _raw_spin_unlock_irqrestore+0x31/0x50
[  663.218737]  submit_bio+0x5f/0x120
[  663.218761]  submit_bh_wbc+0x14d/0x180
[  663.218783]  __block_write_full_page+0x193/0x3f0
[  663.218805]  ? I_BDEV+0x10/0x10
[  663.218826]  ? I_BDEV+0x10/0x10
[  663.218847]  block_write_full_page+0xd0/0x120
[  663.218870]  blkdev_writepage+0x13/0x20
[  663.218892]  __writepage+0x11/0x40
[  663.218914]  write_cache_pages+0x216/0x640
[  663.218935]  ? wb_position_ratio+0x1f0/0x1f0
[  663.218957]  generic_writepages+0x41/0x60
[  663.218979]  blkdev_writepages+0x2a/0x30
[  663.219000]  do_writepages+0x1c/0x30
[  663.219022]  __writeback_single_inode+0x57/0x720
[  663.219042]  ? _raw_spin_unlock+0x22/0x30
[  663.219064]  writeback_sb_inodes+0x2f4/0x6a0
[  663.219086]  __writeback_inodes_wb+0x8d/0xc0
[  663.219107]  wb_writeback+0x33c/0x530
[  663.219127]  ? mark_held_locks+0x6f/0xa0
[  663.219149]  wb_workfn+0x356/0x630
[  663.219170]  process_one_work+0x208/0x6a0
[  663.219191]  ? process_one_work+0x18d/0x6a0
[  663.219212]  worker_thread+0x49/0x4a0
[  663.219233]  kthread+0x107/0x140
[  663.219253]  ? process_one_work+0x6a0/0x6a0
[  663.219275]  ? kthread_create_on_node+0x40/0x40
[  663.219299]  ret_from_fork+0x2e/0x40
[  663.219323] kworker/10:1    D    0   155      2 0x00000000
[  663.219349] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.219371] Call Trace:
[  663.219393]  __schedule+0x2da/0xb00
[  663.219421]  ? bit_wait+0x50/0x50
[  663.219443]  schedule+0x38/0x90
[  663.219465]  schedule_timeout+0x2fe/0x640
[  663.219487]  ? mark_held_locks+0x6f/0xa0
[  663.219509]  ? ktime_get+0x74/0x130
[  663.219531]  ? bit_wait+0x50/0x50
[  663.219553]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.219576]  ? trace_hardirqs_on+0xd/0x10
[  663.219598]  ? ktime_get+0x98/0x130
[  663.219619]  ? __delayacct_blkio_start+0x1a/0x30
[  663.219643]  ? bit_wait+0x50/0x50
[  663.219665]  io_schedule_timeout+0x9f/0x110
[  663.219688]  bit_wait_io+0x16/0x60
[  663.219711]  __wait_on_bit+0x53/0x80
[  663.219734]  ? bit_wait+0x50/0x50
[  663.219756]  out_of_line_wait_on_bit+0x6e/0x80
[  663.219779]  ? prepare_to_wait_event+0x170/0x170
[  663.219803]  sync_mapping_buffers+0x22f/0x390
[  663.219826]  __generic_file_fsync+0x4d/0x90
[  663.219848]  ext4_sync_file+0x2b4/0x540
[  663.219870]  vfs_fsync_range+0x46/0xa0
[  663.219892]  dio_complete+0x181/0x1b0
[  663.219915]  dio_aio_complete_work+0x17/0x20
[  663.219939]  process_one_work+0x208/0x6a0
[  663.219962]  ? process_one_work+0x18d/0x6a0
[  663.219986]  worker_thread+0x49/0x4a0
[  663.220011]  kthread+0x107/0x140
[  663.220038]  ? process_one_work+0x6a0/0x6a0
[  663.220063]  ? kthread_create_on_node+0x40/0x40
[  663.220090]  ret_from_fork+0x2e/0x40
[  663.220121] kworker/4:2     D    0   284      2 0x00000000
[  663.220148] Workqueue: srp_remove srp_remove_work [ib_srp]
[  663.220171] Call Trace:
[  663.220193]  __schedule+0x2da/0xb00
[  663.220214]  schedule+0x38/0x90
[  663.220236]  blk_mq_freeze_queue_wait+0x51/0xa0
[  663.220258]  ? remove_wait_queue+0x70/0x70
[  663.220281]  blk_mq_freeze_queue+0x15/0x20
[  663.220302]  blk_freeze_queue+0x9/0x10
[  663.220324]  blk_cleanup_queue+0xdd/0x290
[  663.220346]  __scsi_remove_device+0x49/0xd0
[  663.220368]  scsi_forget_host+0x5b/0x60
[  663.220390]  scsi_remove_host+0x6c/0x110
[  663.220412]  srp_remove_work+0x8b/0x220 [ib_srp]
[  663.220434]  process_one_work+0x208/0x6a0
[  663.220454]  ? process_one_work+0x18d/0x6a0
[  663.220475]  worker_thread+0x49/0x4a0
[  663.220496]  kthread+0x107/0x140
[  663.220516]  ? process_one_work+0x6a0/0x6a0
[  663.220537]  ? kthread_create_on_node+0x40/0x40
[  663.220560]  ret_from_fork+0x2e/0x40
[  663.220581] kworker/10:2    D    0   285      2 0x00000000
[  663.220605] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.220626] Call Trace:
[  663.220648]  __schedule+0x2da/0xb00
[  663.220669]  ? bit_wait+0x50/0x50
[  663.220690]  schedule+0x38/0x90
[  663.220711]  schedule_timeout+0x2fe/0x640
[  663.220732]  ? mark_held_locks+0x6f/0xa0
[  663.220753]  ? ktime_get+0x74/0x130
[  663.220774]  ? bit_wait+0x50/0x50
[  663.220794]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.220816]  ? trace_hardirqs_on+0xd/0x10
[  663.220837]  ? ktime_get+0x98/0x130
[  663.220858]  ? __delayacct_blkio_start+0x1a/0x30
[  663.220879]  ? bit_wait+0x50/0x50
[  663.220899]  io_schedule_timeout+0x9f/0x110
[  663.220920]  bit_wait_io+0x16/0x60
[  663.220941]  __wait_on_bit+0x53/0x80
[  663.220963]  ? bit_wait+0x50/0x50
[  663.220993]  out_of_line_wait_on_bit+0x6e/0x80
[  663.221015]  ? prepare_to_wait_event+0x170/0x170
[  663.221037]  sync_mapping_buffers+0x22f/0x390
[  663.221059]  __generic_file_fsync+0x4d/0x90
[  663.221081]  ext4_sync_file+0x2b4/0x540
[  663.221103]  vfs_fsync_range+0x46/0xa0
[  663.221124]  dio_complete+0x181/0x1b0
[  663.221146]  dio_aio_complete_work+0x17/0x20
[  663.221170]  process_one_work+0x208/0x6a0
[  663.221191]  ? process_one_work+0x18d/0x6a0
[  663.221214]  worker_thread+0x49/0x4a0
[  663.221237]  kthread+0x107/0x140
[  663.221259]  ? process_one_work+0x6a0/0x6a0
[  663.221281]  ? kthread_create_on_node+0x40/0x40
[  663.221304]  ret_from_fork+0x2e/0x40
[  663.221330] kworker/10:3    D    0   405      2 0x00000000
[  663.221354] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.221379] Call Trace:
[  663.221401]  __schedule+0x2da/0xb00
[  663.221424]  ? bit_wait+0x50/0x50
[  663.221446]  schedule+0x38/0x90
[  663.221470]  schedule_timeout+0x2fe/0x640
[  663.221495]  ? ktime_get+0x74/0x130
[  663.221520]  ? bit_wait+0x50/0x50
[  663.221546]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.221574]  ? trace_hardirqs_on+0xd/0x10
[  663.221600]  ? ktime_get+0x98/0x130
[  663.221622]  ? __delayacct_blkio_start+0x1a/0x30
[  663.221646]  ? bit_wait+0x50/0x50
[  663.221668]  io_schedule_timeout+0x9f/0x110
[  663.221690]  bit_wait_io+0x16/0x60
[  663.221712]  __wait_on_bit+0x53/0x80
[  663.221734]  ? bit_wait+0x50/0x50
[  663.221756]  out_of_line_wait_on_bit+0x6e/0x80
[  663.221778]  ? prepare_to_wait_event+0x170/0x170
[  663.221801]  __sync_dirty_buffer+0xdc/0x130
[  663.221822]  sync_dirty_buffer+0xe/0x10
[  663.221844]  ext4_write_inode+0x121/0x140
[  663.221866]  __writeback_single_inode+0x3ae/0x720
[  663.221887]  ? _raw_spin_unlock+0x22/0x30
[  663.221909]  writeback_single_inode+0xd0/0x190
[  663.221932]  sync_inode_metadata+0x2f/0x40
[  663.221953]  __generic_file_fsync+0x74/0x90
[  663.221975]  ext4_sync_file+0x2b4/0x540
[  663.221995]  vfs_fsync_range+0x46/0xa0
[  663.222016]  dio_complete+0x181/0x1b0
[  663.222036]  dio_aio_complete_work+0x17/0x20
[  663.222057]  process_one_work+0x208/0x6a0
[  663.222078]  ? process_one_work+0x18d/0x6a0
[  663.222099]  worker_thread+0x49/0x4a0
[  663.222121]  kthread+0x107/0x140
[  663.222141]  ? process_one_work+0x6a0/0x6a0
[  663.222161]  ? kthread_create_on_node+0x40/0x40
[  663.222183]  ret_from_fork+0x2e/0x40
[  663.222277] fio             D    0  9265   8846 0x00000000
[  663.222302] Call Trace:
[  663.222325]  __schedule+0x2da/0xb00
[  663.222347]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.222372]  schedule+0x38/0x90
[  663.222396]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.222423]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.222450]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.222479]  ? ___slab_alloc+0x178/0x520
[  663.222503]  call_rwsem_down_write_failed+0x17/0x30
[  663.222528]  down_write+0x5a/0x70
[  663.222553]  ? ext4_file_write_iter+0x45/0x360
[  663.222576]  ext4_file_write_iter+0x45/0x360
[  663.222600]  ? __sb_start_write+0xde/0x200
[  663.222621]  ? aio_write+0x14e/0x160
[  663.222643]  aio_write+0xd1/0x160
[  663.222795]  ? __might_fault+0x3e/0x90
[  663.222823]  do_io_submit+0x37d/0x900
[  663.222848]  ? do_io_submit+0x1ac/0x900
[  663.222871]  SyS_io_submit+0xb/0x10
[  663.222893]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.222915] RIP: 0033:0x7fc269d02787
[  663.222937] RSP: 002b:00007fc249e6f948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.222961] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.222984] RDX: 00007fc24004f190 RSI: 0000000000000001 RDI: 00007fc27bb=
fc000
[  663.223006] RBP: 0000000000000038 R08: 0000000000000001 R09: 00007fc2400=
4cfe0
[  663.223029] R10: 00007fc240039000 R11: 0000000000000202 R12: 00007fc24f0=
52000
[  663.223051] R13: 00007fc24004f360 R14: 0000000000000000 R15: 00000000000=
00001
[  663.223074] fio             D    0  9266   8846 0x00000000
[  663.223286] Call Trace:
[  663.223311]  __schedule+0x2da/0xb00
[  663.223333]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.223356]  schedule+0x38/0x90
[  663.223379]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.223402]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.223424]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.223447]  ? ___slab_alloc+0x178/0x520
[  663.223470]  call_rwsem_down_write_failed+0x17/0x30
[  663.223493]  down_write+0x5a/0x70
[  663.223514]  ? ext4_file_write_iter+0x45/0x360
[  663.223536]  ext4_file_write_iter+0x45/0x360
[  663.223558]  ? __sb_start_write+0xde/0x200
[  663.223579]  ? aio_write+0x14e/0x160
[  663.223599]  aio_write+0xd1/0x160
[  663.223621]  ? __might_fault+0x3e/0x90
[  663.223642]  do_io_submit+0x37d/0x900
[  663.223663]  ? do_io_submit+0x1ac/0x900
[  663.223684]  SyS_io_submit+0xb/0x10
[  663.223705]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.223725] RIP: 0033:0x7fc269d02787
[  663.223747] RSP: 002b:00007fc24a670948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.223769] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.223790] RDX: 00007fc23804f188 RSI: 0000000000000001 RDI: 00007fc27bc=
08000
[  663.223811] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc2380=
4d2a0
[  663.223833] R10: 00007fc238050000 R11: 0000000000000202 R12: 00000000000=
00020
[  663.223855] R13: 00007fc23804ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.223877] fio             D    0  9267   8846 0x00000000
[  663.224191] Call Trace:
[  663.224215]  __schedule+0x2da/0xb00
[  663.224236]  schedule+0x38/0x90
[  663.224258]  schedule_timeout+0x2fe/0x640
[  663.224280]  ? mark_held_locks+0x6f/0xa0
[  663.224303]  ? ktime_get+0x74/0x130
[  663.224325]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.224346]  ? trace_hardirqs_on+0xd/0x10
[  663.224368]  ? ktime_get+0x98/0x130
[  663.224389]  ? __delayacct_blkio_start+0x1a/0x30
[  663.224411]  io_schedule_timeout+0x9f/0x110
[  663.224433]  blk_mq_get_tag+0x158/0x260
[  663.224454]  ? remove_wait_queue+0x70/0x70
[  663.224476]  __blk_mq_alloc_request+0x16/0xe0
[  663.224521]  blk_mq_sched_get_request+0x279/0x370
[  663.224544]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.224567]  ? generic_make_request+0xca/0x290
[  663.224592]  blk_sq_make_request+0x111/0xc90
[  663.224620]  ? blk_queue_enter+0x2d/0x280
[  663.224643]  ? generic_make_request+0xca/0x290
[  663.224666]  generic_make_request+0xd7/0x290
[  663.224690]  submit_bio+0x5f/0x120
[  663.224711]  ? trace_hardirqs_on+0xd/0x10
[  663.224733]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.224758]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.224781]  ? _raw_spin_unlock+0x22/0x30
[  663.224806]  ? ext4_get_block_trans+0xe0/0xe0
[  663.224829]  __blockdev_direct_IO+0x35/0x40
[  663.224852]  ext4_direct_IO+0x19c/0x7b0
[  663.224874]  generic_file_direct_write+0xa6/0x150
[  663.224897]  __generic_file_write_iter+0xbb/0x1c0
[  663.224920]  ext4_file_write_iter+0x77/0x360
[  663.224946]  ? __sb_start_write+0xde/0x200
[  663.224967]  ? aio_write+0x14e/0x160
[  663.224988]  aio_write+0xd1/0x160
[  663.225009]  ? __might_fault+0x3e/0x90
[  663.225029]  do_io_submit+0x37d/0x900
[  663.225051]  ? do_io_submit+0x1ac/0x900
[  663.225076]  SyS_io_submit+0xb/0x10
[  663.225102]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225128] RIP: 0033:0x7fc269d02787
[  663.225151] RSP: 002b:00007fc24ae71948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.225175] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225205] RDX: 00007fc22804f2c8 RSI: 0000000000000001 RDI: 00007fc27bc=
0e000
[  663.225232] RBP: 00000000000000b0 R08: 0000000000000001 R09: 00007fc2280=
45fa0
[  663.225259] R10: 00007fc228012000 R11: 0000000000000202 R12: 00007fc24f0=
6e110
[  663.225283] R13: 00007fc22804f360 R14: 0000000000000000 R15: 00000000000=
00001
[  663.225309] fio             D    0  9268   8846 0x00000000
[  663.225338] Call Trace:
[  663.225366]  __schedule+0x2da/0xb00
[  663.225392]  schedule+0x38/0x90
[  663.225420]  schedule_timeout+0x2fe/0x640
[  663.225446]  ? mark_held_locks+0x6f/0xa0
[  663.225483]  ? ktime_get+0x74/0x130
[  663.225484]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225486]  ? trace_hardirqs_on+0xd/0x10
[  663.225487]  ? ktime_get+0x98/0x130
[  663.225489]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225491]  io_schedule_timeout+0x9f/0x110
[  663.225493]  blk_mq_get_tag+0x158/0x260
[  663.225494]  ? remove_wait_queue+0x70/0x70
[  663.225496]  __blk_mq_alloc_request+0x16/0xe0
[  663.225499]  blk_mq_sched_get_request+0x279/0x370
[  663.225500]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225503]  ? generic_make_request+0xca/0x290
[  663.225505]  blk_sq_make_request+0x111/0xc90
[  663.225507]  ? blk_queue_enter+0x2d/0x280
[  663.225509]  ? generic_make_request+0xca/0x290
[  663.225511]  generic_make_request+0xd7/0x290
[  663.225513]  submit_bio+0x5f/0x120
[  663.225514]  ? trace_hardirqs_on+0xd/0x10
[  663.225516]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225518]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225520]  ? _raw_spin_unlock+0x22/0x30
[  663.225522]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225524]  __blockdev_direct_IO+0x35/0x40
[  663.225526]  ext4_direct_IO+0x19c/0x7b0
[  663.225528]  generic_file_direct_write+0xa6/0x150
[  663.225530]  __generic_file_write_iter+0xbb/0x1c0
[  663.225531]  ext4_file_write_iter+0x77/0x360
[  663.225533]  ? __sb_start_write+0xde/0x200
[  663.225534]  ? aio_write+0x14e/0x160
[  663.225536]  aio_write+0xd1/0x160
[  663.225537]  ? __might_fault+0x3e/0x90
[  663.225539]  do_io_submit+0x37d/0x900
[  663.225540]  ? do_io_submit+0x1ac/0x900
[  663.225542]  SyS_io_submit+0xb/0x10
[  663.225543]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225544] RIP: 0033:0x7fc269d02787
[  663.225545] RSP: 002b:00007fc24f04e948 EFLAGS: 00000212 ORIG_RAX: 000000=
00000000d1
[  663.225549] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225550] RDX: 00007fc22004f1d0 RSI: 0000000000000001 RDI: 00007fc27bc=
00000
[  663.225551] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc2200=
4b8e0
[  663.225552] R10: 00007fc220031000 R11: 0000000000000212 R12: 00000000000=
00020
[  663.225552] R13: 00007fc22004ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.225554] fio             D    0  9269   8846 0x00000000
[  663.225557] Call Trace:
[  663.225559]  __schedule+0x2da/0xb00
[  663.225560]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.225562]  schedule+0x38/0x90
[  663.225564]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.225566]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.225568]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.225570]  ? ___slab_alloc+0x178/0x520
[  663.225573]  call_rwsem_down_write_failed+0x17/0x30
[  663.225575]  down_write+0x5a/0x70
[  663.225576]  ? ext4_file_write_iter+0x45/0x360
[  663.225578]  ext4_file_write_iter+0x45/0x360
[  663.225579]  ? __sb_start_write+0xde/0x200
[  663.225581]  ? aio_write+0x14e/0x160
[  663.225582]  aio_write+0xd1/0x160
[  663.225584]  ? __might_fault+0x3e/0x90
[  663.225585]  do_io_submit+0x37d/0x900
[  663.225586]  ? do_io_submit+0x1ac/0x900
[  663.225588]  SyS_io_submit+0xb/0x10
[  663.225589]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225590] RIP: 0033:0x7fc269d02787
[  663.225591] RSP: 002b:00007fc24e84d948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.225593] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225594] RDX: 00007fc23004f1e8 RSI: 0000000000000001 RDI: 00007fc27bc=
06000
[  663.225595] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc2300=
4b020
[  663.225595] R10: 00007fc23002e000 R11: 0000000000000202 R12: 00000000000=
00020
[  663.225596] R13: 00007fc23004ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.225598] fio             D    0  9270   8846 0x00000000
[  663.225600] Call Trace:
[  663.225602]  __schedule+0x2da/0xb00
[  663.225603]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.225605]  schedule+0x38/0x90
[  663.225607]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.225609]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.225611]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.225614]  ? ___slab_alloc+0x178/0x520
[  663.225616]  call_rwsem_down_write_failed+0x17/0x30
[  663.225618]  down_write+0x5a/0x70
[  663.225620]  ? ext4_file_write_iter+0x45/0x360
[  663.225622]  ext4_file_write_iter+0x45/0x360
[  663.225623]  ? __sb_start_write+0xde/0x200
[  663.225624]  ? aio_write+0x14e/0x160
[  663.225626]  aio_write+0xd1/0x160
[  663.225627]  ? __might_fault+0x3e/0x90
[  663.225629]  do_io_submit+0x37d/0x900
[  663.225630]  ? do_io_submit+0x1ac/0x900
[  663.225632]  SyS_io_submit+0xb/0x10
[  663.225633]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225634] RIP: 0033:0x7fc269d02787
[  663.225635] RSP: 002b:00007fc24e04c948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.225636] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225637] RDX: 00007fc21804f298 RSI: 0000000000000001 RDI: 00007fc27bc=
10000
[  663.225638] RBP: 0000000011bb7409 R08: 0000000000000001 R09: 00007fc2180=
470e0
[  663.225639] R10: 00007fc218018000 R11: 0000000000000202 R12: 0000000068b=
af4ba
[  663.225640] R13: 00000000165c8e46 R14: 000000002adf21b1 R15: 00000000169=
850e2
[  663.225642] fio             D    0  9271   8846 0x00000000
[  663.225644] Call Trace:
[  663.225646]  __schedule+0x2da/0xb00
[  663.225647]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.225649]  schedule+0x38/0x90
[  663.225651]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.225653]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.225656]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.225658]  ? ___slab_alloc+0x178/0x520
[  663.225660]  call_rwsem_down_write_failed+0x17/0x30
[  663.225662]  down_write+0x5a/0x70
[  663.225664]  ? ext4_file_write_iter+0x45/0x360
[  663.225665]  ext4_file_write_iter+0x45/0x360
[  663.225667]  ? __sb_start_write+0xde/0x200
[  663.225668]  ? aio_write+0x14e/0x160
[  663.225669]  aio_write+0xd1/0x160
[  663.225671]  ? __might_fault+0x3e/0x90
[  663.225673]  do_io_submit+0x37d/0x900
[  663.225674]  ? do_io_submit+0x1ac/0x900
[  663.225676]  SyS_io_submit+0xb/0x10
[  663.225677]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225678] RIP: 0033:0x7fc269d02787
[  663.225679] RSP: 002b:00007fc24d84b948 EFLAGS: 00000206 ORIG_RAX: 000000=
00000000d1
[  663.225681] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225682] RDX: 00007fc21004f210 RSI: 0000000000000001 RDI: 00007fc27bc=
0c000
[  663.225682] RBP: 0000000000000060 R08: 0000000000000001 R09: 00007fc2100=
4a1e0
[  663.225683] R10: 00007fc210029000 R11: 0000000000000206 R12: 00007fc24f0=
a6330
[  663.225684] R13: 00007fc21004f360 R14: 0000000000000000 R15: 00000000000=
00001
[  663.225686] fio             D    0  9272   8846 0x00000000
[  663.225689] Call Trace:
[  663.225691]  __schedule+0x2da/0xb00
[  663.225693]  schedule+0x38/0x90
[  663.225695]  schedule_timeout+0x2fe/0x640
[  663.225696]  ? mark_held_locks+0x6f/0xa0
[  663.225699]  ? ktime_get+0x74/0x130
[  663.225700]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225702]  ? trace_hardirqs_on+0xd/0x10
[  663.225703]  ? ktime_get+0x98/0x130
[  663.225705]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225707]  io_schedule_timeout+0x9f/0x110
[  663.225708]  blk_mq_get_tag+0x158/0x260
[  663.225710]  ? remove_wait_queue+0x70/0x70
[  663.225712]  __blk_mq_alloc_request+0x16/0xe0
[  663.225714]  blk_mq_sched_get_request+0x279/0x370
[  663.225715]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225718]  ? generic_make_request+0xca/0x290
[  663.225720]  blk_sq_make_request+0x111/0xc90
[  663.225722]  ? blk_queue_enter+0x2d/0x280
[  663.225724]  ? generic_make_request+0xca/0x290
[  663.225727]  generic_make_request+0xd7/0x290
[  663.225730]  submit_bio+0x5f/0x120
[  663.225731]  ? trace_hardirqs_on+0xd/0x10
[  663.225733]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225735]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225737]  ? _raw_spin_unlock+0x22/0x30
[  663.225739]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225741]  __blockdev_direct_IO+0x35/0x40
[  663.225743]  ext4_direct_IO+0x19c/0x7b0
[  663.225745]  generic_file_direct_write+0xa6/0x150
[  663.225747]  __generic_file_write_iter+0xbb/0x1c0
[  663.225749]  ext4_file_write_iter+0x77/0x360
[  663.225750]  ? __sb_start_write+0xde/0x200
[  663.225752]  ? aio_write+0x14e/0x160
[  663.225753]  aio_write+0xd1/0x160
[  663.225755]  ? __might_fault+0x3e/0x90
[  663.225756]  do_io_submit+0x37d/0x900
[  663.225758]  ? do_io_submit+0x1ac/0x900
[  663.225759]  SyS_io_submit+0xb/0x10
[  663.225761]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225762] RIP: 0033:0x7fc269d02787
[  663.225763] RSP: 002b:00007fc24d04a948 EFLAGS: 00000206 ORIG_RAX: 000000=
00000000d1
[  663.225764] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225765] RDX: 00007fc20804f260 RSI: 0000000000000001 RDI: 00007fc27bb=
fa000
[  663.225766] RBP: 00000000000000b8 R08: 0000000000000001 R09: 00007fc2080=
48520
[  663.225767] R10: 00007fc20801f000 R11: 0000000000000206 R12: 00007fc24f0=
b43b8
[  663.225768] R13: 00007fc20804f360 R14: 0000000000000000 R15: 00000000000=
00001
[  663.225770] fio             D    0  9273   8846 0x00000000
[  663.225772] Call Trace:
[  663.225774]  __schedule+0x2da/0xb00
[  663.225776]  schedule+0x38/0x90
[  663.225778]  schedule_timeout+0x2fe/0x640
[  663.225780]  ? mark_held_locks+0x6f/0xa0
[  663.225782]  ? ktime_get+0x74/0x130
[  663.225783]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225784]  ? trace_hardirqs_on+0xd/0x10
[  663.225786]  ? ktime_get+0x98/0x130
[  663.225788]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225790]  io_schedule_timeout+0x9f/0x110
[  663.225791]  blk_mq_get_tag+0x158/0x260
[  663.225793]  ? remove_wait_queue+0x70/0x70
[  663.225795]  __blk_mq_alloc_request+0x16/0xe0
[  663.225797]  blk_mq_sched_get_request+0x279/0x370
[  663.225798]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225800]  ? generic_make_request+0xca/0x290
[  663.225803]  blk_sq_make_request+0x111/0xc90
[  663.225805]  ? blk_queue_enter+0x2d/0x280
[  663.225807]  ? generic_make_request+0xca/0x290
[  663.225809]  generic_make_request+0xd7/0x290
[  663.225811]  submit_bio+0x5f/0x120
[  663.225813]  ? trace_hardirqs_on+0xd/0x10
[  663.225814]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225816]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225818]  ? _raw_spin_unlock+0x22/0x30
[  663.225820]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225822]  __blockdev_direct_IO+0x35/0x40
[  663.225824]  ext4_direct_IO+0x19c/0x7b0
[  663.225826]  generic_file_direct_write+0xa6/0x150
[  663.225828]  __generic_file_write_iter+0xbb/0x1c0
[  663.225829]  ext4_file_write_iter+0x77/0x360
[  663.225831]  ? __sb_start_write+0xde/0x200
[  663.225832]  ? aio_write+0x14e/0x160
[  663.225833]  aio_write+0xd1/0x160
[  663.225835]  ? __might_fault+0x3e/0x90
[  663.225837]  do_io_submit+0x37d/0x900
[  663.225838]  ? do_io_submit+0x1ac/0x900
[  663.225839]  SyS_io_submit+0xb/0x10
[  663.225841]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225842] RIP: 0033:0x7fc269d02787
[  663.225842] RSP: 002b:00007fc24c849948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.225844] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225845] RDX: 00007fc1d004f200 RSI: 0000000000000001 RDI: 00007fc27bb=
f2000
[  663.225846] RBP: 0000000000000050 R08: 0000000000000001 R09: 00007fc1d00=
4a7a0
[  663.225847] R10: 00007fc1d002b000 R11: 0000000000000202 R12: 00007fc24f0=
c2440
[  663.225848] R13: 00007fc1d004f360 R14: 0000000000000000 R15: 00000000000=
00001
[  663.225849] fio             D    0  9274   8846 0x00000000
[  663.225851] Call Trace:
[  663.225853]  __schedule+0x2da/0xb00
[  663.225855]  schedule+0x38/0x90
[  663.225857]  schedule_timeout+0x2fe/0x640
[  663.225858]  ? mark_held_locks+0x6f/0xa0
[  663.225860]  ? ktime_get+0x74/0x130
[  663.225861]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225863]  ? trace_hardirqs_on+0xd/0x10
[  663.225864]  ? ktime_get+0x98/0x130
[  663.225866]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225868]  io_schedule_timeout+0x9f/0x110
[  663.225869]  blk_mq_get_tag+0x158/0x260
[  663.225871]  ? remove_wait_queue+0x70/0x70
[  663.225873]  __blk_mq_alloc_request+0x16/0xe0
[  663.225875]  blk_mq_sched_get_request+0x279/0x370
[  663.225876]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225878]  ? generic_make_request+0xca/0x290
[  663.225880]  blk_sq_make_request+0x111/0xc90
[  663.225882]  ? blk_queue_enter+0x2d/0x280
[  663.225885]  ? generic_make_request+0xca/0x290
[  663.225887]  generic_make_request+0xd7/0x290
[  663.225889]  submit_bio+0x5f/0x120
[  663.225890]  ? trace_hardirqs_on+0xd/0x10
[  663.225892]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225894]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225895]  ? _raw_spin_unlock+0x22/0x30
[  663.225898]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225901]  __blockdev_direct_IO+0x35/0x40
[  663.225903]  ext4_direct_IO+0x19c/0x7b0
[  663.225905]  generic_file_direct_write+0xa6/0x150
[  663.225907]  __generic_file_write_iter+0xbb/0x1c0
[  663.225908]  ext4_file_write_iter+0x77/0x360
[  663.225910]  ? __sb_start_write+0xde/0x200
[  663.225911]  ? aio_write+0x14e/0x160
[  663.225913]  aio_write+0xd1/0x160
[  663.225915]  ? __might_fault+0x3e/0x90
[  663.225916]  do_io_submit+0x37d/0x900
[  663.225918]  ? do_io_submit+0x1ac/0x900
[  663.225920]  SyS_io_submit+0xb/0x10
[  663.225921]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.225922] RIP: 0033:0x7fc269d02787
[  663.225923] RSP: 002b:00007fc24c048948 EFLAGS: 00000206 ORIG_RAX: 000000=
00000000d1
[  663.225925] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.225926] RDX: 00007fc20004f1f0 RSI: 0000000000000001 RDI: 00007fc27bc=
02000
[  663.225927] RBP: 000000001d36c979 R08: 0000000000000001 R09: 00007fc2000=
4ad60
[  663.225927] R10: 00007fc20002d000 R11: 0000000000000206 R12: 00000000d32=
2e5e6
[  663.225928] R13: 000000002b3b7b00 R14: 00000000573a44d8 R15: 0000000001e=
412a3
[  663.225930] fio             D    0  9275   8846 0x00000000
[  663.225932] Call Trace:
[  663.225935]  __schedule+0x2da/0xb00
[  663.225936]  schedule+0x38/0x90
[  663.225939]  schedule_timeout+0x2fe/0x640
[  663.225940]  ? mark_held_locks+0x6f/0xa0
[  663.225942]  ? ktime_get+0x74/0x130
[  663.225943]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.225945]  ? trace_hardirqs_on+0xd/0x10
[  663.225946]  ? ktime_get+0x98/0x130
[  663.225948]  ? __delayacct_blkio_start+0x1a/0x30
[  663.225949]  io_schedule_timeout+0x9f/0x110
[  663.225951]  blk_mq_get_tag+0x158/0x260
[  663.225952]  ? remove_wait_queue+0x70/0x70
[  663.225954]  __blk_mq_alloc_request+0x16/0xe0
[  663.225956]  blk_mq_sched_get_request+0x279/0x370
[  663.225958]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.225960]  ? generic_make_request+0xca/0x290
[  663.225963]  blk_sq_make_request+0x111/0xc90
[  663.225965]  ? blk_queue_enter+0x2d/0x280
[  663.225967]  ? generic_make_request+0xca/0x290
[  663.225969]  generic_make_request+0xd7/0x290
[  663.225971]  submit_bio+0x5f/0x120
[  663.225973]  ? trace_hardirqs_on+0xd/0x10
[  663.225974]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.225976]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.225977]  ? _raw_spin_unlock+0x22/0x30
[  663.225980]  ? ext4_get_block_trans+0xe0/0xe0
[  663.225981]  __blockdev_direct_IO+0x35/0x40
[  663.225983]  ext4_direct_IO+0x19c/0x7b0
[  663.225985]  generic_file_direct_write+0xa6/0x150
[  663.225987]  __generic_file_write_iter+0xbb/0x1c0
[  663.225988]  ext4_file_write_iter+0x77/0x360
[  663.225990]  ? __sb_start_write+0xde/0x200
[  663.225991]  ? aio_write+0x14e/0x160
[  663.225992]  aio_write+0xd1/0x160
[  663.225994]  ? __might_fault+0x3e/0x90
[  663.225995]  do_io_submit+0x37d/0x900
[  663.225997]  ? do_io_submit+0x1ac/0x900
[  663.225998]  SyS_io_submit+0xb/0x10
[  663.226000]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226000] RIP: 0033:0x7fc269d02787
[  663.226001] RSP: 002b:00007fc24b847948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.226003] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.226004] RDX: 00007fc1f804f1c0 RSI: 0000000000000001 RDI: 00007fc27bb=
f8000
[  663.226005] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc1f80=
4bea0
[  663.226006] R10: 00007fc1f8033000 R11: 0000000000000202 R12: 00000000000=
00020
[  663.226007] R13: 00007fc1f804ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.226009] fio             D    0  9276   8846 0x00000000
[  663.226011] Call Trace:
[  663.226013]  __schedule+0x2da/0xb00
[  663.226014]  schedule+0x38/0x90
[  663.226016]  schedule_timeout+0x2fe/0x640
[  663.226018]  ? mark_held_locks+0x6f/0xa0
[  663.226019]  ? ktime_get+0x74/0x130
[  663.226021]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226022]  ? trace_hardirqs_on+0xd/0x10
[  663.226024]  ? ktime_get+0x98/0x130
[  663.226025]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226027]  io_schedule_timeout+0x9f/0x110
[  663.226028]  blk_mq_get_tag+0x158/0x260
[  663.226030]  ? remove_wait_queue+0x70/0x70
[  663.226032]  __blk_mq_alloc_request+0x16/0xe0
[  663.226033]  blk_mq_sched_get_request+0x279/0x370
[  663.226035]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226038]  ? generic_make_request+0xca/0x290
[  663.226040]  blk_sq_make_request+0x111/0xc90
[  663.226042]  ? blk_queue_enter+0x2d/0x280
[  663.226044]  ? generic_make_request+0xca/0x290
[  663.226046]  generic_make_request+0xd7/0x290
[  663.226048]  submit_bio+0x5f/0x120
[  663.226049]  ? trace_hardirqs_on+0xd/0x10
[  663.226051]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226053]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.226054]  ? _raw_spin_unlock+0x22/0x30
[  663.226057]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226058]  __blockdev_direct_IO+0x35/0x40
[  663.226060]  ext4_direct_IO+0x19c/0x7b0
[  663.226062]  generic_file_direct_write+0xa6/0x150
[  663.226064]  __generic_file_write_iter+0xbb/0x1c0
[  663.226066]  ext4_file_write_iter+0x77/0x360
[  663.226068]  ? __sb_start_write+0xde/0x200
[  663.226070]  ? aio_write+0x14e/0x160
[  663.226072]  aio_write+0xd1/0x160
[  663.226074]  ? __might_fault+0x3e/0x90
[  663.226076]  do_io_submit+0x37d/0x900
[  663.226077]  ? do_io_submit+0x1ac/0x900
[  663.226079]  SyS_io_submit+0xb/0x10
[  663.226080]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226081] RIP: 0033:0x7fc269d02787
[  663.226082] RSP: 002b:00007fc248e6d948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.226084] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.226085] RDX: 00007fc1f004f158 RSI: 0000000000000001 RDI: 00007fc27bb=
fe000
[  663.226086] RBP: 0000000000000815 R08: 0000000000000001 R09: 00007fc1f00=
4e3e0
[  663.226087] R10: 00007fc1f0040000 R11: 0000000000000202 R12: 00000000000=
006d0
[  663.226088] R13: 00007fc1f004e930 R14: 0000000000001000 R15: 00000000000=
00830
[  663.226089] fio             D    0  9277   8846 0x00000000
[  663.226091] Call Trace:
[  663.226094]  __schedule+0x2da/0xb00
[  663.226095]  schedule+0x38/0x90
[  663.226097]  schedule_timeout+0x2fe/0x640
[  663.226099]  ? mark_held_locks+0x6f/0xa0
[  663.226101]  ? ktime_get+0x74/0x130
[  663.226102]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226103]  ? trace_hardirqs_on+0xd/0x10
[  663.226105]  ? ktime_get+0x98/0x130
[  663.226107]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226109]  io_schedule_timeout+0x9f/0x110
[  663.226110]  blk_mq_get_tag+0x158/0x260
[  663.226112]  ? remove_wait_queue+0x70/0x70
[  663.226114]  __blk_mq_alloc_request+0x16/0xe0
[  663.226115]  blk_mq_sched_get_request+0x279/0x370
[  663.226117]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226119]  ? generic_make_request+0xca/0x290
[  663.226121]  blk_sq_make_request+0x111/0xc90
[  663.226123]  ? blk_queue_enter+0x2d/0x280
[  663.226125]  ? generic_make_request+0xca/0x290
[  663.226127]  generic_make_request+0xd7/0x290
[  663.226130]  submit_bio+0x5f/0x120
[  663.226131]  ? trace_hardirqs_on+0xd/0x10
[  663.226133]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226135]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.226136]  ? _raw_spin_unlock+0x22/0x30
[  663.226138]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226140]  __blockdev_direct_IO+0x35/0x40
[  663.226142]  ext4_direct_IO+0x19c/0x7b0
[  663.226144]  generic_file_direct_write+0xa6/0x150
[  663.226146]  __generic_file_write_iter+0xbb/0x1c0
[  663.226148]  ext4_file_write_iter+0x77/0x360
[  663.226149]  ? __sb_start_write+0xde/0x200
[  663.226150]  ? aio_write+0x14e/0x160
[  663.226152]  aio_write+0xd1/0x160
[  663.226153]  ? __might_fault+0x3e/0x90
[  663.226155]  do_io_submit+0x37d/0x900
[  663.226156]  ? do_io_submit+0x1ac/0x900
[  663.226157]  SyS_io_submit+0xb/0x10
[  663.226159]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226160] RIP: 0033:0x7fc269d02787
[  663.226160] RSP: 002b:00007fc24866c948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.226162] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.226163] RDX: 00007fc1d804f1e8 RSI: 0000000000000001 RDI: 00007fc27bb=
f6000
[  663.226164] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc1d80=
4b020
[  663.226165] R10: 00007fc1d802e000 R11: 0000000000000202 R12: 00000000000=
00020
[  663.226166] R13: 00007fc1d804ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.226167] fio             D    0  9278   8846 0x00000000
[  663.226169] Call Trace:
[  663.226171]  __schedule+0x2da/0xb00
[  663.226172]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.226174]  schedule+0x38/0x90
[  663.226176]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226178]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226181]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226183]  ? ___slab_alloc+0x178/0x520
[  663.226186]  call_rwsem_down_write_failed+0x17/0x30
[  663.226188]  down_write+0x5a/0x70
[  663.226190]  ? ext4_file_write_iter+0x45/0x360
[  663.226191]  ext4_file_write_iter+0x45/0x360
[  663.226193]  ? __sb_start_write+0xde/0x200
[  663.226194]  ? aio_write+0x14e/0x160
[  663.226195]  aio_write+0xd1/0x160
[  663.226197]  ? __might_fault+0x3e/0x90
[  663.226198]  do_io_submit+0x37d/0x900
[  663.226201]  ? do_io_submit+0x1ac/0x900
[  663.226202]  SyS_io_submit+0xb/0x10
[  663.226204]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226205] RIP: 0033:0x7fc269d02787
[  663.226206] RSP: 002b:00007fc247e6b948 EFLAGS: 00000206 ORIG_RAX: 000000=
00000000d1
[  663.226208] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.226209] RDX: 00007fc23c04f288 RSI: 0000000000000001 RDI: 00007fc27bc=
0a000
[  663.226210] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc23c0=
476a0
[  663.226211] R10: 00007fc23c01a000 R11: 0000000000000206 R12: 00000000000=
00020
[  663.226212] R13: 00007fc23c04ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.226214] fio             D    0  9279   8846 0x00000000
[  663.226216] Call Trace:
[  663.226218]  __schedule+0x2da/0xb00
[  663.226220]  schedule+0x38/0x90
[  663.226222]  schedule_timeout+0x2fe/0x640
[  663.226223]  ? mark_held_locks+0x6f/0xa0
[  663.226225]  ? ktime_get+0x74/0x130
[  663.226227]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226228]  ? trace_hardirqs_on+0xd/0x10
[  663.226230]  ? ktime_get+0x98/0x130
[  663.226231]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226233]  io_schedule_timeout+0x9f/0x110
[  663.226235]  blk_mq_get_tag+0x158/0x260
[  663.226237]  ? remove_wait_queue+0x70/0x70
[  663.226239]  __blk_mq_alloc_request+0x16/0xe0
[  663.226241]  blk_mq_sched_get_request+0x279/0x370
[  663.226243]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226245]  ? generic_make_request+0xca/0x290
[  663.226247]  blk_sq_make_request+0x111/0xc90
[  663.226249]  ? blk_queue_enter+0x2d/0x280
[  663.226251]  ? generic_make_request+0xca/0x290
[  663.226253]  generic_make_request+0xd7/0x290
[  663.226255]  submit_bio+0x5f/0x120
[  663.226257]  ? trace_hardirqs_on+0xd/0x10
[  663.226258]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226261]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226263]  __blockdev_direct_IO+0x35/0x40
[  663.226265]  ext4_direct_IO+0x19c/0x7b0
[  663.226267]  generic_file_direct_write+0xa6/0x150
[  663.226269]  __generic_file_write_iter+0xbb/0x1c0
[  663.226270]  ext4_file_write_iter+0x77/0x360
[  663.226272]  ? __sb_start_write+0xde/0x200
[  663.226274]  ? aio_write+0x14e/0x160
[  663.226275]  aio_write+0xd1/0x160
[  663.226276]  ? __might_fault+0x3e/0x90
[  663.226278]  do_io_submit+0x37d/0x900
[  663.226279]  ? do_io_submit+0x1ac/0x900
[  663.226281]  SyS_io_submit+0xb/0x10
[  663.226282]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226283] RIP: 0033:0x7fc269d02787
[  663.226284] RSP: 002b:00007fc24766a948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.226285] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.226286] RDX: 00007fc1e804f248 RSI: 0000000000000001 RDI: 00007fc27bb=
f4000
[  663.226287] RBP: 0000000000000088 R08: 0000000000000001 R09: 00007fc1e80=
48da0
[  663.226288] R10: 00007fc1e8022000 R11: 0000000000000202 R12: 00007fc24f1=
16770
[  663.226289] R13: 00007fc1e804f360 R14: 0000000000000000 R15: 00000000000=
00001
[  663.226291] fio             D    0  9280   8846 0x00000000
[  663.226293] Call Trace:
[  663.226295]  __schedule+0x2da/0xb00
[  663.226297]  schedule+0x38/0x90
[  663.226298]  schedule_timeout+0x2fe/0x640
[  663.226300]  ? mark_held_locks+0x6f/0xa0
[  663.226302]  ? ktime_get+0x74/0x130
[  663.226303]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226305]  ? trace_hardirqs_on+0xd/0x10
[  663.226307]  ? ktime_get+0x98/0x130
[  663.226309]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226310]  io_schedule_timeout+0x9f/0x110
[  663.226312]  blk_mq_get_tag+0x158/0x260
[  663.226313]  ? remove_wait_queue+0x70/0x70
[  663.226315]  __blk_mq_alloc_request+0x16/0xe0
[  663.226317]  blk_mq_sched_get_request+0x279/0x370
[  663.226319]  ? blk_mq_sched_bypass_insert+0x70/0x70
[  663.226321]  ? generic_make_request+0xca/0x290
[  663.226323]  blk_sq_make_request+0x111/0xc90
[  663.226325]  ? blk_queue_enter+0x2d/0x280
[  663.226327]  ? generic_make_request+0xca/0x290
[  663.226329]  generic_make_request+0xd7/0x290
[  663.226331]  submit_bio+0x5f/0x120
[  663.226333]  ? trace_hardirqs_on+0xd/0x10
[  663.226334]  do_blockdev_direct_IO+0x280f/0x31f0
[  663.226337]  ? debug_lockdep_rcu_enabled+0x1d/0x20
[  663.226338]  ? _raw_spin_unlock+0x22/0x30
[  663.226341]  ? ext4_get_block_trans+0xe0/0xe0
[  663.226343]  __blockdev_direct_IO+0x35/0x40
[  663.226345]  ext4_direct_IO+0x19c/0x7b0
[  663.226347]  generic_file_direct_write+0xa6/0x150
[  663.226349]  __generic_file_write_iter+0xbb/0x1c0
[  663.226350]  ext4_file_write_iter+0x77/0x360
[  663.226352]  ? __sb_start_write+0xde/0x200
[  663.226354]  ? aio_write+0x14e/0x160
[  663.226355]  aio_write+0xd1/0x160
[  663.226357]  ? __might_fault+0x3e/0x90
[  663.226358]  do_io_submit+0x37d/0x900
[  663.226360]  ? do_io_submit+0x1ac/0x900
[  663.226362]  SyS_io_submit+0xb/0x10
[  663.226363]  entry_SYSCALL_64_fastpath+0x18/0xad
[  663.226364] RIP: 0033:0x7fc269d02787
[  663.226365] RSP: 002b:00007fc246e69948 EFLAGS: 00000202 ORIG_RAX: 000000=
00000000d1
[  663.226367] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fc269d=
02787
[  663.226368] RDX: 00007fc1e004f188 RSI: 0000000000000001 RDI: 00007fc27bc=
04000
[  663.226369] RBP: 0000000000000065 R08: 0000000000000001 R09: 00007fc1e00=
4d2a0
[  663.226370] R10: 00007fc1e0050000 R11: 0000000000000202 R12: 00000000000=
00020
[  663.226371] R13: 00007fc1e004ffe0 R14: 0000000000001000 R15: 00000000000=
00080
[  663.226374] kworker/10:4    D    0  9296      2 0x00000000
[  663.226377] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226378] Call Trace:
[  663.226381]  __schedule+0x2da/0xb00
[  663.226383]  ? bit_wait+0x50/0x50
[  663.226384]  schedule+0x38/0x90
[  663.226386]  schedule_timeout+0x2fe/0x640
[  663.226388]  ? mark_held_locks+0x6f/0xa0
[  663.226390]  ? ktime_get+0x74/0x130
[  663.226392]  ? bit_wait+0x50/0x50
[  663.226393]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226395]  ? trace_hardirqs_on+0xd/0x10
[  663.226396]  ? ktime_get+0x98/0x130
[  663.226398]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226400]  ? bit_wait+0x50/0x50
[  663.226401]  io_schedule_timeout+0x9f/0x110
[  663.226404]  bit_wait_io+0x16/0x60
[  663.226406]  __wait_on_bit+0x53/0x80
[  663.226407]  ? bit_wait+0x50/0x50
[  663.226409]  out_of_line_wait_on_bit+0x6e/0x80
[  663.226411]  ? prepare_to_wait_event+0x170/0x170
[  663.226413]  sync_mapping_buffers+0x22f/0x390
[  663.226415]  __generic_file_fsync+0x4d/0x90
[  663.226418]  ext4_sync_file+0x2b4/0x540
[  663.226420]  vfs_fsync_range+0x46/0xa0
[  663.226421]  dio_complete+0x181/0x1b0
[  663.226423]  dio_aio_complete_work+0x17/0x20
[  663.226424]  process_one_work+0x208/0x6a0
[  663.226425]  ? process_one_work+0x18d/0x6a0
[  663.226427]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226429]  worker_thread+0x49/0x4a0
[  663.226431]  kthread+0x107/0x140
[  663.226432]  ? process_one_work+0x6a0/0x6a0
[  663.226434]  ? kthread_create_on_node+0x40/0x40
[  663.226436]  ret_from_fork+0x2e/0x40
[  663.226438] kworker/10:5    D    0  9297      2 0x00000000
[  663.226441] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226442] Call Trace:
[  663.226444]  __schedule+0x2da/0xb00
[  663.226446]  schedule+0x38/0x90
[  663.226448]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226450]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226452]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226454]  call_rwsem_down_write_failed+0x17/0x30
[  663.226457]  down_write+0x5a/0x70
[  663.226458]  ? __generic_file_fsync+0x43/0x90
[  663.226460]  __generic_file_fsync+0x43/0x90
[  663.226462]  ext4_sync_file+0x2b4/0x540
[  663.226463]  vfs_fsync_range+0x46/0xa0
[  663.226465]  dio_complete+0x181/0x1b0
[  663.226466]  dio_aio_complete_work+0x17/0x20
[  663.226468]  process_one_work+0x208/0x6a0
[  663.226470]  ? process_one_work+0x18d/0x6a0
[  663.226472]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226473]  worker_thread+0x49/0x4a0
[  663.226475]  kthread+0x107/0x140
[  663.226477]  ? process_one_work+0x6a0/0x6a0
[  663.226479]  ? kthread_create_on_node+0x40/0x40
[  663.226480]  ret_from_fork+0x2e/0x40
[  663.226482] kworker/10:6    D    0  9298      2 0x00000000
[  663.226485] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226486] Call Trace:
[  663.226488]  __schedule+0x2da/0xb00
[  663.226490]  schedule+0x38/0x90
[  663.226492]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226494]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226496]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226498]  call_rwsem_down_write_failed+0x17/0x30
[  663.226500]  down_write+0x5a/0x70
[  663.226503]  ? __generic_file_fsync+0x43/0x90
[  663.226504]  __generic_file_fsync+0x43/0x90
[  663.226506]  ext4_sync_file+0x2b4/0x540
[  663.226508]  vfs_fsync_range+0x46/0xa0
[  663.226509]  dio_complete+0x181/0x1b0
[  663.226510]  dio_aio_complete_work+0x17/0x20
[  663.226512]  process_one_work+0x208/0x6a0
[  663.226513]  ? process_one_work+0x18d/0x6a0
[  663.226515]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226517]  worker_thread+0x49/0x4a0
[  663.226519]  kthread+0x107/0x140
[  663.226521]  ? process_one_work+0x6a0/0x6a0
[  663.226522]  ? kthread_create_on_node+0x40/0x40
[  663.226524]  ret_from_fork+0x2e/0x40
[  663.226525] kworker/10:7    D    0  9299      2 0x00000000
[  663.226528] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226529] Call Trace:
[  663.226531]  __schedule+0x2da/0xb00
[  663.226533]  schedule+0x38/0x90
[  663.226535]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226537]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226539]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226541]  call_rwsem_down_write_failed+0x17/0x30
[  663.226543]  down_write+0x5a/0x70
[  663.226545]  ? __generic_file_fsync+0x43/0x90
[  663.226546]  __generic_file_fsync+0x43/0x90
[  663.226548]  ext4_sync_file+0x2b4/0x540
[  663.226549]  vfs_fsync_range+0x46/0xa0
[  663.226550]  dio_complete+0x181/0x1b0
[  663.226552]  dio_aio_complete_work+0x17/0x20
[  663.226553]  process_one_work+0x208/0x6a0
[  663.226554]  ? process_one_work+0x18d/0x6a0
[  663.226556]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226557]  worker_thread+0x49/0x4a0
[  663.226559]  kthread+0x107/0x140
[  663.226560]  ? process_one_work+0x6a0/0x6a0
[  663.226562]  ? kthread_create_on_node+0x40/0x40
[  663.226563]  ret_from_fork+0x2e/0x40
[  663.226565] kworker/10:8    D    0  9300      2 0x00000000
[  663.226568] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226569] Call Trace:
[  663.226571]  __schedule+0x2da/0xb00
[  663.226573]  schedule+0x38/0x90
[  663.226575]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226577]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226578]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226581]  call_rwsem_down_write_failed+0x17/0x30
[  663.226583]  down_write+0x5a/0x70
[  663.226584]  ? __generic_file_fsync+0x43/0x90
[  663.226586]  __generic_file_fsync+0x43/0x90
[  663.226588]  ext4_sync_file+0x2b4/0x540
[  663.226589]  vfs_fsync_range+0x46/0xa0
[  663.226591]  dio_complete+0x181/0x1b0
[  663.226592]  dio_aio_complete_work+0x17/0x20
[  663.226594]  process_one_work+0x208/0x6a0
[  663.226595]  ? process_one_work+0x18d/0x6a0
[  663.226597]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226599]  worker_thread+0x49/0x4a0
[  663.226601]  kthread+0x107/0x140
[  663.226602]  ? process_one_work+0x6a0/0x6a0
[  663.226604]  ? kthread_create_on_node+0x40/0x40
[  663.226606]  ret_from_fork+0x2e/0x40
[  663.226607] kworker/10:9    D    0  9301      2 0x00000000
[  663.226610] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226611] Call Trace:
[  663.226613]  __schedule+0x2da/0xb00
[  663.226615]  schedule+0x38/0x90
[  663.226617]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226619]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226621]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226624]  call_rwsem_down_write_failed+0x17/0x30
[  663.226626]  down_write+0x5a/0x70
[  663.226628]  ? __generic_file_fsync+0x43/0x90
[  663.226629]  __generic_file_fsync+0x43/0x90
[  663.226632]  ext4_sync_file+0x2b4/0x540
[  663.226633]  vfs_fsync_range+0x46/0xa0
[  663.226635]  dio_complete+0x181/0x1b0
[  663.226636]  dio_aio_complete_work+0x17/0x20
[  663.226638]  process_one_work+0x208/0x6a0
[  663.226639]  ? process_one_work+0x18d/0x6a0
[  663.226641]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226642]  worker_thread+0x49/0x4a0
[  663.226644]  kthread+0x107/0x140
[  663.226646]  ? process_one_work+0x6a0/0x6a0
[  663.226647]  ? kthread_create_on_node+0x40/0x40
[  663.226649]  ret_from_fork+0x2e/0x40
[  663.226650] kworker/10:10   D    0  9302      2 0x00000000
[  663.226653] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226654] Call Trace:
[  663.226656]  __schedule+0x2da/0xb00
[  663.226658]  schedule+0x38/0x90
[  663.226660]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226662]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226664]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226667]  call_rwsem_down_write_failed+0x17/0x30
[  663.226669]  down_write+0x5a/0x70
[  663.226671]  ? __generic_file_fsync+0x43/0x90
[  663.226673]  __generic_file_fsync+0x43/0x90
[  663.226674]  ext4_sync_file+0x2b4/0x540
[  663.226676]  vfs_fsync_range+0x46/0xa0
[  663.226677]  dio_complete+0x181/0x1b0
[  663.226678]  dio_aio_complete_work+0x17/0x20
[  663.226680]  process_one_work+0x208/0x6a0
[  663.226681]  ? process_one_work+0x18d/0x6a0
[  663.226683]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226684]  worker_thread+0x49/0x4a0
[  663.226686]  kthread+0x107/0x140
[  663.226687]  ? process_one_work+0x6a0/0x6a0
[  663.226688]  ? kthread_create_on_node+0x40/0x40
[  663.226690]  ret_from_fork+0x2e/0x40
[  663.226692] kworker/10:11   D    0  9303      2 0x00000000
[  663.226695] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226696] Call Trace:
[  663.226698]  __schedule+0x2da/0xb00
[  663.226700]  schedule+0x38/0x90
[  663.226702]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226704]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226706]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226708]  call_rwsem_down_write_failed+0x17/0x30
[  663.226710]  down_write+0x5a/0x70
[  663.226712]  ? __generic_file_fsync+0x43/0x90
[  663.226713]  __generic_file_fsync+0x43/0x90
[  663.226715]  ext4_sync_file+0x2b4/0x540
[  663.226717]  vfs_fsync_range+0x46/0xa0
[  663.226718]  dio_complete+0x181/0x1b0
[  663.226719]  dio_aio_complete_work+0x17/0x20
[  663.226721]  process_one_work+0x208/0x6a0
[  663.226722]  ? process_one_work+0x18d/0x6a0
[  663.226724]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226726]  worker_thread+0x49/0x4a0
[  663.226728]  kthread+0x107/0x140
[  663.226729]  ? process_one_work+0x6a0/0x6a0
[  663.226731]  ? kthread_create_on_node+0x40/0x40
[  663.226732]  ret_from_fork+0x2e/0x40
[  663.226734] kworker/10:12   D    0  9304      2 0x00000000
[  663.226737] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226738] Call Trace:
[  663.226740]  __schedule+0x2da/0xb00
[  663.226742]  schedule+0x38/0x90
[  663.226744]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226746]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226748]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226751]  call_rwsem_down_write_failed+0x17/0x30
[  663.226753]  down_write+0x5a/0x70
[  663.226755]  ? __generic_file_fsync+0x43/0x90
[  663.226757]  __generic_file_fsync+0x43/0x90
[  663.226758]  ext4_sync_file+0x2b4/0x540
[  663.226760]  vfs_fsync_range+0x46/0xa0
[  663.226761]  dio_complete+0x181/0x1b0
[  663.226763]  dio_aio_complete_work+0x17/0x20
[  663.226764]  process_one_work+0x208/0x6a0
[  663.226765]  ? process_one_work+0x18d/0x6a0
[  663.226767]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226769]  worker_thread+0x49/0x4a0
[  663.226771]  kthread+0x107/0x140
[  663.226772]  ? process_one_work+0x6a0/0x6a0
[  663.226773]  ? kthread_create_on_node+0x40/0x40
[  663.226775]  ret_from_fork+0x2e/0x40
[  663.226777] kworker/10:13   D    0  9305      2 0x00000000
[  663.226779] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226781] Call Trace:
[  663.226783]  __schedule+0x2da/0xb00
[  663.226785]  ? bit_wait+0x50/0x50
[  663.226787]  schedule+0x38/0x90
[  663.226789]  schedule_timeout+0x2fe/0x640
[  663.226790]  ? mark_held_locks+0x6f/0xa0
[  663.226792]  ? ktime_get+0x74/0x130
[  663.226794]  ? bit_wait+0x50/0x50
[  663.226795]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.226796]  ? trace_hardirqs_on+0xd/0x10
[  663.226798]  ? ktime_get+0x98/0x130
[  663.226800]  ? __delayacct_blkio_start+0x1a/0x30
[  663.226801]  ? bit_wait+0x50/0x50
[  663.226803]  io_schedule_timeout+0x9f/0x110
[  663.226805]  bit_wait_io+0x16/0x60
[  663.226808]  __wait_on_bit+0x53/0x80
[  663.226810]  ? bit_wait+0x50/0x50
[  663.226811]  out_of_line_wait_on_bit+0x6e/0x80
[  663.226813]  ? prepare_to_wait_event+0x170/0x170
[  663.226816]  sync_mapping_buffers+0x22f/0x390
[  663.226818]  __generic_file_fsync+0x4d/0x90
[  663.226820]  ext4_sync_file+0x2b4/0x540
[  663.226822]  vfs_fsync_range+0x46/0xa0
[  663.226823]  dio_complete+0x181/0x1b0
[  663.226824]  dio_aio_complete_work+0x17/0x20
[  663.226826]  process_one_work+0x208/0x6a0
[  663.226827]  ? process_one_work+0x18d/0x6a0
[  663.226829]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226830]  worker_thread+0x49/0x4a0
[  663.226832]  kthread+0x107/0x140
[  663.226834]  ? process_one_work+0x6a0/0x6a0
[  663.226835]  ? kthread_create_on_node+0x40/0x40
[  663.226837]  ret_from_fork+0x2e/0x40
[  663.226838] kworker/10:14   D    0  9306      2 0x00000000
[  663.226841] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226842] Call Trace:
[  663.226844]  __schedule+0x2da/0xb00
[  663.226846]  schedule+0x38/0x90
[  663.226848]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226851]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226853]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226855]  ? trace_hardirqs_on+0xd/0x10
[  663.226857]  call_rwsem_down_write_failed+0x17/0x30
[  663.226859]  down_write+0x5a/0x70
[  663.226861]  ? __generic_file_fsync+0x43/0x90
[  663.226863]  __generic_file_fsync+0x43/0x90
[  663.226864]  ext4_sync_file+0x2b4/0x540
[  663.226866]  vfs_fsync_range+0x46/0xa0
[  663.226867]  dio_complete+0x181/0x1b0
[  663.226869]  dio_aio_complete_work+0x17/0x20
[  663.226870]  process_one_work+0x208/0x6a0
[  663.226872]  ? process_one_work+0x18d/0x6a0
[  663.226874]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226875]  worker_thread+0x49/0x4a0
[  663.226877]  kthread+0x107/0x140
[  663.226878]  ? process_one_work+0x6a0/0x6a0
[  663.226880]  ? kthread_create_on_node+0x40/0x40
[  663.226881]  ret_from_fork+0x2e/0x40
[  663.226883] kworker/10:15   D    0  9307      2 0x00000000
[  663.226885] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226886] Call Trace:
[  663.226888]  __schedule+0x2da/0xb00
[  663.226890]  schedule+0x38/0x90
[  663.226892]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226894]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226896]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226898]  call_rwsem_down_write_failed+0x17/0x30
[  663.226900]  down_write+0x5a/0x70
[  663.226901]  ? __generic_file_fsync+0x43/0x90
[  663.226903]  __generic_file_fsync+0x43/0x90
[  663.226905]  ext4_sync_file+0x2b4/0x540
[  663.226906]  vfs_fsync_range+0x46/0xa0
[  663.226907]  dio_complete+0x181/0x1b0
[  663.226909]  dio_aio_complete_work+0x17/0x20
[  663.226910]  process_one_work+0x208/0x6a0
[  663.226912]  ? process_one_work+0x18d/0x6a0
[  663.226914]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226916]  worker_thread+0x49/0x4a0
[  663.226917]  kthread+0x107/0x140
[  663.226918]  ? process_one_work+0x6a0/0x6a0
[  663.226920]  ? kthread_create_on_node+0x40/0x40
[  663.226921]  ret_from_fork+0x2e/0x40
[  663.226923] kworker/10:16   D    0  9308      2 0x00000000
[  663.226926] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226927] Call Trace:
[  663.226929]  __schedule+0x2da/0xb00
[  663.226931]  schedule+0x38/0x90
[  663.226933]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226935]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226937]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226939]  call_rwsem_down_write_failed+0x17/0x30
[  663.226941]  down_write+0x5a/0x70
[  663.226943]  ? __generic_file_fsync+0x43/0x90
[  663.226945]  __generic_file_fsync+0x43/0x90
[  663.226946]  ext4_sync_file+0x2b4/0x540
[  663.226948]  vfs_fsync_range+0x46/0xa0
[  663.226949]  dio_complete+0x181/0x1b0
[  663.226951]  dio_aio_complete_work+0x17/0x20
[  663.226952]  process_one_work+0x208/0x6a0
[  663.226954]  ? process_one_work+0x18d/0x6a0
[  663.226956]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.226957]  worker_thread+0x49/0x4a0
[  663.226959]  kthread+0x107/0x140
[  663.226960]  ? process_one_work+0x6a0/0x6a0
[  663.226962]  ? kthread_create_on_node+0x40/0x40
[  663.226964]  ret_from_fork+0x2e/0x40
[  663.226965] kworker/10:17   D    0  9309      2 0x00000000
[  663.226968] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.226969] Call Trace:
[  663.226971]  __schedule+0x2da/0xb00
[  663.226973]  schedule+0x38/0x90
[  663.226975]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.226978]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.226980]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.226982]  call_rwsem_down_write_failed+0x17/0x30
[  663.226984]  down_write+0x5a/0x70
[  663.226986]  ? __generic_file_fsync+0x43/0x90
[  663.226988]  __generic_file_fsync+0x43/0x90
[  663.226989]  ext4_sync_file+0x2b4/0x540
[  663.226992]  vfs_fsync_range+0x46/0xa0
[  663.226993]  dio_complete+0x181/0x1b0
[  663.226994]  dio_aio_complete_work+0x17/0x20
[  663.226995]  process_one_work+0x208/0x6a0
[  663.226996]  ? process_one_work+0x18d/0x6a0
[  663.226998]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227000]  worker_thread+0x49/0x4a0
[  663.227001]  kthread+0x107/0x140
[  663.227002]  ? process_one_work+0x6a0/0x6a0
[  663.227004]  ? kthread_create_on_node+0x40/0x40
[  663.227005]  ret_from_fork+0x2e/0x40
[  663.227007] kworker/10:18   D    0  9310      2 0x00000000
[  663.227010] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227011] Call Trace:
[  663.227013]  __schedule+0x2da/0xb00
[  663.227015]  schedule+0x38/0x90
[  663.227017]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227019]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227020]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227023]  call_rwsem_down_write_failed+0x17/0x30
[  663.227025]  down_write+0x5a/0x70
[  663.227027]  ? __generic_file_fsync+0x43/0x90
[  663.227028]  __generic_file_fsync+0x43/0x90
[  663.227031]  ext4_sync_file+0x2b4/0x540
[  663.227032]  vfs_fsync_range+0x46/0xa0
[  663.227034]  dio_complete+0x181/0x1b0
[  663.227035]  dio_aio_complete_work+0x17/0x20
[  663.227036]  process_one_work+0x208/0x6a0
[  663.227037]  ? process_one_work+0x18d/0x6a0
[  663.227039]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227041]  worker_thread+0x49/0x4a0
[  663.227043]  kthread+0x107/0x140
[  663.227044]  ? process_one_work+0x6a0/0x6a0
[  663.227046]  ? kthread_create_on_node+0x40/0x40
[  663.227047]  ret_from_fork+0x2e/0x40
[  663.227049] kworker/10:19   D    0  9311      2 0x00000000
[  663.227052] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227053] Call Trace:
[  663.227055]  __schedule+0x2da/0xb00
[  663.227057]  schedule+0x38/0x90
[  663.227059]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227061]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227063]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227066]  call_rwsem_down_write_failed+0x17/0x30
[  663.227068]  down_write+0x5a/0x70
[  663.227070]  ? __generic_file_fsync+0x43/0x90
[  663.227072]  __generic_file_fsync+0x43/0x90
[  663.227074]  ext4_sync_file+0x2b4/0x540
[  663.227075]  vfs_fsync_range+0x46/0xa0
[  663.227077]  dio_complete+0x181/0x1b0
[  663.227078]  dio_aio_complete_work+0x17/0x20
[  663.227079]  process_one_work+0x208/0x6a0
[  663.227080]  ? process_one_work+0x18d/0x6a0
[  663.227082]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227083]  worker_thread+0x49/0x4a0
[  663.227085]  kthread+0x107/0x140
[  663.227087]  ? process_one_work+0x6a0/0x6a0
[  663.227088]  ? kthread_create_on_node+0x40/0x40
[  663.227090]  ret_from_fork+0x2e/0x40
[  663.227092] kworker/10:20   D    0  9312      2 0x00000000
[  663.227094] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227095] Call Trace:
[  663.227098]  __schedule+0x2da/0xb00
[  663.227099]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.227101]  schedule+0x38/0x90
[  663.227103]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227105]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227106]  ? trace_hardirqs_on+0xd/0x10
[  663.227108]  call_rwsem_down_write_failed+0x17/0x30
[  663.227111]  down_write+0x5a/0x70
[  663.227112]  ? __generic_file_fsync+0x43/0x90
[  663.227114]  __generic_file_fsync+0x43/0x90
[  663.227115]  ext4_sync_file+0x2b4/0x540
[  663.227117]  vfs_fsync_range+0x46/0xa0
[  663.227118]  dio_complete+0x181/0x1b0
[  663.227119]  dio_aio_complete_work+0x17/0x20
[  663.227121]  process_one_work+0x208/0x6a0
[  663.227122]  ? process_one_work+0x18d/0x6a0
[  663.227123]  worker_thread+0x49/0x4a0
[  663.227125]  kthread+0x107/0x140
[  663.227126]  ? process_one_work+0x6a0/0x6a0
[  663.227128]  ? kthread_create_on_node+0x40/0x40
[  663.227129]  ret_from_fork+0x2e/0x40
[  663.227131] kworker/10:21   D    0  9313      2 0x00000000
[  663.227134] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227135] Call Trace:
[  663.227137]  __schedule+0x2da/0xb00
[  663.227139]  schedule+0x38/0x90
[  663.227141]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227143]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227145]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227147]  call_rwsem_down_write_failed+0x17/0x30
[  663.227149]  down_write+0x5a/0x70
[  663.227151]  ? __generic_file_fsync+0x43/0x90
[  663.227153]  __generic_file_fsync+0x43/0x90
[  663.227155]  ext4_sync_file+0x2b4/0x540
[  663.227156]  vfs_fsync_range+0x46/0xa0
[  663.227157]  dio_complete+0x181/0x1b0
[  663.227159]  dio_aio_complete_work+0x17/0x20
[  663.227160]  process_one_work+0x208/0x6a0
[  663.227161]  ? process_one_work+0x18d/0x6a0
[  663.227163]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227165]  worker_thread+0x49/0x4a0
[  663.227167]  kthread+0x107/0x140
[  663.227168]  ? process_one_work+0x6a0/0x6a0
[  663.227170]  ? kthread_create_on_node+0x40/0x40
[  663.227171]  ret_from_fork+0x2e/0x40
[  663.227173] kworker/10:22   D    0  9314      2 0x00000000
[  663.227176] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227177] Call Trace:
[  663.227179]  __schedule+0x2da/0xb00
[  663.227181]  schedule+0x38/0x90
[  663.227183]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227185]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227187]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227190]  call_rwsem_down_write_failed+0x17/0x30
[  663.227192]  down_write+0x5a/0x70
[  663.227194]  ? __generic_file_fsync+0x43/0x90
[  663.227195]  __generic_file_fsync+0x43/0x90
[  663.227197]  ext4_sync_file+0x2b4/0x540
[  663.227199]  vfs_fsync_range+0x46/0xa0
[  663.227200]  dio_complete+0x181/0x1b0
[  663.227202]  dio_aio_complete_work+0x17/0x20
[  663.227203]  process_one_work+0x208/0x6a0
[  663.227204]  ? process_one_work+0x18d/0x6a0
[  663.227206]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227207]  worker_thread+0x49/0x4a0
[  663.227209]  kthread+0x107/0x140
[  663.227210]  ? process_one_work+0x6a0/0x6a0
[  663.227212]  ? kthread_create_on_node+0x40/0x40
[  663.227213]  ret_from_fork+0x2e/0x40
[  663.227215] kworker/10:23   D    0  9315      2 0x00000000
[  663.227218] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227219] Call Trace:
[  663.227221]  __schedule+0x2da/0xb00
[  663.227223]  schedule+0x38/0x90
[  663.227225]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227226]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227228]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227231]  call_rwsem_down_write_failed+0x17/0x30
[  663.227233]  down_write+0x5a/0x70
[  663.227234]  ? __generic_file_fsync+0x43/0x90
[  663.227236]  __generic_file_fsync+0x43/0x90
[  663.227237]  ext4_sync_file+0x2b4/0x540
[  663.227239]  vfs_fsync_range+0x46/0xa0
[  663.227240]  dio_complete+0x181/0x1b0
[  663.227241]  dio_aio_complete_work+0x17/0x20
[  663.227242]  process_one_work+0x208/0x6a0
[  663.227243]  ? process_one_work+0x18d/0x6a0
[  663.227246]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227247]  worker_thread+0x49/0x4a0
[  663.227249]  kthread+0x107/0x140
[  663.227250]  ? process_one_work+0x6a0/0x6a0
[  663.227252]  ? kthread_create_on_node+0x40/0x40
[  663.227253]  ret_from_fork+0x2e/0x40
[  663.227255] kworker/10:24   D    0  9316      2 0x00000000
[  663.227257] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227258] Call Trace:
[  663.227260]  __schedule+0x2da/0xb00
[  663.227262]  schedule+0x38/0x90
[  663.227264]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227265]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227267]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227269]  ? trace_hardirqs_on+0xd/0x10
[  663.227271]  call_rwsem_down_write_failed+0x17/0x30
[  663.227273]  down_write+0x5a/0x70
[  663.227274]  ? __generic_file_fsync+0x43/0x90
[  663.227276]  __generic_file_fsync+0x43/0x90
[  663.227278]  ext4_sync_file+0x2b4/0x540
[  663.227280]  vfs_fsync_range+0x46/0xa0
[  663.227281]  dio_complete+0x181/0x1b0
[  663.227282]  dio_aio_complete_work+0x17/0x20
[  663.227284]  process_one_work+0x208/0x6a0
[  663.227285]  ? process_one_work+0x18d/0x6a0
[  663.227287]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227288]  worker_thread+0x49/0x4a0
[  663.227290]  kthread+0x107/0x140
[  663.227291]  ? process_one_work+0x6a0/0x6a0
[  663.227293]  ? kthread_create_on_node+0x40/0x40
[  663.227295]  ret_from_fork+0x2e/0x40
[  663.227296] kworker/10:25   D    0  9317      2 0x00000000
[  663.227299] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227300] Call Trace:
[  663.227302]  __schedule+0x2da/0xb00
[  663.227304]  schedule+0x38/0x90
[  663.227306]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227309]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227311]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227314]  call_rwsem_down_write_failed+0x17/0x30
[  663.227316]  down_write+0x5a/0x70
[  663.227318]  ? __generic_file_fsync+0x43/0x90
[  663.227320]  __generic_file_fsync+0x43/0x90
[  663.227321]  ext4_sync_file+0x2b4/0x540
[  663.227323]  vfs_fsync_range+0x46/0xa0
[  663.227324]  dio_complete+0x181/0x1b0
[  663.227326]  dio_aio_complete_work+0x17/0x20
[  663.227327]  process_one_work+0x208/0x6a0
[  663.227328]  ? process_one_work+0x18d/0x6a0
[  663.227330]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227332]  worker_thread+0x49/0x4a0
[  663.227334]  kthread+0x107/0x140
[  663.227335]  ? process_one_work+0x6a0/0x6a0
[  663.227337]  ? kthread_create_on_node+0x40/0x40
[  663.227338]  ret_from_fork+0x2e/0x40
[  663.227340] kworker/10:26   D    0  9318      2 0x00000000
[  663.227342] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227343] Call Trace:
[  663.227345]  __schedule+0x2da/0xb00
[  663.227347]  schedule+0x38/0x90
[  663.227349]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227351]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227353]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227356]  call_rwsem_down_write_failed+0x17/0x30
[  663.227358]  down_write+0x5a/0x70
[  663.227359]  ? __generic_file_fsync+0x43/0x90
[  663.227361]  __generic_file_fsync+0x43/0x90
[  663.227363]  ext4_sync_file+0x2b4/0x540
[  663.227365]  vfs_fsync_range+0x46/0xa0
[  663.227366]  dio_complete+0x181/0x1b0
[  663.227367]  dio_aio_complete_work+0x17/0x20
[  663.227368]  process_one_work+0x208/0x6a0
[  663.227369]  ? process_one_work+0x18d/0x6a0
[  663.227371]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227373]  worker_thread+0x49/0x4a0
[  663.227374]  kthread+0x107/0x140
[  663.227376]  ? process_one_work+0x6a0/0x6a0
[  663.227377]  ? kthread_create_on_node+0x40/0x40
[  663.227378]  ret_from_fork+0x2e/0x40
[  663.227380] kworker/10:27   D    0  9319      2 0x00000000
[  663.227383] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227383] Call Trace:
[  663.227385]  __schedule+0x2da/0xb00
[  663.227387]  schedule+0x38/0x90
[  663.227389]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227391]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227393]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227395]  call_rwsem_down_write_failed+0x17/0x30
[  663.227397]  down_write+0x5a/0x70
[  663.227400]  ? __generic_file_fsync+0x43/0x90
[  663.227401]  __generic_file_fsync+0x43/0x90
[  663.227403]  ext4_sync_file+0x2b4/0x540
[  663.227404]  vfs_fsync_range+0x46/0xa0
[  663.227406]  dio_complete+0x181/0x1b0
[  663.227407]  dio_aio_complete_work+0x17/0x20
[  663.227409]  process_one_work+0x208/0x6a0
[  663.227410]  ? process_one_work+0x18d/0x6a0
[  663.227412]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227414]  worker_thread+0x49/0x4a0
[  663.227416]  kthread+0x107/0x140
[  663.227417]  ? process_one_work+0x6a0/0x6a0
[  663.227419]  ? kthread_create_on_node+0x40/0x40
[  663.227420]  ret_from_fork+0x2e/0x40
[  663.227422] kworker/10:28   D    0  9320      2 0x00000000
[  663.227425] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227426] Call Trace:
[  663.227428]  __schedule+0x2da/0xb00
[  663.227430]  schedule+0x38/0x90
[  663.227432]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227435]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227437]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227440]  call_rwsem_down_write_failed+0x17/0x30
[  663.227442]  down_write+0x5a/0x70
[  663.227443]  ? __generic_file_fsync+0x43/0x90
[  663.227445]  __generic_file_fsync+0x43/0x90
[  663.227447]  ext4_sync_file+0x2b4/0x540
[  663.227448]  vfs_fsync_range+0x46/0xa0
[  663.227449]  dio_complete+0x181/0x1b0
[  663.227451]  dio_aio_complete_work+0x17/0x20
[  663.227452]  process_one_work+0x208/0x6a0
[  663.227454]  ? process_one_work+0x18d/0x6a0
[  663.227455]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227457]  worker_thread+0x49/0x4a0
[  663.227459]  kthread+0x107/0x140
[  663.227460]  ? process_one_work+0x6a0/0x6a0
[  663.227462]  ? kthread_create_on_node+0x40/0x40
[  663.227463]  ret_from_fork+0x2e/0x40
[  663.227466] kworker/10:29   D    0  9321      2 0x00000000
[  663.227468] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227469] Call Trace:
[  663.227471]  __schedule+0x2da/0xb00
[  663.227473]  schedule+0x38/0x90
[  663.227475]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227477]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227479]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227481]  call_rwsem_down_write_failed+0x17/0x30
[  663.227483]  down_write+0x5a/0x70
[  663.227485]  ? __generic_file_fsync+0x43/0x90
[  663.227486]  __generic_file_fsync+0x43/0x90
[  663.227488]  ext4_sync_file+0x2b4/0x540
[  663.227489]  vfs_fsync_range+0x46/0xa0
[  663.227490]  dio_complete+0x181/0x1b0
[  663.227492]  dio_aio_complete_work+0x17/0x20
[  663.227493]  process_one_work+0x208/0x6a0
[  663.227494]  ? process_one_work+0x18d/0x6a0
[  663.227496]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227497]  worker_thread+0x49/0x4a0
[  663.227499]  kthread+0x107/0x140
[  663.227501]  ? process_one_work+0x6a0/0x6a0
[  663.227502]  ? kthread_create_on_node+0x40/0x40
[  663.227504]  ret_from_fork+0x2e/0x40
[  663.227505] kworker/10:30   D    0  9322      2 0x00000000
[  663.227508] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227508] Call Trace:
[  663.227510]  __schedule+0x2da/0xb00
[  663.227512]  schedule+0x38/0x90
[  663.227514]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227516]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227518]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227520]  ? trace_hardirqs_on+0xd/0x10
[  663.227523]  call_rwsem_down_write_failed+0x17/0x30
[  663.227525]  down_write+0x5a/0x70
[  663.227527]  ? __generic_file_fsync+0x43/0x90
[  663.227528]  __generic_file_fsync+0x43/0x90
[  663.227530]  ext4_sync_file+0x2b4/0x540
[  663.227532]  vfs_fsync_range+0x46/0xa0
[  663.227533]  dio_complete+0x181/0x1b0
[  663.227535]  dio_aio_complete_work+0x17/0x20
[  663.227536]  process_one_work+0x208/0x6a0
[  663.227537]  ? process_one_work+0x18d/0x6a0
[  663.227539]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227540]  worker_thread+0x49/0x4a0
[  663.227542]  kthread+0x107/0x140
[  663.227544]  ? process_one_work+0x6a0/0x6a0
[  663.227545]  ? kthread_create_on_node+0x40/0x40
[  663.227547]  ret_from_fork+0x2e/0x40
[  663.227549] kworker/10:31   D    0  9323      2 0x00000000
[  663.227551] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227552] Call Trace:
[  663.227555]  __schedule+0x2da/0xb00
[  663.227556]  schedule+0x38/0x90
[  663.227559]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227561]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227563]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227565]  call_rwsem_down_write_failed+0x17/0x30
[  663.227568]  down_write+0x5a/0x70
[  663.227569]  ? __generic_file_fsync+0x43/0x90
[  663.227571]  __generic_file_fsync+0x43/0x90
[  663.227572]  ext4_sync_file+0x2b4/0x540
[  663.227574]  vfs_fsync_range+0x46/0xa0
[  663.227575]  dio_complete+0x181/0x1b0
[  663.227577]  dio_aio_complete_work+0x17/0x20
[  663.227578]  process_one_work+0x208/0x6a0
[  663.227579]  ? process_one_work+0x18d/0x6a0
[  663.227581]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227582]  worker_thread+0x49/0x4a0
[  663.227584]  kthread+0x107/0x140
[  663.227586]  ? process_one_work+0x6a0/0x6a0
[  663.227587]  ? kthread_create_on_node+0x40/0x40
[  663.227589]  ret_from_fork+0x2e/0x40
[  663.227591] kworker/10:32   D    0  9324      2 0x00000000
[  663.227593] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227594] Call Trace:
[  663.227596]  __schedule+0x2da/0xb00
[  663.227598]  schedule+0x38/0x90
[  663.227601]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227603]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227604]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227607]  call_rwsem_down_write_failed+0x17/0x30
[  663.227609]  down_write+0x5a/0x70
[  663.227610]  ? __generic_file_fsync+0x43/0x90
[  663.227612]  __generic_file_fsync+0x43/0x90
[  663.227614]  ext4_sync_file+0x2b4/0x540
[  663.227616]  vfs_fsync_range+0x46/0xa0
[  663.227617]  dio_complete+0x181/0x1b0
[  663.227618]  dio_aio_complete_work+0x17/0x20
[  663.227619]  process_one_work+0x208/0x6a0
[  663.227620]  ? process_one_work+0x18d/0x6a0
[  663.227622]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227624]  worker_thread+0x49/0x4a0
[  663.227625]  kthread+0x107/0x140
[  663.227626]  ? process_one_work+0x6a0/0x6a0
[  663.227628]  ? kthread_create_on_node+0x40/0x40
[  663.227629]  ret_from_fork+0x2e/0x40
[  663.227631] kworker/10:33   D    0  9325      2 0x00000000
[  663.227634] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227635] Call Trace:
[  663.227637]  __schedule+0x2da/0xb00
[  663.227639]  schedule+0x38/0x90
[  663.227641]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227643]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227645]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227647]  call_rwsem_down_write_failed+0x17/0x30
[  663.227649]  down_write+0x5a/0x70
[  663.227651]  ? __generic_file_fsync+0x43/0x90
[  663.227653]  __generic_file_fsync+0x43/0x90
[  663.227654]  ext4_sync_file+0x2b4/0x540
[  663.227656]  vfs_fsync_range+0x46/0xa0
[  663.227657]  dio_complete+0x181/0x1b0
[  663.227659]  dio_aio_complete_work+0x17/0x20
[  663.227660]  process_one_work+0x208/0x6a0
[  663.227661]  ? process_one_work+0x18d/0x6a0
[  663.227663]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227665]  worker_thread+0x49/0x4a0
[  663.227667]  kthread+0x107/0x140
[  663.227668]  ? process_one_work+0x6a0/0x6a0
[  663.227670]  ? kthread_create_on_node+0x40/0x40
[  663.227672]  ret_from_fork+0x2e/0x40
[  663.227673] kworker/10:34   D    0  9326      2 0x00000000
[  663.227676] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227677] Call Trace:
[  663.227679]  __schedule+0x2da/0xb00
[  663.227681]  schedule+0x38/0x90
[  663.227683]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227685]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227687]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227690]  call_rwsem_down_write_failed+0x17/0x30
[  663.227692]  down_write+0x5a/0x70
[  663.227693]  ? __generic_file_fsync+0x43/0x90
[  663.227695]  __generic_file_fsync+0x43/0x90
[  663.227697]  ext4_sync_file+0x2b4/0x540
[  663.227698]  vfs_fsync_range+0x46/0xa0
[  663.227700]  dio_complete+0x181/0x1b0
[  663.227701]  dio_aio_complete_work+0x17/0x20
[  663.227702]  process_one_work+0x208/0x6a0
[  663.227703]  ? process_one_work+0x18d/0x6a0
[  663.227705]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227707]  worker_thread+0x49/0x4a0
[  663.227709]  kthread+0x107/0x140
[  663.227710]  ? process_one_work+0x6a0/0x6a0
[  663.227711]  ? kthread_create_on_node+0x40/0x40
[  663.227713]  ret_from_fork+0x2e/0x40
[  663.227715] kworker/10:35   D    0  9327      2 0x00000000
[  663.227717] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227718] Call Trace:
[  663.227720]  __schedule+0x2da/0xb00
[  663.227722]  schedule+0x38/0x90
[  663.227724]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227726]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227728]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227730]  call_rwsem_down_write_failed+0x17/0x30
[  663.227733]  down_write+0x5a/0x70
[  663.227734]  ? __generic_file_fsync+0x43/0x90
[  663.227736]  __generic_file_fsync+0x43/0x90
[  663.227738]  ext4_sync_file+0x2b4/0x540
[  663.227739]  vfs_fsync_range+0x46/0xa0
[  663.227741]  dio_complete+0x181/0x1b0
[  663.227742]  dio_aio_complete_work+0x17/0x20
[  663.227743]  process_one_work+0x208/0x6a0
[  663.227745]  ? process_one_work+0x18d/0x6a0
[  663.227747]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227748]  worker_thread+0x49/0x4a0
[  663.227750]  kthread+0x107/0x140
[  663.227751]  ? process_one_work+0x6a0/0x6a0
[  663.227753]  ? kthread_create_on_node+0x40/0x40
[  663.227754]  ret_from_fork+0x2e/0x40
[  663.227756] kworker/10:36   D    0  9328      2 0x00000000
[  663.227759] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227760] Call Trace:
[  663.227762]  __schedule+0x2da/0xb00
[  663.227764]  schedule+0x38/0x90
[  663.227767]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227769]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227771]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227773]  call_rwsem_down_write_failed+0x17/0x30
[  663.227775]  down_write+0x5a/0x70
[  663.227777]  ? __generic_file_fsync+0x43/0x90
[  663.227779]  __generic_file_fsync+0x43/0x90
[  663.227780]  ext4_sync_file+0x2b4/0x540
[  663.227782]  vfs_fsync_range+0x46/0xa0
[  663.227783]  dio_complete+0x181/0x1b0
[  663.227785]  dio_aio_complete_work+0x17/0x20
[  663.227786]  process_one_work+0x208/0x6a0
[  663.227787]  ? process_one_work+0x18d/0x6a0
[  663.227789]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227790]  worker_thread+0x49/0x4a0
[  663.227792]  kthread+0x107/0x140
[  663.227794]  ? process_one_work+0x6a0/0x6a0
[  663.227795]  ? kthread_create_on_node+0x40/0x40
[  663.227797]  ret_from_fork+0x2e/0x40
[  663.227799] kworker/10:37   D    0  9329      2 0x00000000
[  663.227802] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227803] Call Trace:
[  663.227805]  __schedule+0x2da/0xb00
[  663.227807]  schedule+0x38/0x90
[  663.227809]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227810]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227812]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227815]  call_rwsem_down_write_failed+0x17/0x30
[  663.227817]  down_write+0x5a/0x70
[  663.227818]  ? __generic_file_fsync+0x43/0x90
[  663.227820]  __generic_file_fsync+0x43/0x90
[  663.227821]  ext4_sync_file+0x2b4/0x540
[  663.227823]  vfs_fsync_range+0x46/0xa0
[  663.227824]  dio_complete+0x181/0x1b0
[  663.227825]  dio_aio_complete_work+0x17/0x20
[  663.227826]  process_one_work+0x208/0x6a0
[  663.227828]  ? process_one_work+0x18d/0x6a0
[  663.227829]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227831]  worker_thread+0x49/0x4a0
[  663.227833]  kthread+0x107/0x140
[  663.227834]  ? process_one_work+0x6a0/0x6a0
[  663.227836]  ? kthread_create_on_node+0x40/0x40
[  663.227837]  ret_from_fork+0x2e/0x40
[  663.227838] kworker/10:38   D    0  9330      2 0x00000000
[  663.227841] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227842] Call Trace:
[  663.227844]  __schedule+0x2da/0xb00
[  663.227845]  schedule+0x38/0x90
[  663.227847]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227849]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227851]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227853]  call_rwsem_down_write_failed+0x17/0x30
[  663.227855]  down_write+0x5a/0x70
[  663.227857]  ? __generic_file_fsync+0x43/0x90
[  663.227858]  __generic_file_fsync+0x43/0x90
[  663.227860]  ext4_sync_file+0x2b4/0x540
[  663.227861]  vfs_fsync_range+0x46/0xa0
[  663.227863]  dio_complete+0x181/0x1b0
[  663.227864]  dio_aio_complete_work+0x17/0x20
[  663.227865]  process_one_work+0x208/0x6a0
[  663.227866]  ? process_one_work+0x18d/0x6a0
[  663.227868]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227869]  worker_thread+0x49/0x4a0
[  663.227871]  kthread+0x107/0x140
[  663.227872]  ? process_one_work+0x6a0/0x6a0
[  663.227874]  ? kthread_create_on_node+0x40/0x40
[  663.227876]  ret_from_fork+0x2e/0x40
[  663.227877] kworker/10:39   D    0  9331      2 0x00000000
[  663.227880] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227881] Call Trace:
[  663.227883]  __schedule+0x2da/0xb00
[  663.227885]  schedule+0x38/0x90
[  663.227887]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227889]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227891]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227894]  call_rwsem_down_write_failed+0x17/0x30
[  663.227896]  down_write+0x5a/0x70
[  663.227897]  ? __generic_file_fsync+0x43/0x90
[  663.227899]  __generic_file_fsync+0x43/0x90
[  663.227901]  ext4_sync_file+0x2b4/0x540
[  663.227902]  vfs_fsync_range+0x46/0xa0
[  663.227904]  dio_complete+0x181/0x1b0
[  663.227905]  dio_aio_complete_work+0x17/0x20
[  663.227907]  process_one_work+0x208/0x6a0
[  663.227908]  ? process_one_work+0x18d/0x6a0
[  663.227910]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227911]  worker_thread+0x49/0x4a0
[  663.227913]  kthread+0x107/0x140
[  663.227915]  ? process_one_work+0x6a0/0x6a0
[  663.227916]  ? kthread_create_on_node+0x40/0x40
[  663.227918]  ret_from_fork+0x2e/0x40
[  663.227920] kworker/10:40   D    0  9332      2 0x00000000
[  663.227922] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227923] Call Trace:
[  663.227925]  __schedule+0x2da/0xb00
[  663.227927]  schedule+0x38/0x90
[  663.227929]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227931]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227933]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227936]  call_rwsem_down_write_failed+0x17/0x30
[  663.227938]  down_write+0x5a/0x70
[  663.227939]  ? __generic_file_fsync+0x43/0x90
[  663.227941]  __generic_file_fsync+0x43/0x90
[  663.227942]  ext4_sync_file+0x2b4/0x540
[  663.227944]  vfs_fsync_range+0x46/0xa0
[  663.227945]  dio_complete+0x181/0x1b0
[  663.227947]  dio_aio_complete_work+0x17/0x20
[  663.227948]  process_one_work+0x208/0x6a0
[  663.227949]  ? process_one_work+0x18d/0x6a0
[  663.227951]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227953]  worker_thread+0x49/0x4a0
[  663.227954]  kthread+0x107/0x140
[  663.227956]  ? process_one_work+0x6a0/0x6a0
[  663.227957]  ? kthread_create_on_node+0x40/0x40
[  663.227959]  ret_from_fork+0x2e/0x40
[  663.227960] kworker/10:41   D    0  9333      2 0x00000000
[  663.227963] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.227964] Call Trace:
[  663.227966]  __schedule+0x2da/0xb00
[  663.227968]  schedule+0x38/0x90
[  663.227969]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.227971]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.227973]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.227976]  call_rwsem_down_write_failed+0x17/0x30
[  663.227978]  down_write+0x5a/0x70
[  663.227979]  ? __generic_file_fsync+0x43/0x90
[  663.227981]  __generic_file_fsync+0x43/0x90
[  663.227982]  ext4_sync_file+0x2b4/0x540
[  663.227983]  vfs_fsync_range+0x46/0xa0
[  663.227985]  dio_complete+0x181/0x1b0
[  663.227986]  dio_aio_complete_work+0x17/0x20
[  663.227987]  process_one_work+0x208/0x6a0
[  663.227988]  ? process_one_work+0x18d/0x6a0
[  663.227990]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.227991]  worker_thread+0x49/0x4a0
[  663.227993]  kthread+0x107/0x140
[  663.227994]  ? process_one_work+0x6a0/0x6a0
[  663.227996]  ? kthread_create_on_node+0x40/0x40
[  663.227997]  ret_from_fork+0x2e/0x40
[  663.227999] kworker/10:42   D    0  9334      2 0x00000000
[  663.228001] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228002] Call Trace:
[  663.228004]  __schedule+0x2da/0xb00
[  663.228006]  schedule+0x38/0x90
[  663.228008]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228011]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228013]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228015]  call_rwsem_down_write_failed+0x17/0x30
[  663.228017]  down_write+0x5a/0x70
[  663.228019]  ? __generic_file_fsync+0x43/0x90
[  663.228020]  __generic_file_fsync+0x43/0x90
[  663.228022]  ext4_sync_file+0x2b4/0x540
[  663.228024]  vfs_fsync_range+0x46/0xa0
[  663.228025]  dio_complete+0x181/0x1b0
[  663.228027]  dio_aio_complete_work+0x17/0x20
[  663.228028]  process_one_work+0x208/0x6a0
[  663.228029]  ? process_one_work+0x18d/0x6a0
[  663.228031]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228033]  worker_thread+0x49/0x4a0
[  663.228035]  kthread+0x107/0x140
[  663.228036]  ? process_one_work+0x6a0/0x6a0
[  663.228038]  ? kthread_create_on_node+0x40/0x40
[  663.228039]  ret_from_fork+0x2e/0x40
[  663.228042] kworker/10:43   D    0  9335      2 0x00000000
[  663.228044] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228045] Call Trace:
[  663.228048]  __schedule+0x2da/0xb00
[  663.228050]  schedule+0x38/0x90
[  663.228052]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228054]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228056]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228058]  call_rwsem_down_write_failed+0x17/0x30
[  663.228060]  down_write+0x5a/0x70
[  663.228062]  ? __generic_file_fsync+0x43/0x90
[  663.228064]  __generic_file_fsync+0x43/0x90
[  663.228065]  ext4_sync_file+0x2b4/0x540
[  663.228067]  vfs_fsync_range+0x46/0xa0
[  663.228068]  dio_complete+0x181/0x1b0
[  663.228069]  dio_aio_complete_work+0x17/0x20
[  663.228071]  process_one_work+0x208/0x6a0
[  663.228072]  ? process_one_work+0x18d/0x6a0
[  663.228074]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228075]  worker_thread+0x49/0x4a0
[  663.228077]  kthread+0x107/0x140
[  663.228079]  ? process_one_work+0x6a0/0x6a0
[  663.228081]  ? kthread_create_on_node+0x40/0x40
[  663.228082]  ret_from_fork+0x2e/0x40
[  663.228084] kworker/10:44   D    0  9336      2 0x00000000
[  663.228087] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228088] Call Trace:
[  663.228090]  __schedule+0x2da/0xb00
[  663.228092]  schedule+0x38/0x90
[  663.228093]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228095]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228097]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228100]  call_rwsem_down_write_failed+0x17/0x30
[  663.228101]  down_write+0x5a/0x70
[  663.228103]  ? __generic_file_fsync+0x43/0x90
[  663.228104]  __generic_file_fsync+0x43/0x90
[  663.228106]  ext4_sync_file+0x2b4/0x540
[  663.228108]  vfs_fsync_range+0x46/0xa0
[  663.228109]  dio_complete+0x181/0x1b0
[  663.228110]  dio_aio_complete_work+0x17/0x20
[  663.228112]  process_one_work+0x208/0x6a0
[  663.228114]  ? process_one_work+0x18d/0x6a0
[  663.228115]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228117]  worker_thread+0x49/0x4a0
[  663.228118]  kthread+0x107/0x140
[  663.228120]  ? process_one_work+0x6a0/0x6a0
[  663.228122]  ? kthread_create_on_node+0x40/0x40
[  663.228123]  ret_from_fork+0x2e/0x40
[  663.228125] kworker/10:45   D    0  9337      2 0x00000000
[  663.228127] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228128] Call Trace:
[  663.228130]  __schedule+0x2da/0xb00
[  663.228132]  schedule+0x38/0x90
[  663.228134]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228136]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228138]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228141]  call_rwsem_down_write_failed+0x17/0x30
[  663.228143]  down_write+0x5a/0x70
[  663.228145]  ? __generic_file_fsync+0x43/0x90
[  663.228146]  __generic_file_fsync+0x43/0x90
[  663.228148]  ext4_sync_file+0x2b4/0x540
[  663.228149]  vfs_fsync_range+0x46/0xa0
[  663.228151]  dio_complete+0x181/0x1b0
[  663.228152]  dio_aio_complete_work+0x17/0x20
[  663.228153]  process_one_work+0x208/0x6a0
[  663.228155]  ? process_one_work+0x18d/0x6a0
[  663.228157]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228158]  worker_thread+0x49/0x4a0
[  663.228160]  kthread+0x107/0x140
[  663.228162]  ? process_one_work+0x6a0/0x6a0
[  663.228164]  ? kthread_create_on_node+0x40/0x40
[  663.228166]  ? ___slab_alloc+0x4db/0x520
[  663.228168]  ? mempool_alloc_slab+0x10/0x20
[  663.228169]  ret_from_fork+0x2e/0x40
[  663.228171] kworker/10:46   D    0  9338      2 0x00000000
[  663.228174] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228175] Call Trace:
[  663.228177]  __schedule+0x2da/0xb00
[  663.228179]  schedule+0x38/0x90
[  663.228181]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228183]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228185]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228188]  call_rwsem_down_write_failed+0x17/0x30
[  663.228190]  down_write+0x5a/0x70
[  663.228192]  ? __generic_file_fsync+0x43/0x90
[  663.228193]  __generic_file_fsync+0x43/0x90
[  663.228195]  ext4_sync_file+0x2b4/0x540
[  663.228196]  vfs_fsync_range+0x46/0xa0
[  663.228198]  dio_complete+0x181/0x1b0
[  663.228199]  dio_aio_complete_work+0x17/0x20
[  663.228200]  process_one_work+0x208/0x6a0
[  663.228202]  ? process_one_work+0x18d/0x6a0
[  663.228204]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228206]  worker_thread+0x49/0x4a0
[  663.228207]  kthread+0x107/0x140
[  663.228208]  ? process_one_work+0x6a0/0x6a0
[  663.228210]  ? kthread_create_on_node+0x40/0x40
[  663.228212]  ? ___slab_alloc+0x4db/0x520
[  663.228214]  ? mempool_alloc_slab+0x10/0x20
[  663.228215]  ret_from_fork+0x2e/0x40
[  663.228217] kworker/10:47   D    0  9339      2 0x00000000
[  663.228219] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228220] Call Trace:
[  663.228222]  __schedule+0x2da/0xb00
[  663.228224]  schedule+0x38/0x90
[  663.228226]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228228]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228230]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228232]  call_rwsem_down_write_failed+0x17/0x30
[  663.228234]  down_write+0x5a/0x70
[  663.228236]  ? __generic_file_fsync+0x43/0x90
[  663.228238]  __generic_file_fsync+0x43/0x90
[  663.228239]  ext4_sync_file+0x2b4/0x540
[  663.228241]  vfs_fsync_range+0x46/0xa0
[  663.228243]  dio_complete+0x181/0x1b0
[  663.228244]  dio_aio_complete_work+0x17/0x20
[  663.228245]  process_one_work+0x208/0x6a0
[  663.228246]  ? process_one_work+0x18d/0x6a0
[  663.228248]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228249]  worker_thread+0x49/0x4a0
[  663.228251]  kthread+0x107/0x140
[  663.228253]  ? process_one_work+0x6a0/0x6a0
[  663.228254]  ? kthread_create_on_node+0x40/0x40
[  663.228256]  ret_from_fork+0x2e/0x40
[  663.228258] kworker/10:48   D    0  9340      2 0x00000000
[  663.228261] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228262] Call Trace:
[  663.228264]  __schedule+0x2da/0xb00
[  663.228266]  schedule+0x38/0x90
[  663.228268]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228270]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228272]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228274]  call_rwsem_down_write_failed+0x17/0x30
[  663.228276]  down_write+0x5a/0x70
[  663.228278]  ? __generic_file_fsync+0x43/0x90
[  663.228280]  __generic_file_fsync+0x43/0x90
[  663.228282]  ext4_sync_file+0x2b4/0x540
[  663.228283]  vfs_fsync_range+0x46/0xa0
[  663.228285]  dio_complete+0x181/0x1b0
[  663.228286]  dio_aio_complete_work+0x17/0x20
[  663.228287]  process_one_work+0x208/0x6a0
[  663.228289]  ? process_one_work+0x18d/0x6a0
[  663.228291]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228292]  worker_thread+0x49/0x4a0
[  663.228294]  kthread+0x107/0x140
[  663.228295]  ? process_one_work+0x6a0/0x6a0
[  663.228297]  ? kthread_create_on_node+0x40/0x40
[  663.228298]  ret_from_fork+0x2e/0x40
[  663.228300] kworker/10:49   D    0  9341      2 0x00000000
[  663.228303] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228304] Call Trace:
[  663.228306]  __schedule+0x2da/0xb00
[  663.228308]  schedule+0x38/0x90
[  663.228310]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228312]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228313]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228316]  call_rwsem_down_write_failed+0x17/0x30
[  663.228318]  down_write+0x5a/0x70
[  663.228319]  ? __generic_file_fsync+0x43/0x90
[  663.228321]  __generic_file_fsync+0x43/0x90
[  663.228323]  ext4_sync_file+0x2b4/0x540
[  663.228324]  vfs_fsync_range+0x46/0xa0
[  663.228326]  dio_complete+0x181/0x1b0
[  663.228327]  dio_aio_complete_work+0x17/0x20
[  663.228328]  process_one_work+0x208/0x6a0
[  663.228329]  ? process_one_work+0x18d/0x6a0
[  663.228331]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228332]  worker_thread+0x49/0x4a0
[  663.228334]  kthread+0x107/0x140
[  663.228335]  ? process_one_work+0x6a0/0x6a0
[  663.228337]  ? kthread_create_on_node+0x40/0x40
[  663.228338]  ret_from_fork+0x2e/0x40
[  663.228340] kworker/10:50   D    0  9342      2 0x00000000
[  663.228342] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228343] Call Trace:
[  663.228345]  __schedule+0x2da/0xb00
[  663.228347]  schedule+0x38/0x90
[  663.228348]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228350]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228352]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228355]  call_rwsem_down_write_failed+0x17/0x30
[  663.228357]  down_write+0x5a/0x70
[  663.228359]  ? __generic_file_fsync+0x43/0x90
[  663.228360]  __generic_file_fsync+0x43/0x90
[  663.228362]  ext4_sync_file+0x2b4/0x540
[  663.228363]  vfs_fsync_range+0x46/0xa0
[  663.228364]  dio_complete+0x181/0x1b0
[  663.228366]  dio_aio_complete_work+0x17/0x20
[  663.228367]  process_one_work+0x208/0x6a0
[  663.228368]  ? process_one_work+0x18d/0x6a0
[  663.228370]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228372]  worker_thread+0x49/0x4a0
[  663.228373]  kthread+0x107/0x140
[  663.228375]  ? process_one_work+0x6a0/0x6a0
[  663.228376]  ? kthread_create_on_node+0x40/0x40
[  663.228378]  ret_from_fork+0x2e/0x40
[  663.228379] kworker/10:51   D    0  9343      2 0x00000000
[  663.228382] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228383] Call Trace:
[  663.228385]  __schedule+0x2da/0xb00
[  663.228387]  schedule+0x38/0x90
[  663.228390]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228392]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228394]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228396]  call_rwsem_down_write_failed+0x17/0x30
[  663.228399]  down_write+0x5a/0x70
[  663.228400]  ? __generic_file_fsync+0x43/0x90
[  663.228402]  __generic_file_fsync+0x43/0x90
[  663.228404]  ext4_sync_file+0x2b4/0x540
[  663.228405]  vfs_fsync_range+0x46/0xa0
[  663.228407]  dio_complete+0x181/0x1b0
[  663.228408]  dio_aio_complete_work+0x17/0x20
[  663.228410]  process_one_work+0x208/0x6a0
[  663.228411]  ? process_one_work+0x18d/0x6a0
[  663.228413]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228414]  worker_thread+0x49/0x4a0
[  663.228416]  kthread+0x107/0x140
[  663.228417]  ? process_one_work+0x6a0/0x6a0
[  663.228419]  ? kthread_create_on_node+0x40/0x40
[  663.228422]  ? kmem_cache_alloc+0x230/0x2c0
[  663.228424]  ? __slab_alloc+0x3e/0x70
[  663.228426]  ? ___slab_alloc+0x4db/0x520
[  663.228427]  ret_from_fork+0x2e/0x40
[  663.228429] kworker/10:52   D    0  9344      2 0x00000000
[  663.228432] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228433] Call Trace:
[  663.228435]  __schedule+0x2da/0xb00
[  663.228436]  schedule+0x38/0x90
[  663.228438]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228440]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228442]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228445]  call_rwsem_down_write_failed+0x17/0x30
[  663.228447]  down_write+0x5a/0x70
[  663.228449]  ? __generic_file_fsync+0x43/0x90
[  663.228450]  __generic_file_fsync+0x43/0x90
[  663.228452]  ext4_sync_file+0x2b4/0x540
[  663.228454]  vfs_fsync_range+0x46/0xa0
[  663.228455]  dio_complete+0x181/0x1b0
[  663.228456]  dio_aio_complete_work+0x17/0x20
[  663.228458]  process_one_work+0x208/0x6a0
[  663.228459]  ? process_one_work+0x18d/0x6a0
[  663.228460]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228462]  worker_thread+0x49/0x4a0
[  663.228464]  kthread+0x107/0x140
[  663.228465]  ? process_one_work+0x6a0/0x6a0
[  663.228466]  ? kthread_create_on_node+0x40/0x40
[  663.228468]  ? ___slab_alloc+0x4db/0x520
[  663.228470]  ? mempool_alloc_slab+0x10/0x20
[  663.228471]  ret_from_fork+0x2e/0x40
[  663.228473] kworker/10:53   D    0  9345      2 0x00000000
[  663.228475] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228476] Call Trace:
[  663.228478]  __schedule+0x2da/0xb00
[  663.228480]  schedule+0x38/0x90
[  663.228482]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228484]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228486]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228488]  ? trace_hardirqs_on+0xd/0x10
[  663.228491]  call_rwsem_down_write_failed+0x17/0x30
[  663.228493]  down_write+0x5a/0x70
[  663.228495]  ? __generic_file_fsync+0x43/0x90
[  663.228497]  __generic_file_fsync+0x43/0x90
[  663.228498]  ext4_sync_file+0x2b4/0x540
[  663.228500]  vfs_fsync_range+0x46/0xa0
[  663.228501]  dio_complete+0x181/0x1b0
[  663.228503]  dio_aio_complete_work+0x17/0x20
[  663.228504]  process_one_work+0x208/0x6a0
[  663.228505]  ? process_one_work+0x18d/0x6a0
[  663.228507]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228508]  worker_thread+0x49/0x4a0
[  663.228510]  kthread+0x107/0x140
[  663.228511]  ? process_one_work+0x6a0/0x6a0
[  663.228513]  ? kthread_create_on_node+0x40/0x40
[  663.228515]  ret_from_fork+0x2e/0x40
[  663.228517] kworker/10:54   D    0  9346      2 0x00000000
[  663.228520] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228521] Call Trace:
[  663.228523]  __schedule+0x2da/0xb00
[  663.228525]  schedule+0x38/0x90
[  663.228527]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228529]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228531]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228533]  ? trace_hardirqs_on+0xd/0x10
[  663.228535]  call_rwsem_down_write_failed+0x17/0x30
[  663.228537]  down_write+0x5a/0x70
[  663.228539]  ? __generic_file_fsync+0x43/0x90
[  663.228540]  __generic_file_fsync+0x43/0x90
[  663.228542]  ext4_sync_file+0x2b4/0x540
[  663.228544]  vfs_fsync_range+0x46/0xa0
[  663.228545]  dio_complete+0x181/0x1b0
[  663.228546]  dio_aio_complete_work+0x17/0x20
[  663.228548]  process_one_work+0x208/0x6a0
[  663.228549]  ? process_one_work+0x18d/0x6a0
[  663.228551]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228552]  worker_thread+0x49/0x4a0
[  663.228554]  kthread+0x107/0x140
[  663.228555]  ? process_one_work+0x6a0/0x6a0
[  663.228557]  ? kthread_create_on_node+0x40/0x40
[  663.228559]  ret_from_fork+0x2e/0x40
[  663.228561] kworker/10:55   D    0  9347      2 0x00000000
[  663.228564] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228565] Call Trace:
[  663.228567]  __schedule+0x2da/0xb00
[  663.228568]  schedule+0x38/0x90
[  663.228570]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228572]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228574]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228576]  call_rwsem_down_write_failed+0x17/0x30
[  663.228578]  down_write+0x5a/0x70
[  663.228580]  ? __generic_file_fsync+0x43/0x90
[  663.228581]  __generic_file_fsync+0x43/0x90
[  663.228583]  ext4_sync_file+0x2b4/0x540
[  663.228584]  vfs_fsync_range+0x46/0xa0
[  663.228586]  dio_complete+0x181/0x1b0
[  663.228587]  dio_aio_complete_work+0x17/0x20
[  663.228589]  process_one_work+0x208/0x6a0
[  663.228590]  ? process_one_work+0x18d/0x6a0
[  663.228592]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228593]  worker_thread+0x49/0x4a0
[  663.228595]  kthread+0x107/0x140
[  663.228597]  ? process_one_work+0x6a0/0x6a0
[  663.228598]  ? kthread_create_on_node+0x40/0x40
[  663.228600]  ret_from_fork+0x2e/0x40
[  663.228602] kworker/10:56   D    0  9348      2 0x00000000
[  663.228604] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228605] Call Trace:
[  663.228607]  __schedule+0x2da/0xb00
[  663.228609]  schedule+0x38/0x90
[  663.228611]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228613]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228615]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228617]  call_rwsem_down_write_failed+0x17/0x30
[  663.228619]  down_write+0x5a/0x70
[  663.228621]  ? __generic_file_fsync+0x43/0x90
[  663.228623]  __generic_file_fsync+0x43/0x90
[  663.228625]  ext4_sync_file+0x2b4/0x540
[  663.228626]  vfs_fsync_range+0x46/0xa0
[  663.228627]  dio_complete+0x181/0x1b0
[  663.228629]  dio_aio_complete_work+0x17/0x20
[  663.228630]  process_one_work+0x208/0x6a0
[  663.228632]  ? process_one_work+0x18d/0x6a0
[  663.228633]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228635]  worker_thread+0x49/0x4a0
[  663.228637]  kthread+0x107/0x140
[  663.228638]  ? process_one_work+0x6a0/0x6a0
[  663.228640]  ? kthread_create_on_node+0x40/0x40
[  663.228641]  ret_from_fork+0x2e/0x40
[  663.228643] kworker/10:57   D    0  9349      2 0x00000000
[  663.228645] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228647] Call Trace:
[  663.228649]  __schedule+0x2da/0xb00
[  663.228651]  schedule+0x38/0x90
[  663.228653]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228655]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228657]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228660]  call_rwsem_down_write_failed+0x17/0x30
[  663.228662]  down_write+0x5a/0x70
[  663.228663]  ? __generic_file_fsync+0x43/0x90
[  663.228665]  __generic_file_fsync+0x43/0x90
[  663.228666]  ext4_sync_file+0x2b4/0x540
[  663.228668]  vfs_fsync_range+0x46/0xa0
[  663.228669]  dio_complete+0x181/0x1b0
[  663.228670]  dio_aio_complete_work+0x17/0x20
[  663.228671]  process_one_work+0x208/0x6a0
[  663.228672]  ? process_one_work+0x18d/0x6a0
[  663.228674]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228675]  worker_thread+0x49/0x4a0
[  663.228677]  kthread+0x107/0x140
[  663.228678]  ? process_one_work+0x6a0/0x6a0
[  663.228680]  ? kthread_create_on_node+0x40/0x40
[  663.228681]  ret_from_fork+0x2e/0x40
[  663.228683] kworker/10:58   D    0  9350      2 0x00000000
[  663.228686] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228687] Call Trace:
[  663.228689]  __schedule+0x2da/0xb00
[  663.228690]  schedule+0x38/0x90
[  663.228692]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228694]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228696]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228698]  call_rwsem_down_write_failed+0x17/0x30
[  663.228701]  down_write+0x5a/0x70
[  663.228702]  ? __generic_file_fsync+0x43/0x90
[  663.228704]  __generic_file_fsync+0x43/0x90
[  663.228706]  ext4_sync_file+0x2b4/0x540
[  663.228707]  vfs_fsync_range+0x46/0xa0
[  663.228709]  dio_complete+0x181/0x1b0
[  663.228710]  dio_aio_complete_work+0x17/0x20
[  663.228711]  process_one_work+0x208/0x6a0
[  663.228712]  ? process_one_work+0x18d/0x6a0
[  663.228714]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228716]  worker_thread+0x49/0x4a0
[  663.228718]  kthread+0x107/0x140
[  663.228719]  ? process_one_work+0x6a0/0x6a0
[  663.228721]  ? kthread_create_on_node+0x40/0x40
[  663.228722]  ret_from_fork+0x2e/0x40
[  663.228724] kworker/10:59   D    0  9351      2 0x00000000
[  663.228727] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228728] Call Trace:
[  663.228730]  __schedule+0x2da/0xb00
[  663.228732]  schedule+0x38/0x90
[  663.228734]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228736]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228738]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228740]  call_rwsem_down_write_failed+0x17/0x30
[  663.228742]  down_write+0x5a/0x70
[  663.228744]  ? __generic_file_fsync+0x43/0x90
[  663.228746]  __generic_file_fsync+0x43/0x90
[  663.228748]  ext4_sync_file+0x2b4/0x540
[  663.228750]  vfs_fsync_range+0x46/0xa0
[  663.228751]  dio_complete+0x181/0x1b0
[  663.228752]  dio_aio_complete_work+0x17/0x20
[  663.228754]  process_one_work+0x208/0x6a0
[  663.228755]  ? process_one_work+0x18d/0x6a0
[  663.228757]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228758]  worker_thread+0x49/0x4a0
[  663.228760]  kthread+0x107/0x140
[  663.228761]  ? process_one_work+0x6a0/0x6a0
[  663.228763]  ? kthread_create_on_node+0x40/0x40
[  663.228764]  ret_from_fork+0x2e/0x40
[  663.228766] kworker/10:60   D    0  9352      2 0x00000000
[  663.228769] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228770] Call Trace:
[  663.228772]  __schedule+0x2da/0xb00
[  663.228774]  schedule+0x38/0x90
[  663.228776]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228778]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228780]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228782]  call_rwsem_down_write_failed+0x17/0x30
[  663.228784]  down_write+0x5a/0x70
[  663.228786]  ? __generic_file_fsync+0x43/0x90
[  663.228788]  __generic_file_fsync+0x43/0x90
[  663.228790]  ext4_sync_file+0x2b4/0x540
[  663.228791]  vfs_fsync_range+0x46/0xa0
[  663.228792]  dio_complete+0x181/0x1b0
[  663.228794]  dio_aio_complete_work+0x17/0x20
[  663.228795]  process_one_work+0x208/0x6a0
[  663.228796]  ? process_one_work+0x18d/0x6a0
[  663.228798]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228799]  worker_thread+0x49/0x4a0
[  663.228801]  kthread+0x107/0x140
[  663.228802]  ? process_one_work+0x6a0/0x6a0
[  663.228804]  ? kthread_create_on_node+0x40/0x40
[  663.228805]  ret_from_fork+0x2e/0x40
[  663.228806] kworker/10:61   D    0  9353      2 0x00000000
[  663.228809] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228811] Call Trace:
[  663.228813]  __schedule+0x2da/0xb00
[  663.228814]  schedule+0x38/0x90
[  663.228816]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228818]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228820]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228823]  call_rwsem_down_write_failed+0x17/0x30
[  663.228825]  down_write+0x5a/0x70
[  663.228826]  ? __generic_file_fsync+0x43/0x90
[  663.228828]  __generic_file_fsync+0x43/0x90
[  663.228830]  ext4_sync_file+0x2b4/0x540
[  663.228831]  vfs_fsync_range+0x46/0xa0
[  663.228832]  dio_complete+0x181/0x1b0
[  663.228834]  dio_aio_complete_work+0x17/0x20
[  663.228835]  process_one_work+0x208/0x6a0
[  663.228836]  ? process_one_work+0x18d/0x6a0
[  663.228838]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228840]  worker_thread+0x49/0x4a0
[  663.228842]  kthread+0x107/0x140
[  663.228843]  ? process_one_work+0x6a0/0x6a0
[  663.228845]  ? kthread_create_on_node+0x40/0x40
[  663.228847]  ret_from_fork+0x2e/0x40
[  663.228848] kworker/10:62   D    0  9354      2 0x00000000
[  663.228851] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228852] Call Trace:
[  663.228854]  __schedule+0x2da/0xb00
[  663.228856]  schedule+0x38/0x90
[  663.228858]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228860]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228862]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228865]  call_rwsem_down_write_failed+0x17/0x30
[  663.228867]  down_write+0x5a/0x70
[  663.228868]  ? __generic_file_fsync+0x43/0x90
[  663.228870]  __generic_file_fsync+0x43/0x90
[  663.228872]  ext4_sync_file+0x2b4/0x540
[  663.228873]  vfs_fsync_range+0x46/0xa0
[  663.228876]  dio_complete+0x181/0x1b0
[  663.228877]  dio_aio_complete_work+0x17/0x20
[  663.228879]  process_one_work+0x208/0x6a0
[  663.228880]  ? process_one_work+0x18d/0x6a0
[  663.228882]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228883]  worker_thread+0x49/0x4a0
[  663.228885]  kthread+0x107/0x140
[  663.228886]  ? process_one_work+0x6a0/0x6a0
[  663.228888]  ? kthread_create_on_node+0x40/0x40
[  663.228889]  ret_from_fork+0x2e/0x40
[  663.228891] kworker/10:63   D    0  9355      2 0x00000000
[  663.228893] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228894] Call Trace:
[  663.228896]  __schedule+0x2da/0xb00
[  663.228898]  schedule+0x38/0x90
[  663.228900]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228902]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228904]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228905]  ? trace_hardirqs_on+0xd/0x10
[  663.228907]  call_rwsem_down_write_failed+0x17/0x30
[  663.228909]  down_write+0x5a/0x70
[  663.228911]  ? __generic_file_fsync+0x43/0x90
[  663.228913]  __generic_file_fsync+0x43/0x90
[  663.228914]  ext4_sync_file+0x2b4/0x540
[  663.228916]  vfs_fsync_range+0x46/0xa0
[  663.228917]  dio_complete+0x181/0x1b0
[  663.228918]  dio_aio_complete_work+0x17/0x20
[  663.228920]  process_one_work+0x208/0x6a0
[  663.228921]  ? process_one_work+0x18d/0x6a0
[  663.228923]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228924]  worker_thread+0x49/0x4a0
[  663.228926]  kthread+0x107/0x140
[  663.228927]  ? process_one_work+0x6a0/0x6a0
[  663.228929]  ? kthread_create_on_node+0x40/0x40
[  663.228931]  ret_from_fork+0x2e/0x40
[  663.228933] kworker/10:64   D    0  9356      2 0x00000000
[  663.228935] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228936] Call Trace:
[  663.228938]  __schedule+0x2da/0xb00
[  663.228940]  schedule+0x38/0x90
[  663.228943]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228945]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228947]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228949]  call_rwsem_down_write_failed+0x17/0x30
[  663.228951]  down_write+0x5a/0x70
[  663.228953]  ? __generic_file_fsync+0x43/0x90
[  663.228955]  __generic_file_fsync+0x43/0x90
[  663.228956]  ext4_sync_file+0x2b4/0x540
[  663.228958]  vfs_fsync_range+0x46/0xa0
[  663.228959]  dio_complete+0x181/0x1b0
[  663.228961]  dio_aio_complete_work+0x17/0x20
[  663.228962]  process_one_work+0x208/0x6a0
[  663.228963]  ? process_one_work+0x18d/0x6a0
[  663.228965]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.228967]  worker_thread+0x49/0x4a0
[  663.228969]  kthread+0x107/0x140
[  663.228970]  ? process_one_work+0x6a0/0x6a0
[  663.228972]  ? kthread_create_on_node+0x40/0x40
[  663.228974]  ret_from_fork+0x2e/0x40
[  663.228976] kworker/10:65   D    0  9357      2 0x00000000
[  663.228978] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.228979] Call Trace:
[  663.228981]  __schedule+0x2da/0xb00
[  663.228983]  schedule+0x38/0x90
[  663.228985]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.228987]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.228989]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.228992]  call_rwsem_down_write_failed+0x17/0x30
[  663.228994]  down_write+0x5a/0x70
[  663.228995]  ? __generic_file_fsync+0x43/0x90
[  663.228997]  __generic_file_fsync+0x43/0x90
[  663.228998]  ext4_sync_file+0x2b4/0x540
[  663.229000]  vfs_fsync_range+0x46/0xa0
[  663.229001]  dio_complete+0x181/0x1b0
[  663.229002]  dio_aio_complete_work+0x17/0x20
[  663.229003]  process_one_work+0x208/0x6a0
[  663.229004]  ? process_one_work+0x18d/0x6a0
[  663.229007]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229008]  worker_thread+0x49/0x4a0
[  663.229010]  kthread+0x107/0x140
[  663.229011]  ? process_one_work+0x6a0/0x6a0
[  663.229013]  ? kthread_create_on_node+0x40/0x40
[  663.229014]  ret_from_fork+0x2e/0x40
[  663.229016] kworker/10:66   D    0  9358      2 0x00000000
[  663.229018] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229019] Call Trace:
[  663.229021]  __schedule+0x2da/0xb00
[  663.229023]  schedule+0x38/0x90
[  663.229025]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229027]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229029]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229032]  call_rwsem_down_write_failed+0x17/0x30
[  663.229034]  down_write+0x5a/0x70
[  663.229036]  ? __generic_file_fsync+0x43/0x90
[  663.229038]  __generic_file_fsync+0x43/0x90
[  663.229040]  ext4_sync_file+0x2b4/0x540
[  663.229041]  vfs_fsync_range+0x46/0xa0
[  663.229043]  dio_complete+0x181/0x1b0
[  663.229044]  dio_aio_complete_work+0x17/0x20
[  663.229045]  process_one_work+0x208/0x6a0
[  663.229047]  ? process_one_work+0x18d/0x6a0
[  663.229049]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229050]  worker_thread+0x49/0x4a0
[  663.229052]  kthread+0x107/0x140
[  663.229053]  ? process_one_work+0x6a0/0x6a0
[  663.229055]  ? kthread_create_on_node+0x40/0x40
[  663.229057]  ret_from_fork+0x2e/0x40
[  663.229058] kworker/10:67   D    0  9359      2 0x00000000
[  663.229061] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229062] Call Trace:
[  663.229064]  __schedule+0x2da/0xb00
[  663.229066]  schedule+0x38/0x90
[  663.229068]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229070]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229072]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229074]  ? trace_hardirqs_on+0xd/0x10
[  663.229076]  call_rwsem_down_write_failed+0x17/0x30
[  663.229079]  down_write+0x5a/0x70
[  663.229081]  ? __generic_file_fsync+0x43/0x90
[  663.229083]  __generic_file_fsync+0x43/0x90
[  663.229085]  ext4_sync_file+0x2b4/0x540
[  663.229086]  vfs_fsync_range+0x46/0xa0
[  663.229087]  dio_complete+0x181/0x1b0
[  663.229089]  dio_aio_complete_work+0x17/0x20
[  663.229090]  process_one_work+0x208/0x6a0
[  663.229092]  ? process_one_work+0x18d/0x6a0
[  663.229094]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229095]  worker_thread+0x49/0x4a0
[  663.229097]  kthread+0x107/0x140
[  663.229098]  ? process_one_work+0x6a0/0x6a0
[  663.229099]  ? kthread_create_on_node+0x40/0x40
[  663.229101]  ret_from_fork+0x2e/0x40
[  663.229103] kworker/10:68   D    0  9360      2 0x00000000
[  663.229105] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229106] Call Trace:
[  663.229108]  __schedule+0x2da/0xb00
[  663.229110]  schedule+0x38/0x90
[  663.229112]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229113]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229115]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229118]  call_rwsem_down_write_failed+0x17/0x30
[  663.229120]  down_write+0x5a/0x70
[  663.229121]  ? __generic_file_fsync+0x43/0x90
[  663.229123]  __generic_file_fsync+0x43/0x90
[  663.229124]  ext4_sync_file+0x2b4/0x540
[  663.229126]  vfs_fsync_range+0x46/0xa0
[  663.229127]  dio_complete+0x181/0x1b0
[  663.229128]  dio_aio_complete_work+0x17/0x20
[  663.229130]  process_one_work+0x208/0x6a0
[  663.229131]  ? process_one_work+0x18d/0x6a0
[  663.229132]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229134]  worker_thread+0x49/0x4a0
[  663.229136]  kthread+0x107/0x140
[  663.229137]  ? process_one_work+0x6a0/0x6a0
[  663.229138]  ? kthread_create_on_node+0x40/0x40
[  663.229140]  ret_from_fork+0x2e/0x40
[  663.229141] kworker/10:69   D    0  9361      2 0x00000000
[  663.229144] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229145] Call Trace:
[  663.229147]  __schedule+0x2da/0xb00
[  663.229148]  schedule+0x38/0x90
[  663.229150]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229152]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229154]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229156]  ? trace_hardirqs_on+0xd/0x10
[  663.229158]  call_rwsem_down_write_failed+0x17/0x30
[  663.229161]  down_write+0x5a/0x70
[  663.229163]  ? __generic_file_fsync+0x43/0x90
[  663.229164]  __generic_file_fsync+0x43/0x90
[  663.229166]  ext4_sync_file+0x2b4/0x540
[  663.229167]  vfs_fsync_range+0x46/0xa0
[  663.229169]  dio_complete+0x181/0x1b0
[  663.229170]  dio_aio_complete_work+0x17/0x20
[  663.229172]  process_one_work+0x208/0x6a0
[  663.229173]  ? process_one_work+0x18d/0x6a0
[  663.229175]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229176]  worker_thread+0x49/0x4a0
[  663.229178]  kthread+0x107/0x140
[  663.229179]  ? process_one_work+0x6a0/0x6a0
[  663.229181]  ? kthread_create_on_node+0x40/0x40
[  663.229182]  ret_from_fork+0x2e/0x40
[  663.229184] kworker/10:70   D    0  9362      2 0x00000000
[  663.229187] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229188] Call Trace:
[  663.229190]  __schedule+0x2da/0xb00
[  663.229192]  schedule+0x38/0x90
[  663.229194]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229197]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229199]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229200]  ? trace_hardirqs_on+0xd/0x10
[  663.229202]  call_rwsem_down_write_failed+0x17/0x30
[  663.229205]  down_write+0x5a/0x70
[  663.229206]  ? __generic_file_fsync+0x43/0x90
[  663.229208]  __generic_file_fsync+0x43/0x90
[  663.229210]  ext4_sync_file+0x2b4/0x540
[  663.229211]  vfs_fsync_range+0x46/0xa0
[  663.229213]  dio_complete+0x181/0x1b0
[  663.229214]  dio_aio_complete_work+0x17/0x20
[  663.229215]  process_one_work+0x208/0x6a0
[  663.229216]  ? process_one_work+0x18d/0x6a0
[  663.229218]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229219]  worker_thread+0x49/0x4a0
[  663.229221]  kthread+0x107/0x140
[  663.229223]  ? process_one_work+0x6a0/0x6a0
[  663.229224]  ? kthread_create_on_node+0x40/0x40
[  663.229225]  ret_from_fork+0x2e/0x40
[  663.229227] kworker/10:71   D    0  9363      2 0x00000000
[  663.229230] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229231] Call Trace:
[  663.229233]  __schedule+0x2da/0xb00
[  663.229235]  schedule+0x38/0x90
[  663.229237]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229239]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229241]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229243]  ? trace_hardirqs_on+0xd/0x10
[  663.229245]  call_rwsem_down_write_failed+0x17/0x30
[  663.229247]  down_write+0x5a/0x70
[  663.229248]  ? __generic_file_fsync+0x43/0x90
[  663.229250]  __generic_file_fsync+0x43/0x90
[  663.229251]  ext4_sync_file+0x2b4/0x540
[  663.229253]  vfs_fsync_range+0x46/0xa0
[  663.229254]  dio_complete+0x181/0x1b0
[  663.229256]  dio_aio_complete_work+0x17/0x20
[  663.229257]  process_one_work+0x208/0x6a0
[  663.229259]  ? process_one_work+0x18d/0x6a0
[  663.229260]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229262]  worker_thread+0x49/0x4a0
[  663.229264]  kthread+0x107/0x140
[  663.229265]  ? process_one_work+0x6a0/0x6a0
[  663.229266]  ? kthread_create_on_node+0x40/0x40
[  663.229268]  ret_from_fork+0x2e/0x40
[  663.229269] kworker/10:72   D    0  9364      2 0x00000000
[  663.229272] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229273] Call Trace:
[  663.229275]  __schedule+0x2da/0xb00
[  663.229277]  schedule+0x38/0x90
[  663.229279]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229281]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229283]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229285]  ? trace_hardirqs_on+0xd/0x10
[  663.229287]  call_rwsem_down_write_failed+0x17/0x30
[  663.229289]  down_write+0x5a/0x70
[  663.229291]  ? __generic_file_fsync+0x43/0x90
[  663.229292]  __generic_file_fsync+0x43/0x90
[  663.229294]  ext4_sync_file+0x2b4/0x540
[  663.229296]  vfs_fsync_range+0x46/0xa0
[  663.229297]  dio_complete+0x181/0x1b0
[  663.229298]  dio_aio_complete_work+0x17/0x20
[  663.229299]  process_one_work+0x208/0x6a0
[  663.229301]  ? process_one_work+0x18d/0x6a0
[  663.229303]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229304]  worker_thread+0x49/0x4a0
[  663.229306]  kthread+0x107/0x140
[  663.229308]  ? process_one_work+0x6a0/0x6a0
[  663.229310]  ? kthread_create_on_node+0x40/0x40
[  663.229311]  ret_from_fork+0x2e/0x40
[  663.229313] kworker/10:73   D    0  9365      2 0x00000000
[  663.229316] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229317] Call Trace:
[  663.229319]  __schedule+0x2da/0xb00
[  663.229321]  schedule+0x38/0x90
[  663.229323]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229325]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229327]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229328]  ? trace_hardirqs_on+0xd/0x10
[  663.229330]  call_rwsem_down_write_failed+0x17/0x30
[  663.229333]  down_write+0x5a/0x70
[  663.229334]  ? __generic_file_fsync+0x43/0x90
[  663.229336]  __generic_file_fsync+0x43/0x90
[  663.229338]  ext4_sync_file+0x2b4/0x540
[  663.229340]  vfs_fsync_range+0x46/0xa0
[  663.229341]  dio_complete+0x181/0x1b0
[  663.229342]  dio_aio_complete_work+0x17/0x20
[  663.229343]  process_one_work+0x208/0x6a0
[  663.229345]  ? process_one_work+0x18d/0x6a0
[  663.229347]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229348]  worker_thread+0x49/0x4a0
[  663.229350]  kthread+0x107/0x140
[  663.229353]  ? process_one_work+0x6a0/0x6a0
[  663.229354]  ? kthread_create_on_node+0x40/0x40
[  663.229356]  ret_from_fork+0x2e/0x40
[  663.229357] kworker/10:74   D    0  9366      2 0x00000000
[  663.229360] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229361] Call Trace:
[  663.229363]  __schedule+0x2da/0xb00
[  663.229365]  schedule+0x38/0x90
[  663.229366]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229369]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229371]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229373]  call_rwsem_down_write_failed+0x17/0x30
[  663.229375]  down_write+0x5a/0x70
[  663.229377]  ? __generic_file_fsync+0x43/0x90
[  663.229378]  __generic_file_fsync+0x43/0x90
[  663.229380]  ext4_sync_file+0x2b4/0x540
[  663.229381]  vfs_fsync_range+0x46/0xa0
[  663.229383]  dio_complete+0x181/0x1b0
[  663.229384]  dio_aio_complete_work+0x17/0x20
[  663.229385]  process_one_work+0x208/0x6a0
[  663.229387]  ? process_one_work+0x18d/0x6a0
[  663.229388]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229390]  worker_thread+0x49/0x4a0
[  663.229392]  kthread+0x107/0x140
[  663.229393]  ? process_one_work+0x6a0/0x6a0
[  663.229395]  ? kthread_create_on_node+0x40/0x40
[  663.229396]  ret_from_fork+0x2e/0x40
[  663.229398] kworker/10:75   D    0  9367      2 0x00000000
[  663.229401] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229403] Call Trace:
[  663.229405]  __schedule+0x2da/0xb00
[  663.229407]  schedule+0x38/0x90
[  663.229409]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229411]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229413]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229415]  call_rwsem_down_write_failed+0x17/0x30
[  663.229417]  down_write+0x5a/0x70
[  663.229419]  ? __generic_file_fsync+0x43/0x90
[  663.229421]  __generic_file_fsync+0x43/0x90
[  663.229423]  ext4_sync_file+0x2b4/0x540
[  663.229424]  vfs_fsync_range+0x46/0xa0
[  663.229425]  dio_complete+0x181/0x1b0
[  663.229427]  dio_aio_complete_work+0x17/0x20
[  663.229428]  process_one_work+0x208/0x6a0
[  663.229429]  ? process_one_work+0x18d/0x6a0
[  663.229431]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229433]  worker_thread+0x49/0x4a0
[  663.229435]  kthread+0x107/0x140
[  663.229436]  ? process_one_work+0x6a0/0x6a0
[  663.229438]  ? kthread_create_on_node+0x40/0x40
[  663.229439]  ret_from_fork+0x2e/0x40
[  663.229441] kworker/10:76   D    0  9368      2 0x00000000
[  663.229443] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229444] Call Trace:
[  663.229446]  __schedule+0x2da/0xb00
[  663.229448]  schedule+0x38/0x90
[  663.229450]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229453]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229455]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229457]  call_rwsem_down_write_failed+0x17/0x30
[  663.229459]  down_write+0x5a/0x70
[  663.229461]  ? __generic_file_fsync+0x43/0x90
[  663.229462]  __generic_file_fsync+0x43/0x90
[  663.229464]  ext4_sync_file+0x2b4/0x540
[  663.229465]  vfs_fsync_range+0x46/0xa0
[  663.229467]  dio_complete+0x181/0x1b0
[  663.229468]  dio_aio_complete_work+0x17/0x20
[  663.229470]  process_one_work+0x208/0x6a0
[  663.229471]  ? process_one_work+0x18d/0x6a0
[  663.229473]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229474]  worker_thread+0x49/0x4a0
[  663.229476]  kthread+0x107/0x140
[  663.229477]  ? process_one_work+0x6a0/0x6a0
[  663.229479]  ? kthread_create_on_node+0x40/0x40
[  663.229481]  ret_from_fork+0x2e/0x40
[  663.229482] kworker/10:77   D    0  9369      2 0x00000000
[  663.229485] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229486] Call Trace:
[  663.229488]  __schedule+0x2da/0xb00
[  663.229490]  schedule+0x38/0x90
[  663.229491]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229493]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229495]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229498]  call_rwsem_down_write_failed+0x17/0x30
[  663.229499]  down_write+0x5a/0x70
[  663.229502]  ? __generic_file_fsync+0x43/0x90
[  663.229503]  __generic_file_fsync+0x43/0x90
[  663.229505]  ext4_sync_file+0x2b4/0x540
[  663.229506]  vfs_fsync_range+0x46/0xa0
[  663.229507]  dio_complete+0x181/0x1b0
[  663.229509]  dio_aio_complete_work+0x17/0x20
[  663.229510]  process_one_work+0x208/0x6a0
[  663.229511]  ? process_one_work+0x18d/0x6a0
[  663.229513]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229514]  worker_thread+0x49/0x4a0
[  663.229516]  kthread+0x107/0x140
[  663.229517]  ? process_one_work+0x6a0/0x6a0
[  663.229519]  ? kthread_create_on_node+0x40/0x40
[  663.229520]  ret_from_fork+0x2e/0x40
[  663.229522] kworker/10:78   D    0  9370      2 0x00000000
[  663.229524] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229525] Call Trace:
[  663.229527]  __schedule+0x2da/0xb00
[  663.229529]  schedule+0x38/0x90
[  663.229530]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229532]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229535]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229536]  ? trace_hardirqs_on_caller+0xf9/0x1b0
[  663.229539]  call_rwsem_down_write_failed+0x17/0x30
[  663.229541]  down_write+0x5a/0x70
[  663.229543]  ? __generic_file_fsync+0x43/0x90
[  663.229545]  __generic_file_fsync+0x43/0x90
[  663.229546]  ext4_sync_file+0x2b4/0x540
[  663.229548]  vfs_fsync_range+0x46/0xa0
[  663.229549]  dio_complete+0x181/0x1b0
[  663.229551]  dio_aio_complete_work+0x17/0x20
[  663.229552]  process_one_work+0x208/0x6a0
[  663.229553]  ? process_one_work+0x18d/0x6a0
[  663.229555]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229556]  worker_thread+0x49/0x4a0
[  663.229558]  kthread+0x107/0x140
[  663.229560]  ? process_one_work+0x6a0/0x6a0
[  663.229561]  ? kthread_create_on_node+0x40/0x40
[  663.229563]  ret_from_fork+0x2e/0x40
[  663.229565] kworker/10:79   D    0  9371      2 0x00000000
[  663.229568] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229569] Call Trace:
[  663.229571]  __schedule+0x2da/0xb00
[  663.229573]  schedule+0x38/0x90
[  663.229575]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229577]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229579]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229582]  call_rwsem_down_write_failed+0x17/0x30
[  663.229584]  down_write+0x5a/0x70
[  663.229586]  ? __generic_file_fsync+0x43/0x90
[  663.229587]  __generic_file_fsync+0x43/0x90
[  663.229589]  ext4_sync_file+0x2b4/0x540
[  663.229591]  vfs_fsync_range+0x46/0xa0
[  663.229592]  dio_complete+0x181/0x1b0
[  663.229593]  dio_aio_complete_work+0x17/0x20
[  663.229595]  process_one_work+0x208/0x6a0
[  663.229596]  ? process_one_work+0x18d/0x6a0
[  663.229597]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229599]  worker_thread+0x49/0x4a0
[  663.229601]  kthread+0x107/0x140
[  663.229602]  ? process_one_work+0x6a0/0x6a0
[  663.229604]  ? kthread_create_on_node+0x40/0x40
[  663.229605]  ret_from_fork+0x2e/0x40
[  663.229607] kworker/10:80   D    0  9372      2 0x00000000
[  663.229609] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229610] Call Trace:
[  663.229612]  __schedule+0x2da/0xb00
[  663.229614]  schedule+0x38/0x90
[  663.229616]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229618]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229620]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229622]  call_rwsem_down_write_failed+0x17/0x30
[  663.229624]  down_write+0x5a/0x70
[  663.229626]  ? __generic_file_fsync+0x43/0x90
[  663.229628]  __generic_file_fsync+0x43/0x90
[  663.229629]  ext4_sync_file+0x2b4/0x540
[  663.229631]  vfs_fsync_range+0x46/0xa0
[  663.229633]  dio_complete+0x181/0x1b0
[  663.229634]  dio_aio_complete_work+0x17/0x20
[  663.229635]  process_one_work+0x208/0x6a0
[  663.229636]  ? process_one_work+0x18d/0x6a0
[  663.229638]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229639]  worker_thread+0x49/0x4a0
[  663.229641]  kthread+0x107/0x140
[  663.229643]  ? process_one_work+0x6a0/0x6a0
[  663.229644]  ? kthread_create_on_node+0x40/0x40
[  663.229646]  ret_from_fork+0x2e/0x40
[  663.229648] kworker/10:81   D    0  9373      2 0x00000000
[  663.229650] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229651] Call Trace:
[  663.229653]  __schedule+0x2da/0xb00
[  663.229655]  schedule+0x38/0x90
[  663.229657]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229659]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229661]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229664]  call_rwsem_down_write_failed+0x17/0x30
[  663.229666]  down_write+0x5a/0x70
[  663.229669]  ? __generic_file_fsync+0x43/0x90
[  663.229671]  __generic_file_fsync+0x43/0x90
[  663.229672]  ext4_sync_file+0x2b4/0x540
[  663.229674]  vfs_fsync_range+0x46/0xa0
[  663.229675]  dio_complete+0x181/0x1b0
[  663.229677]  dio_aio_complete_work+0x17/0x20
[  663.229678]  process_one_work+0x208/0x6a0
[  663.229679]  ? process_one_work+0x18d/0x6a0
[  663.229681]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229683]  worker_thread+0x49/0x4a0
[  663.229685]  kthread+0x107/0x140
[  663.229686]  ? process_one_work+0x6a0/0x6a0
[  663.229688]  ? kthread_create_on_node+0x40/0x40
[  663.229689]  ret_from_fork+0x2e/0x40
[  663.229691] kworker/10:82   D    0  9374      2 0x00000000
[  663.229693] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229694] Call Trace:
[  663.229697]  __schedule+0x2da/0xb00
[  663.229699]  schedule+0x38/0x90
[  663.229701]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229703]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229705]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229708]  call_rwsem_down_write_failed+0x17/0x30
[  663.229710]  down_write+0x5a/0x70
[  663.229711]  ? __generic_file_fsync+0x43/0x90
[  663.229713]  __generic_file_fsync+0x43/0x90
[  663.229715]  ext4_sync_file+0x2b4/0x540
[  663.229716]  vfs_fsync_range+0x46/0xa0
[  663.229717]  dio_complete+0x181/0x1b0
[  663.229719]  dio_aio_complete_work+0x17/0x20
[  663.229720]  process_one_work+0x208/0x6a0
[  663.229721]  ? process_one_work+0x18d/0x6a0
[  663.229723]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229724]  worker_thread+0x49/0x4a0
[  663.229726]  kthread+0x107/0x140
[  663.229727]  ? process_one_work+0x6a0/0x6a0
[  663.229729]  ? kthread_create_on_node+0x40/0x40
[  663.229731]  ret_from_fork+0x2e/0x40
[  663.229732] kworker/10:83   D    0  9375      2 0x00000000
[  663.229735] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229736] Call Trace:
[  663.229738]  __schedule+0x2da/0xb00
[  663.229739]  schedule+0x38/0x90
[  663.229741]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229743]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229745]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229747]  call_rwsem_down_write_failed+0x17/0x30
[  663.229749]  down_write+0x5a/0x70
[  663.229751]  ? __generic_file_fsync+0x43/0x90
[  663.229752]  __generic_file_fsync+0x43/0x90
[  663.229754]  ext4_sync_file+0x2b4/0x540
[  663.229755]  vfs_fsync_range+0x46/0xa0
[  663.229757]  dio_complete+0x181/0x1b0
[  663.229759]  dio_aio_complete_work+0x17/0x20
[  663.229760]  process_one_work+0x208/0x6a0
[  663.229762]  ? process_one_work+0x18d/0x6a0
[  663.229763]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229765]  worker_thread+0x49/0x4a0
[  663.229767]  kthread+0x107/0x140
[  663.229768]  ? process_one_work+0x6a0/0x6a0
[  663.229770]  ? kthread_create_on_node+0x40/0x40
[  663.229771]  ret_from_fork+0x2e/0x40
[  663.229773] kworker/10:84   D    0  9376      2 0x00000000
[  663.229775] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229777] Call Trace:
[  663.229779]  __schedule+0x2da/0xb00
[  663.229780]  schedule+0x38/0x90
[  663.229783]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229785]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229787]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229789]  call_rwsem_down_write_failed+0x17/0x30
[  663.229791]  down_write+0x5a/0x70
[  663.229793]  ? __generic_file_fsync+0x43/0x90
[  663.229795]  __generic_file_fsync+0x43/0x90
[  663.229797]  ext4_sync_file+0x2b4/0x540
[  663.229798]  vfs_fsync_range+0x46/0xa0
[  663.229800]  dio_complete+0x181/0x1b0
[  663.229801]  dio_aio_complete_work+0x17/0x20
[  663.229803]  process_one_work+0x208/0x6a0
[  663.229804]  ? process_one_work+0x18d/0x6a0
[  663.229806]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229807]  worker_thread+0x49/0x4a0
[  663.229809]  kthread+0x107/0x140
[  663.229810]  ? process_one_work+0x6a0/0x6a0
[  663.229812]  ? kthread_create_on_node+0x40/0x40
[  663.229814]  ret_from_fork+0x2e/0x40
[  663.229815] kworker/10:85   D    0  9377      2 0x00000000
[  663.229818] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229819] Call Trace:
[  663.229821]  __schedule+0x2da/0xb00
[  663.229823]  schedule+0x38/0x90
[  663.229825]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229828]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229830]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229832]  call_rwsem_down_write_failed+0x17/0x30
[  663.229834]  down_write+0x5a/0x70
[  663.229835]  ? __generic_file_fsync+0x43/0x90
[  663.229837]  __generic_file_fsync+0x43/0x90
[  663.229838]  ext4_sync_file+0x2b4/0x540
[  663.229840]  vfs_fsync_range+0x46/0xa0
[  663.229841]  dio_complete+0x181/0x1b0
[  663.229842]  dio_aio_complete_work+0x17/0x20
[  663.229844]  process_one_work+0x208/0x6a0
[  663.229845]  ? process_one_work+0x18d/0x6a0
[  663.229847]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229849]  worker_thread+0x49/0x4a0
[  663.229850]  kthread+0x107/0x140
[  663.229852]  ? process_one_work+0x6a0/0x6a0
[  663.229853]  ? kthread_create_on_node+0x40/0x40
[  663.229854]  ret_from_fork+0x2e/0x40
[  663.229856] kworker/10:86   D    0  9378      2 0x00000000
[  663.229859] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229860] Call Trace:
[  663.229862]  __schedule+0x2da/0xb00
[  663.229864]  schedule+0x38/0x90
[  663.229866]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229868]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229870]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229873]  call_rwsem_down_write_failed+0x17/0x30
[  663.229875]  down_write+0x5a/0x70
[  663.229876]  ? __generic_file_fsync+0x43/0x90
[  663.229878]  __generic_file_fsync+0x43/0x90
[  663.229880]  ext4_sync_file+0x2b4/0x540
[  663.229881]  vfs_fsync_range+0x46/0xa0
[  663.229883]  dio_complete+0x181/0x1b0
[  663.229884]  dio_aio_complete_work+0x17/0x20
[  663.229885]  process_one_work+0x208/0x6a0
[  663.229887]  ? process_one_work+0x18d/0x6a0
[  663.229888]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229890]  worker_thread+0x49/0x4a0
[  663.229893]  kthread+0x107/0x140
[  663.229894]  ? process_one_work+0x6a0/0x6a0
[  663.229896]  ? kthread_create_on_node+0x40/0x40
[  663.229897]  ret_from_fork+0x2e/0x40
[  663.229899] kworker/10:87   D    0  9379      2 0x00000000
[  663.229902] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229903] Call Trace:
[  663.229905]  __schedule+0x2da/0xb00
[  663.229906]  schedule+0x38/0x90
[  663.229908]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229910]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229912]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229915]  call_rwsem_down_write_failed+0x17/0x30
[  663.229917]  down_write+0x5a/0x70
[  663.229919]  ? __generic_file_fsync+0x43/0x90
[  663.229920]  __generic_file_fsync+0x43/0x90
[  663.229922]  ext4_sync_file+0x2b4/0x540
[  663.229924]  vfs_fsync_range+0x46/0xa0
[  663.229926]  dio_complete+0x181/0x1b0
[  663.229927]  dio_aio_complete_work+0x17/0x20
[  663.229928]  process_one_work+0x208/0x6a0
[  663.229929]  ? process_one_work+0x18d/0x6a0
[  663.229931]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229932]  worker_thread+0x49/0x4a0
[  663.229934]  kthread+0x107/0x140
[  663.229936]  ? process_one_work+0x6a0/0x6a0
[  663.229938]  ? kthread_create_on_node+0x40/0x40
[  663.229939]  ret_from_fork+0x2e/0x40
[  663.229941] kworker/10:88   D    0  9380      2 0x00000000
[  663.229943] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229944] Call Trace:
[  663.229946]  __schedule+0x2da/0xb00
[  663.229948]  schedule+0x38/0x90
[  663.229950]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229952]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229954]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229956]  call_rwsem_down_write_failed+0x17/0x30
[  663.229958]  down_write+0x5a/0x70
[  663.229960]  ? __generic_file_fsync+0x43/0x90
[  663.229962]  __generic_file_fsync+0x43/0x90
[  663.229963]  ext4_sync_file+0x2b4/0x540
[  663.229965]  vfs_fsync_range+0x46/0xa0
[  663.229966]  dio_complete+0x181/0x1b0
[  663.229967]  dio_aio_complete_work+0x17/0x20
[  663.229968]  process_one_work+0x208/0x6a0
[  663.229970]  ? process_one_work+0x18d/0x6a0
[  663.229971]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.229973]  worker_thread+0x49/0x4a0
[  663.229975]  kthread+0x107/0x140
[  663.229976]  ? process_one_work+0x6a0/0x6a0
[  663.229978]  ? kthread_create_on_node+0x40/0x40
[  663.229979]  ret_from_fork+0x2e/0x40
[  663.229981] kworker/10:89   D    0  9381      2 0x00000000
[  663.229983] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.229984] Call Trace:
[  663.229986]  __schedule+0x2da/0xb00
[  663.229988]  schedule+0x38/0x90
[  663.229990]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.229992]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.229994]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.229997]  call_rwsem_down_write_failed+0x17/0x30
[  663.229999]  down_write+0x5a/0x70
[  663.230001]  ? __generic_file_fsync+0x43/0x90
[  663.230002]  __generic_file_fsync+0x43/0x90
[  663.230004]  ext4_sync_file+0x2b4/0x540
[  663.230006]  vfs_fsync_range+0x46/0xa0
[  663.230007]  dio_complete+0x181/0x1b0
[  663.230008]  dio_aio_complete_work+0x17/0x20
[  663.230010]  process_one_work+0x208/0x6a0
[  663.230011]  ? process_one_work+0x18d/0x6a0
[  663.230013]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230015]  worker_thread+0x49/0x4a0
[  663.230016]  kthread+0x107/0x140
[  663.230018]  ? process_one_work+0x6a0/0x6a0
[  663.230019]  ? kthread_create_on_node+0x40/0x40
[  663.230021]  ret_from_fork+0x2e/0x40
[  663.230023] kworker/10:90   D    0  9382      2 0x00000000
[  663.230026] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230027] Call Trace:
[  663.230029]  __schedule+0x2da/0xb00
[  663.230031]  schedule+0x38/0x90
[  663.230033]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230035]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230037]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230039]  call_rwsem_down_write_failed+0x17/0x30
[  663.230041]  down_write+0x5a/0x70
[  663.230042]  ? __generic_file_fsync+0x43/0x90
[  663.230044]  __generic_file_fsync+0x43/0x90
[  663.230046]  ext4_sync_file+0x2b4/0x540
[  663.230047]  vfs_fsync_range+0x46/0xa0
[  663.230049]  dio_complete+0x181/0x1b0
[  663.230050]  dio_aio_complete_work+0x17/0x20
[  663.230051]  process_one_work+0x208/0x6a0
[  663.230053]  ? process_one_work+0x18d/0x6a0
[  663.230054]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230056]  worker_thread+0x49/0x4a0
[  663.230058]  kthread+0x107/0x140
[  663.230059]  ? process_one_work+0x6a0/0x6a0
[  663.230060]  ? kthread_create_on_node+0x40/0x40
[  663.230062]  ret_from_fork+0x2e/0x40
[  663.230063] kworker/10:91   D    0  9383      2 0x00000000
[  663.230066] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230067] Call Trace:
[  663.230069]  __schedule+0x2da/0xb00
[  663.230070]  schedule+0x38/0x90
[  663.230072]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230074]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230076]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230079]  call_rwsem_down_write_failed+0x17/0x30
[  663.230081]  down_write+0x5a/0x70
[  663.230083]  ? __generic_file_fsync+0x43/0x90
[  663.230084]  __generic_file_fsync+0x43/0x90
[  663.230086]  ext4_sync_file+0x2b4/0x540
[  663.230087]  vfs_fsync_range+0x46/0xa0
[  663.230089]  dio_complete+0x181/0x1b0
[  663.230090]  dio_aio_complete_work+0x17/0x20
[  663.230092]  process_one_work+0x208/0x6a0
[  663.230093]  ? process_one_work+0x18d/0x6a0
[  663.230095]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230096]  worker_thread+0x49/0x4a0
[  663.230098]  kthread+0x107/0x140
[  663.230099]  ? process_one_work+0x6a0/0x6a0
[  663.230101]  ? kthread_create_on_node+0x40/0x40
[  663.230102]  ret_from_fork+0x2e/0x40
[  663.230104] kworker/10:92   D    0  9384      2 0x00000000
[  663.230107] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230108] Call Trace:
[  663.230110]  __schedule+0x2da/0xb00
[  663.230112]  schedule+0x38/0x90
[  663.230114]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230116]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230118]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230121]  call_rwsem_down_write_failed+0x17/0x30
[  663.230123]  down_write+0x5a/0x70
[  663.230125]  ? __generic_file_fsync+0x43/0x90
[  663.230126]  __generic_file_fsync+0x43/0x90
[  663.230128]  ext4_sync_file+0x2b4/0x540
[  663.230130]  vfs_fsync_range+0x46/0xa0
[  663.230131]  dio_complete+0x181/0x1b0
[  663.230132]  dio_aio_complete_work+0x17/0x20
[  663.230134]  process_one_work+0x208/0x6a0
[  663.230135]  ? process_one_work+0x18d/0x6a0
[  663.230137]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230138]  worker_thread+0x49/0x4a0
[  663.230140]  kthread+0x107/0x140
[  663.230142]  ? process_one_work+0x6a0/0x6a0
[  663.230143]  ? kthread_create_on_node+0x40/0x40
[  663.230145]  ret_from_fork+0x2e/0x40
[  663.230146] kworker/10:93   D    0  9385      2 0x00000000
[  663.230149] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230150] Call Trace:
[  663.230152]  __schedule+0x2da/0xb00
[  663.230153]  schedule+0x38/0x90
[  663.230155]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230157]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230159]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230161]  call_rwsem_down_write_failed+0x17/0x30
[  663.230163]  down_write+0x5a/0x70
[  663.230165]  ? __generic_file_fsync+0x43/0x90
[  663.230166]  __generic_file_fsync+0x43/0x90
[  663.230168]  ext4_sync_file+0x2b4/0x540
[  663.230169]  vfs_fsync_range+0x46/0xa0
[  663.230171]  dio_complete+0x181/0x1b0
[  663.230172]  dio_aio_complete_work+0x17/0x20
[  663.230173]  process_one_work+0x208/0x6a0
[  663.230174]  ? process_one_work+0x18d/0x6a0
[  663.230176]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230177]  worker_thread+0x49/0x4a0
[  663.230179]  kthread+0x107/0x140
[  663.230180]  ? process_one_work+0x6a0/0x6a0
[  663.230182]  ? kthread_create_on_node+0x40/0x40
[  663.230183]  ret_from_fork+0x2e/0x40
[  663.230185] kworker/10:94   D    0  9386      2 0x00000000
[  663.230187] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230188] Call Trace:
[  663.230190]  __schedule+0x2da/0xb00
[  663.230192]  schedule+0x38/0x90
[  663.230194]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230195]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230197]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230199]  ? trace_hardirqs_on+0xd/0x10
[  663.230201]  call_rwsem_down_write_failed+0x17/0x30
[  663.230203]  down_write+0x5a/0x70
[  663.230205]  ? __generic_file_fsync+0x43/0x90
[  663.230206]  __generic_file_fsync+0x43/0x90
[  663.230208]  ext4_sync_file+0x2b4/0x540
[  663.230209]  vfs_fsync_range+0x46/0xa0
[  663.230211]  dio_complete+0x181/0x1b0
[  663.230212]  dio_aio_complete_work+0x17/0x20
[  663.230214]  process_one_work+0x208/0x6a0
[  663.230215]  ? process_one_work+0x18d/0x6a0
[  663.230217]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230218]  worker_thread+0x49/0x4a0
[  663.230220]  kthread+0x107/0x140
[  663.230221]  ? process_one_work+0x6a0/0x6a0
[  663.230223]  ? kthread_create_on_node+0x40/0x40
[  663.230224]  ret_from_fork+0x2e/0x40
[  663.230226] kworker/10:95   D    0  9387      2 0x00000000
[  663.230230] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230231] Call Trace:
[  663.230233]  __schedule+0x2da/0xb00
[  663.230235]  schedule+0x38/0x90
[  663.230237]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230239]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230241]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230243]  call_rwsem_down_write_failed+0x17/0x30
[  663.230246]  down_write+0x5a/0x70
[  663.230247]  ? __generic_file_fsync+0x43/0x90
[  663.230249]  __generic_file_fsync+0x43/0x90
[  663.230251]  ext4_sync_file+0x2b4/0x540
[  663.230252]  vfs_fsync_range+0x46/0xa0
[  663.230254]  dio_complete+0x181/0x1b0
[  663.230255]  dio_aio_complete_work+0x17/0x20
[  663.230256]  process_one_work+0x208/0x6a0
[  663.230258]  ? process_one_work+0x18d/0x6a0
[  663.230259]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230261]  worker_thread+0x49/0x4a0
[  663.230262]  kthread+0x107/0x140
[  663.230264]  ? process_one_work+0x6a0/0x6a0
[  663.230265]  ? kthread_create_on_node+0x40/0x40
[  663.230267]  ret_from_fork+0x2e/0x40
[  663.230268] kworker/10:96   D    0  9388      2 0x00000000
[  663.230271] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230272] Call Trace:
[  663.230274]  __schedule+0x2da/0xb00
[  663.230276]  schedule+0x38/0x90
[  663.230278]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230280]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230282]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230285]  call_rwsem_down_write_failed+0x17/0x30
[  663.230287]  down_write+0x5a/0x70
[  663.230288]  ? __generic_file_fsync+0x43/0x90
[  663.230290]  __generic_file_fsync+0x43/0x90
[  663.230291]  ext4_sync_file+0x2b4/0x540
[  663.230293]  vfs_fsync_range+0x46/0xa0
[  663.230294]  dio_complete+0x181/0x1b0
[  663.230295]  dio_aio_complete_work+0x17/0x20
[  663.230296]  process_one_work+0x208/0x6a0
[  663.230298]  ? process_one_work+0x18d/0x6a0
[  663.230299]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230301]  worker_thread+0x49/0x4a0
[  663.230303]  kthread+0x107/0x140
[  663.230304]  ? process_one_work+0x6a0/0x6a0
[  663.230305]  ? kthread_create_on_node+0x40/0x40
[  663.230307]  ret_from_fork+0x2e/0x40
[  663.230308] kworker/10:97   D    0  9389      2 0x00000000
[  663.230311] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230312] Call Trace:
[  663.230314]  __schedule+0x2da/0xb00
[  663.230316]  schedule+0x38/0x90
[  663.230317]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230319]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230321]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230324]  call_rwsem_down_write_failed+0x17/0x30
[  663.230326]  down_write+0x5a/0x70
[  663.230327]  ? __generic_file_fsync+0x43/0x90
[  663.230329]  __generic_file_fsync+0x43/0x90
[  663.230331]  ext4_sync_file+0x2b4/0x540
[  663.230333]  vfs_fsync_range+0x46/0xa0
[  663.230335]  dio_complete+0x181/0x1b0
[  663.230336]  dio_aio_complete_work+0x17/0x20
[  663.230337]  process_one_work+0x208/0x6a0
[  663.230338]  ? process_one_work+0x18d/0x6a0
[  663.230340]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230342]  worker_thread+0x49/0x4a0
[  663.230344]  kthread+0x107/0x140
[  663.230345]  ? process_one_work+0x6a0/0x6a0
[  663.230347]  ? kthread_create_on_node+0x40/0x40
[  663.230348]  ret_from_fork+0x2e/0x40
[  663.230350] kworker/10:98   D    0  9390      2 0x00000000
[  663.230353] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230354] Call Trace:
[  663.230356]  __schedule+0x2da/0xb00
[  663.230358]  schedule+0x38/0x90
[  663.230360]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230362]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230364]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230367]  call_rwsem_down_write_failed+0x17/0x30
[  663.230369]  down_write+0x5a/0x70
[  663.230371]  ? __generic_file_fsync+0x43/0x90
[  663.230372]  __generic_file_fsync+0x43/0x90
[  663.230374]  ext4_sync_file+0x2b4/0x540
[  663.230376]  vfs_fsync_range+0x46/0xa0
[  663.230377]  dio_complete+0x181/0x1b0
[  663.230378]  dio_aio_complete_work+0x17/0x20
[  663.230380]  process_one_work+0x208/0x6a0
[  663.230381]  ? process_one_work+0x18d/0x6a0
[  663.230383]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230384]  worker_thread+0x49/0x4a0
[  663.230386]  kthread+0x107/0x140
[  663.230387]  ? process_one_work+0x6a0/0x6a0
[  663.230390]  ? kthread_create_on_node+0x40/0x40
[  663.230391]  ret_from_fork+0x2e/0x40
[  663.230393] kworker/10:99   D    0  9391      2 0x00000000
[  663.230396] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230397] Call Trace:
[  663.230399]  __schedule+0x2da/0xb00
[  663.230401]  schedule+0x38/0x90
[  663.230403]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230405]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230407]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230409]  call_rwsem_down_write_failed+0x17/0x30
[  663.230411]  down_write+0x5a/0x70
[  663.230413]  ? __generic_file_fsync+0x43/0x90
[  663.230414]  __generic_file_fsync+0x43/0x90
[  663.230416]  ext4_sync_file+0x2b4/0x540
[  663.230417]  vfs_fsync_range+0x46/0xa0
[  663.230419]  dio_complete+0x181/0x1b0
[  663.230420]  dio_aio_complete_work+0x17/0x20
[  663.230421]  process_one_work+0x208/0x6a0
[  663.230422]  ? process_one_work+0x18d/0x6a0
[  663.230424]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230425]  worker_thread+0x49/0x4a0
[  663.230427]  kthread+0x107/0x140
[  663.230428]  ? process_one_work+0x6a0/0x6a0
[  663.230431]  ? kthread_create_on_node+0x40/0x40
[  663.230432]  ret_from_fork+0x2e/0x40
[  663.230434] kworker/10:100  D    0  9392      2 0x00000000
[  663.230436] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230437] Call Trace:
[  663.230440]  __schedule+0x2da/0xb00
[  663.230442]  schedule+0x38/0x90
[  663.230445]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230447]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230448]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230451]  call_rwsem_down_write_failed+0x17/0x30
[  663.230453]  down_write+0x5a/0x70
[  663.230455]  ? __generic_file_fsync+0x43/0x90
[  663.230456]  __generic_file_fsync+0x43/0x90
[  663.230458]  ext4_sync_file+0x2b4/0x540
[  663.230460]  vfs_fsync_range+0x46/0xa0
[  663.230461]  dio_complete+0x181/0x1b0
[  663.230463]  dio_aio_complete_work+0x17/0x20
[  663.230464]  process_one_work+0x208/0x6a0
[  663.230465]  ? process_one_work+0x18d/0x6a0
[  663.230467]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230469]  worker_thread+0x49/0x4a0
[  663.230471]  kthread+0x107/0x140
[  663.230473]  ? process_one_work+0x6a0/0x6a0
[  663.230474]  ? kthread_create_on_node+0x40/0x40
[  663.230476]  ret_from_fork+0x2e/0x40
[  663.230478] kworker/10:101  D    0  9393      2 0x00000000
[  663.230480] Workqueue: dio/dm-0 dio_aio_complete_work
[  663.230481] Call Trace:
[  663.230483]  __schedule+0x2da/0xb00
[  663.230485]  ? _raw_spin_unlock_irq+0x27/0x40
[  663.230487]  schedule+0x38/0x90
[  663.230489]  ? rwsem_down_write_failed+0x2ac/0x4b0
[  663.230491]  rwsem_down_write_failed+0x2b1/0x4b0
[  663.230493]  ? rwsem_down_write_failed+0x50/0x4b0
[  663.230494]  ? trace_hardirqs_on+0xd/0x10
[  663.230496]  call_rwsem_down_write_failed+0x17/0x30
[  663.230499]  down_write+0x5a/0x70
[  663.230500]  ? __generic_file_fsync+0x43/0x90
[  663.230502]  __generic_file_fsync+0x43/0x90
[  663.230504]  ext4_sync_file+0x2b4/0x540
[  663.230506]  vfs_fsync_range+0x46/0xa0
[  663.230507]  dio_complete+0x181/0x1b0
[  663.230508]  dio_aio_complete_work+0x17/0x20
[  663.230509]  process_one_work+0x208/0x6a0
[  663.230510]  ? process_one_work+0x18d/0x6a0
[  663.230512]  ? __mutex_unlock_slowpath+0x3e/0x290
[  663.230514]  worker_thread+0x49/0x4a0
[  663.230515]  kthread+0x107/0x140
[  663.230517]  ? process_one_work+0x6a0/0x6a0
[  663.230518]  ? kthread_create_on_node+0x40/0x40
[  663.230519]  ret_from_fork+0x2e/0x40

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
@ 2017-01-26 23:18     ` Bart Van Assche
  2017-01-26 23:18     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:18 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> No need for the local variables, the bio is still live and we can just
> assigned the bits we want directly.  Make me wonder why we can't assign
> all the bio flags to start with.

I assume that you meant "assign" in the patch description? Anyway:

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request
@ 2017-01-26 23:18     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:18 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> No need for the local variables, the bio is still live and we can just
> assigned the bits we want directly.  Make me wonder why we can't assign
> all the bio flags to start with.

I assume that you meant "assign" in the patch description? Anyway:

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 03/18] block: fix elevator init check
  2017-01-25 17:25 ` [PATCH 03/18] block: fix elevator init check Christoph Hellwig
@ 2017-01-26 23:21     ` Bart Van Assche
  2017-01-26 23:21     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:21 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> We can't initialize the elevator fields for flushes as flush share space
> in struct request with the elevator data.  But currently we can't
> communicate that a request is a flush through blk_get_request as we
> can only pass READ or WRITE, and the low-level code looks at the
> possible NULL bio to check for a flush.
> 
> Fix this by allowing to pass any block op and flags, and by checking for
> the flush flags in __get_request.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 03/18] block: fix elevator init check
@ 2017-01-26 23:21     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:21 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> We can't initialize the elevator fields for flushes as flush share space
> in struct request with the elevator data.  But currently we can't
> communicate that a request is a flush through blk_get_request as we
> can only pass READ or WRITE, and the low-level code looks at the
> possible NULL bio to check for a flush.
> 
> Fix this by allowing to pass any block op and flags, and by checking for
> the flush flags in __get_request.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 23:14                           ` Bart Van Assche
  (?)
@ 2017-01-26 23:26                             ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 23:26 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 04:14 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 14:51 -0700, Jens Axboe wrote:
>> That is exactly what it means, looks like that one path doesn't handle
>> that.  You'd have to exhaust the pool with atomic allocs for this to
>> trigger, we don't do that at all in the normal IO path. So good catch,
>> must be the dm part that enables this since it does NOWAIT allocations.
>>
>>
>> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
>> index 3136696f4991..c27613de80c5 100644
>> --- a/block/blk-mq-sched.c
>> +++ b/block/blk-mq-sched.c
>> @@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>>  			rq = __blk_mq_alloc_request(data, op);
>>  	} else {
>>  		rq = __blk_mq_alloc_request(data, op);
>> -		data->hctx->tags->rqs[rq->tag] = rq;
>> +		if (rq)
>> +			data->hctx->tags->rqs[rq->tag] = rq;
>>  	}
>>  
>>  	if (rq) {
> 
> Hello Jens,
> 
> With these two patches applied the scheduling-while-atomic complaint and
> the oops are gone. However, some tasks get stuck. Is the console output
> below enough to figure out what is going on or do you want me to bisect
> this? I don't think that any requests got stuck since no pending requests
> are shown in /sys/block/*/mq/*/{pending,*/rq_list}.

What device is stuck? Is it running with an mq scheduler attached, or
with "none"?

Would also be great to see the output of /sys/block/*/mq/*/tags and
sched_tags so we can see if they have anything pending.

From a quick look at the below, it looks like a request leak. Bisection
would most likely be very helpful.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 23:26                             ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 23:26 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 04:14 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 14:51 -0700, Jens Axboe wrote:
>> That is exactly what it means, looks like that one path doesn't handle
>> that.  You'd have to exhaust the pool with atomic allocs for this to
>> trigger, we don't do that at all in the normal IO path. So good catch,
>> must be the dm part that enables this since it does NOWAIT allocations.
>>
>>
>> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
>> index 3136696f4991..c27613de80c5 100644
>> --- a/block/blk-mq-sched.c
>> +++ b/block/blk-mq-sched.c
>> @@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>>  			rq = __blk_mq_alloc_request(data, op);
>>  	} else {
>>  		rq = __blk_mq_alloc_request(data, op);
>> -		data->hctx->tags->rqs[rq->tag] = rq;
>> +		if (rq)
>> +			data->hctx->tags->rqs[rq->tag] = rq;
>>  	}
>>  
>>  	if (rq) {
> 
> Hello Jens,
> 
> With these two patches applied the scheduling-while-atomic complaint and
> the oops are gone. However, some tasks get stuck. Is the console output
> below enough to figure out what is going on or do you want me to bisect
> this? I don't think that any requests got stuck since no pending requests
> are shown in /sys/block/*/mq/*/{pending,*/rq_list}.

What device is stuck? Is it running with an mq scheduler attached, or
with "none"?

Would also be great to see the output of /sys/block/*/mq/*/tags and
sched_tags so we can see if they have anything pending.

From a quick look at the below, it looks like a request leak. Bisection
would most likely be very helpful.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-26 23:26                             ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 23:26 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 04:14 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 14:51 -0700, Jens Axboe wrote:
>> That is exactly what it means, looks like that one path doesn't handle
>> that.  You'd have to exhaust the pool with atomic allocs for this to
>> trigger, we don't do that at all in the normal IO path. So good catch,
>> must be the dm part that enables this since it does NOWAIT allocations.
>>
>>
>> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
>> index 3136696f4991..c27613de80c5 100644
>> --- a/block/blk-mq-sched.c
>> +++ b/block/blk-mq-sched.c
>> @@ -134,7 +134,8 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
>>  			rq = __blk_mq_alloc_request(data, op);
>>  	} else {
>>  		rq = __blk_mq_alloc_request(data, op);
>> -		data->hctx->tags->rqs[rq->tag] = rq;
>> +		if (rq)
>> +			data->hctx->tags->rqs[rq->tag] = rq;
>>  	}
>>  
>>  	if (rq) {
> 
> Hello Jens,
> 
> With these two patches applied the scheduling-while-atomic complaint and
> the oops are gone. However, some tasks get stuck. Is the console output
> below enough to figure out what is going on or do you want me to bisect
> this? I don't think that any requests got stuck since no pending requests
> are shown in /sys/block/*/mq/*/{pending,*/rq_list}.

What device is stuck? Is it running with an mq scheduler attached, or
with "none"?

Would also be great to see the output of /sys/block/*/mq/*/tags and
sched_tags so we can see if they have anything pending.

From a quick look at the below, it looks like a request leak. Bisection
would most likely be very helpful.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 04/18] block: simplify blk_init_allocated_queue
  2017-01-25 17:25 ` [PATCH 04/18] block: simplify blk_init_allocated_queue Christoph Hellwig
@ 2017-01-26 23:27     ` Bart Van Assche
  2017-01-26 23:27     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:27 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Return an errno value instead of the passed in queue so that the callers
> don't have to keep track of two queues, and move the assignment of the
> request_fn and lock to the caller as passing them as argument doesn't
> simplify anything.  While we're at it also remove two pointless NULL
> assignments, given that the request structure is zeroed on allocation.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 04/18] block: simplify blk_init_allocated_queue
@ 2017-01-26 23:27     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:27 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Return an errno value instead of the passed in queue so that the callers
> don't have to keep track of two queues, and move the assignment of the
> request_fn and lock to the caller as passing them as argument doesn't
> simplify anything.  While we're at it also remove two pointless NULL
> assignments, given that the request structure is zeroed on allocation.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 23:26                             ` [dm-devel] " Jens Axboe
@ 2017-01-26 23:47                               ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:47 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Thu, 2017-01-26 at 16:26 -0700, Jens Axboe wrote:
> What device is stuck? Is it running with an mq scheduler attached, or
> with "none"?
> 
> Would also be great to see the output of /sys/block/*/mq/*/tags and
> sched_tags so we can see if they have anything pending.
> 
> From a quick look at the below, it looks like a request leak. Bisection
> would most likely be very helpful.

Hello Jens,

This happens with and without scheduler attached. The most recent test I ran
was with the deadline scheduler configured as default scheduler for all blk-mq
devices (CONFIG_DEFAULT_SQ_IOSCHED="mq-deadline" and
CONFIG_DEFAULT_MQ_IOSCHED="mq-deadline"). The block devices that hang are
/dev/dm-0 and /dev/dm-1. The tags and sched_tags data is as follows:

# (cd /sys/class/block && grep -aH '' dm*/mq/*/tags)
dm-0/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
dm-0/mq/0/tags:nr_free=1795, nr_reserved=0
dm-0/mq/0/tags:active_queues=0
dm-1/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
dm-1/mq/0/tags:nr_free=2047, nr_reserved=0
dm-1/mq/0/tags:active_queues=0
# (cd /sys/class/block && grep -aH '' dm*/mq/*/sched_tags)
dm-0/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
dm-0/mq/0/sched_tags:nr_free=0, nr_reserved=0
dm-0/mq/0/sched_tags:active_queues=0
dm-1/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
dm-1/mq/0/sched_tags:nr_free=254, nr_reserved=0
dm-1/mq/0/sched_tags:active_queues=0

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-26 23:47                               ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-26 23:47 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 16:26 -0700, Jens Axboe wrote:
> What device is stuck? Is it running with an mq scheduler attached, or
> with "none"?
> 
> Would also be great to see the output of /sys/block/*/mq/*/tags and
> sched_tags so we can see if they have anything pending.
> 
> From a quick look at the below, it looks like a request leak. Bisection
> would most likely be very helpful.

Hello Jens,

This happens with and without scheduler attached. The most recent test I ran
was with the deadline scheduler configured as default scheduler for all blk-mq
devices (CONFIG_DEFAULT_SQ_IOSCHED="mq-deadline" and
CONFIG_DEFAULT_MQ_IOSCHED="mq-deadline"). The block devices that hang are
/dev/dm-0 and /dev/dm-1. The tags and sched_tags data is as follows:

# (cd /sys/class/block && grep -aH '' dm*/mq/*/tags)
dm-0/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
dm-0/mq/0/tags:nr_free=1795, nr_reserved=0
dm-0/mq/0/tags:active_queues=0
dm-1/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
dm-1/mq/0/tags:nr_free=2047, nr_reserved=0
dm-1/mq/0/tags:active_queues=0
# (cd /sys/class/block && grep -aH '' dm*/mq/*/sched_tags)
dm-0/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
dm-0/mq/0/sched_tags:nr_free=0, nr_reserved=0
dm-0/mq/0/sched_tags:active_queues=0
dm-1/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
dm-1/mq/0/sched_tags:nr_free=254, nr_reserved=0
dm-1/mq/0/sched_tags:active_queues=0

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-26 23:47                               ` [dm-devel] " Bart Van Assche
  (?)
@ 2017-01-26 23:50                               ` Jens Axboe
  2017-01-27  0:33                                   ` [dm-devel] " Jens Axboe
  2017-01-27  0:38                                   ` Bart Van Assche
  -1 siblings, 2 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-26 23:50 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 04:47 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 16:26 -0700, Jens Axboe wrote:
>> What device is stuck? Is it running with an mq scheduler attached, or
>> with "none"?
>>
>> Would also be great to see the output of /sys/block/*/mq/*/tags and
>> sched_tags so we can see if they have anything pending.
>>
>> From a quick look at the below, it looks like a request leak. Bisection
>> would most likely be very helpful.
> 
> Hello Jens,
> 
> This happens with and without scheduler attached. The most recent test I ran
> was with the deadline scheduler configured as default scheduler for all blk-mq
> devices (CONFIG_DEFAULT_SQ_IOSCHED="mq-deadline" and
> CONFIG_DEFAULT_MQ_IOSCHED="mq-deadline"). The block devices that hang are
> /dev/dm-0 and /dev/dm-1. The tags and sched_tags data is as follows:
> 
> # (cd /sys/class/block && grep -aH '' dm*/mq/*/tags)
> dm-0/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
> dm-0/mq/0/tags:nr_free=1795, nr_reserved=0
> dm-0/mq/0/tags:active_queues=0
> dm-1/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
> dm-1/mq/0/tags:nr_free=2047, nr_reserved=0
> dm-1/mq/0/tags:active_queues=0
> # (cd /sys/class/block && grep -aH '' dm*/mq/*/sched_tags)
> dm-0/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
> dm-0/mq/0/sched_tags:nr_free=0, nr_reserved=0
> dm-0/mq/0/sched_tags:active_queues=0
> dm-1/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
> dm-1/mq/0/sched_tags:nr_free=254, nr_reserved=0
> dm-1/mq/0/sched_tags:active_queues=0

Clearly we are missing some requests. How do I setup dm similarly to
you?

Does it reproduce without Christoph's patchset?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-26 23:50                               ` Jens Axboe
@ 2017-01-27  0:33                                   ` Jens Axboe
  2017-01-27  0:38                                   ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  0:33 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 04:50 PM, Jens Axboe wrote:
> On 01/26/2017 04:47 PM, Bart Van Assche wrote:
>> On Thu, 2017-01-26 at 16:26 -0700, Jens Axboe wrote:
>>> What device is stuck? Is it running with an mq scheduler attached, or
>>> with "none"?
>>>
>>> Would also be great to see the output of /sys/block/*/mq/*/tags and
>>> sched_tags so we can see if they have anything pending.
>>>
>>> From a quick look at the below, it looks like a request leak. Bisection
>>> would most likely be very helpful.
>>
>> Hello Jens,
>>
>> This happens with and without scheduler attached. The most recent test I ran
>> was with the deadline scheduler configured as default scheduler for all blk-mq
>> devices (CONFIG_DEFAULT_SQ_IOSCHED="mq-deadline" and
>> CONFIG_DEFAULT_MQ_IOSCHED="mq-deadline"). The block devices that hang are
>> /dev/dm-0 and /dev/dm-1. The tags and sched_tags data is as follows:
>>
>> # (cd /sys/class/block && grep -aH '' dm*/mq/*/tags)
>> dm-0/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
>> dm-0/mq/0/tags:nr_free=1795, nr_reserved=0
>> dm-0/mq/0/tags:active_queues=0
>> dm-1/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
>> dm-1/mq/0/tags:nr_free=2047, nr_reserved=0
>> dm-1/mq/0/tags:active_queues=0
>> # (cd /sys/class/block && grep -aH '' dm*/mq/*/sched_tags)
>> dm-0/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
>> dm-0/mq/0/sched_tags:nr_free=0, nr_reserved=0
>> dm-0/mq/0/sched_tags:active_queues=0
>> dm-1/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
>> dm-1/mq/0/sched_tags:nr_free=254, nr_reserved=0
>> dm-1/mq/0/sched_tags:active_queues=0
> 
> Clearly we are missing some requests. How do I setup dm similarly to
> you?
> 
> Does it reproduce without Christoph's patchset?

I have dm-mpath running using blk_mq and with mq-deadline on both dm and
the lower level device, and it seems to be running just fine here.
Note, this is without Christoph's patchset, I'll try that next once
xfstest finishes.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27  0:33                                   ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  0:33 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 04:50 PM, Jens Axboe wrote:
> On 01/26/2017 04:47 PM, Bart Van Assche wrote:
>> On Thu, 2017-01-26 at 16:26 -0700, Jens Axboe wrote:
>>> What device is stuck? Is it running with an mq scheduler attached, or
>>> with "none"?
>>>
>>> Would also be great to see the output of /sys/block/*/mq/*/tags and
>>> sched_tags so we can see if they have anything pending.
>>>
>>> From a quick look at the below, it looks like a request leak. Bisection
>>> would most likely be very helpful.
>>
>> Hello Jens,
>>
>> This happens with and without scheduler attached. The most recent test I ran
>> was with the deadline scheduler configured as default scheduler for all blk-mq
>> devices (CONFIG_DEFAULT_SQ_IOSCHED="mq-deadline" and
>> CONFIG_DEFAULT_MQ_IOSCHED="mq-deadline"). The block devices that hang are
>> /dev/dm-0 and /dev/dm-1. The tags and sched_tags data is as follows:
>>
>> # (cd /sys/class/block && grep -aH '' dm*/mq/*/tags)
>> dm-0/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
>> dm-0/mq/0/tags:nr_free=1795, nr_reserved=0
>> dm-0/mq/0/tags:active_queues=0
>> dm-1/mq/0/tags:nr_tags=2048, reserved_tags=0, bits_per_word=64
>> dm-1/mq/0/tags:nr_free=2047, nr_reserved=0
>> dm-1/mq/0/tags:active_queues=0
>> # (cd /sys/class/block && grep -aH '' dm*/mq/*/sched_tags)
>> dm-0/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
>> dm-0/mq/0/sched_tags:nr_free=0, nr_reserved=0
>> dm-0/mq/0/sched_tags:active_queues=0
>> dm-1/mq/0/sched_tags:nr_tags=256, reserved_tags=0, bits_per_word=64
>> dm-1/mq/0/sched_tags:nr_free=254, nr_reserved=0
>> dm-1/mq/0/sched_tags:active_queues=0
> 
> Clearly we are missing some requests. How do I setup dm similarly to
> you?
> 
> Does it reproduce without Christoph's patchset?

I have dm-mpath running using blk_mq and with mq-deadline on both dm and
the lower level device, and it seems to be running just fine here.
Note, this is without Christoph's patchset, I'll try that next once
xfstest finishes.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-26 23:50                               ` Jens Axboe
@ 2017-01-27  0:38                                   ` Bart Van Assche
  2017-01-27  0:38                                   ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27  0:38 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 16:50 -0700, Jens Axboe wrote:
> Clearly we are missing some requests. How do I setup dm similarly to
> you?
> 
> Does it reproduce without Christoph's patchset?

Hello Jens,

I see similar behavior with the blk-mq-sched branch of
git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
booting happens much slower than usual and I/O hangs if I run the
srp-test software.

Regarding creating a similar dm setup: I hope that in the future it
will become possible to run the srp-test software without any special
hardware and with in-tree drivers. Today running the srp-test software
with in-tree drivers namely requires IB hardware. This is how to run the
srp-test software today with in-tree drivers:
* Find a system with at least two InfiniBand ports.
* Make sure that the appropriate IB driver in the kernel is enabled and
  also that LIO (CONFIG_TARGET_CORE=m and CONFIG_TCM_FILEIO=m), ib_srp,
  ib_srpt and dm-mpath are built as kernel modules.
* If none of the IB ports are connected to an IB switch, connect the
  two ports to each other and configure and start the opensm software
  such that the port states change from "Initializing" to "Active".
* Check with "ibstat | grep State: Active" that at least one port is
  in the active state.
* Configure multipathd as explained in
  https://github.com/bvanassche/srp-test/blob/master/README.md.
* Restart multipathd to make sure it picks up /etc/multipath.conf.
* Clone https://github.com/bvanassche/srp-test and start it as follows:
  srp-test/run_tests -t 02-mq

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27  0:38                                   ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27  0:38 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 16:50 -0700, Jens Axboe wrote:
> Clearly we are missing some requests. How do I setup dm similarly to
> you?
> 
> Does it reproduce without Christoph's patchset?

Hello Jens,

I see similar behavior with the blk-mq-sched branch of
git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
booting happens much slower than usual and I/O hangs if I run the
srp-test software.

Regarding creating a similar dm setup: I hope that in the future it
will become possible to run the srp-test software without any special
hardware and with in-tree drivers. Today running the srp-test software
with in-tree drivers namely requires IB hardware. This is how to run the
srp-test software today with in-tree drivers:
* Find a system with at least two InfiniBand ports.
* Make sure that the appropriate IB driver in the kernel is enabled and
  also that LIO (CONFIG_TARGET_CORE=m and CONFIG_TCM_FILEIO=m), ib_srp,
  ib_srpt and dm-mpath are built as kernel modules.
* If none of the IB ports are connected to an IB switch, connect the
  two ports to each other and configure and start the opensm software
  such that the port states change from "Initializing" to "Active".
* Check with "ibstat | grep State: Active" that at least one port is
  in the active state.
* Configure multipathd as explained in
  https://github.com/bvanassche/srp-test/blob/master/README.md.
* Restart multipathd to make sure it picks up /etc/multipath.conf.
* Clone https://github.com/bvanassche/srp-test and start it as follows:
  srp-test/run_tests -t 02-mq

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-27  0:38                                   ` Bart Van Assche
  (?)
@ 2017-01-27  0:41                                   ` Jens Axboe
  2017-01-27  1:15                                       ` [dm-devel] " Bart Van Assche
  -1 siblings, 1 reply; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  0:41 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 05:38 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 16:50 -0700, Jens Axboe wrote:
>> Clearly we are missing some requests. How do I setup dm similarly to
>> you?
>>
>> Does it reproduce without Christoph's patchset?
> 
> Hello Jens,
> 
> I see similar behavior with the blk-mq-sched branch of
> git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
> booting happens much slower than usual and I/O hangs if I run the
> srp-test software.

Please don't run that, run for-4.11/block and merge it to master.
Same behavior?

> Regarding creating a similar dm setup: I hope that in the future it
> will become possible to run the srp-test software without any special
> hardware and with in-tree drivers. Today running the srp-test software
> with in-tree drivers namely requires IB hardware. This is how to run the
> srp-test software today with in-tree drivers:
> * Find a system with at least two InfiniBand ports.
> * Make sure that the appropriate IB driver in the kernel is enabled and
>   also that LIO (CONFIG_TARGET_CORE=m and CONFIG_TCM_FILEIO=m), ib_srp,
>   ib_srpt and dm-mpath are built as kernel modules.
> * If none of the IB ports are connected to an IB switch, connect the
>   two ports to each other and configure and start the opensm software
>   such that the port states change from "Initializing" to "Active".
> * Check with "ibstat | grep State: Active" that at least one port is
>   in the active state.
> * Configure multipathd as explained in
>   https://github.com/bvanassche/srp-test/blob/master/README.md.
> * Restart multipathd to make sure it picks up /etc/multipath.conf.
> * Clone https://github.com/bvanassche/srp-test and start it as follows:
>   srp-test/run_tests -t 02-mq

I can't run that. Any chance of a test case that doesn't require IB?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27  0:41                                   ` Jens Axboe
@ 2017-01-27  1:15                                       ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27  1:15 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
> > I see similar behavior with the blk-mq-sched branch of
> > git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
> > booting happens much slower than usual and I/O hangs if I run the
> > srp-test software.
> 
> Please don't run that, run for-4.11/block and merge it to master.
> Same behavior?

I have not yet had the chance to run the srp-test software against that
kernel. But I already see that booting takes more than ten times longer
than usual. Note: as far as I know the dm-mpath driver is not involved
in the boot process of my test system.

> > Regarding creating a similar dm setup: I hope that in the future it
> > will become possible to run the srp-test software without any special
> > hardware and with in-tree drivers. Today running the srp-test software
> > with in-tree drivers namely requires IB hardware. This is how to run the
> > srp-test software today with in-tree drivers:
> > * Find a system with at least two InfiniBand ports.
> > * Make sure that the appropriate IB driver in the kernel is enabled and
> >   also that LIO (CONFIG_TARGET_CORE=m and CONFIG_TCM_FILEIO=m), ib_srp,
> >   ib_srpt and dm-mpath are built as kernel modules.
> > * If none of the IB ports are connected to an IB switch, connect the
> >   two ports to each other and configure and start the opensm software
> >   such that the port states change from "Initializing" to "Active".
> > * Check with "ibstat | grep State: Active" that at least one port is
> >   in the active state.
> > * Configure multipathd as explained in
> >   https://github.com/bvanassche/srp-test/blob/master/README.md.
> > * Restart multipathd to make sure it picks up /etc/multipath.conf.
> > * Clone https://github.com/bvanassche/srp-test and start it as follows:
> >   srp-test/run_tests -t 02-mq
> 
> I can't run that. Any chance of a test case that doesn't require IB?

It is possible to run that test on top of the SoftRoCE driver. I will first
check myself whether the latest version of the SoftRoCE driver is stable
enough to run srp-test on top of it (see also
https://github.com/dledford/linux/commits/k.o/for-4.11).

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27  1:15                                       ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27  1:15 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
> > I see similar behavior with the blk-mq-sched branch of
> > git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
> > booting happens much slower than usual and I/O hangs if I run the
> > srp-test software.
>
> Please don't run that, run for-4.11/block and merge it to master.
> Same behavior?

I have not yet had the chance to run the srp-test software against that
kernel. But I already see that booting takes more than ten times longer
than usual. Note: as far as I know the dm-mpath driver is not involved
in the boot process of my test system.

> > Regarding creating a similar dm setup: I hope that in the future it
> > will become possible to run the srp-test software without any special
> > hardware and with in-tree drivers. Today running the srp-test software
> > with in-tree drivers namely requires IB hardware. This is how to run the
> > srp-test software today with in-tree drivers:
> > * Find a system with at least two InfiniBand ports.
> > * Make sure that the appropriate IB driver in the kernel is enabled and
> >   also that LIO (CONFIG_TARGET_CORE=m and CONFIG_TCM_FILEIO=m), ib_srp,
> >   ib_srpt and dm-mpath are built as kernel modules.
> > * If none of the IB ports are connected to an IB switch, connect the
> >   two ports to each other and configure and start the opensm software
> >   such that the port states change from "Initializing" to "Active".
> > * Check with "ibstat | grep State: Active" that at least one port is
> >   in the active state.
> > * Configure multipathd as explained in
> >   https://github.com/bvanassche/srp-test/blob/master/README.md.
> > * Restart multipathd to make sure it picks up /etc/multipath.conf.
> > * Clone https://github.com/bvanassche/srp-test and start it as follows:
> >   srp-test/run_tests -t 02-mq
>
> I can't run that. Any chance of a test case that doesn't require IB?

It is possible to run that test on top of the SoftRoCE driver. I will first
check myself whether the latest version of the SoftRoCE driver is stable
enough to run srp-test on top of it (see also
https://github.com/dledford/linux/commits/k.o/for-4.11).

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27  1:15                                       ` [dm-devel] " Bart Van Assche
@ 2017-01-27  1:22                                         ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  1:22 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 06:15 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
>> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
>>> I see similar behavior with the blk-mq-sched branch of
>>> git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
>>> booting happens much slower than usual and I/O hangs if I run the
>>> srp-test software.
>>
>> Please don't run that, run for-4.11/block and merge it to master.
>> Same behavior?
> 
> I have not yet had the chance to run the srp-test software against that
> kernel. But I already see that booting takes more than ten times longer
> than usual. Note: as far as I know the dm-mpath driver is not involved
> in the boot process of my test system.

What's your boot device? I've been booting this on a variety of setups,
no problems observed. It's booting my laptop, and on SCSI and SATA as
well. What is your root drive? What is the queue depth of it?
Controller?

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27  1:22                                         ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  1:22 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 06:15 PM, Bart Van Assche wrote:
> On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
>> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
>>> I see similar behavior with the blk-mq-sched branch of
>>> git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
>>> booting happens much slower than usual and I/O hangs if I run the
>>> srp-test software.
>>
>> Please don't run that, run for-4.11/block and merge it to master.
>> Same behavior?
> 
> I have not yet had the chance to run the srp-test software against that
> kernel. But I already see that booting takes more than ten times longer
> than usual. Note: as far as I know the dm-mpath driver is not involved
> in the boot process of my test system.

What's your boot device? I've been booting this on a variety of setups,
no problems observed. It's booting my laptop, and on SCSI and SATA as
well. What is your root drive? What is the queue depth of it?
Controller?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-27  1:22                                         ` [dm-devel] " Jens Axboe
  (?)
@ 2017-01-27  6:40                                         ` Jens Axboe
  2017-01-27  8:04                                             ` [dm-devel] " Jens Axboe
  -1 siblings, 1 reply; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  6:40 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 06:22 PM, Jens Axboe wrote:
> On 01/26/2017 06:15 PM, Bart Van Assche wrote:
>> On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
>>> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
>>>> I see similar behavior with the blk-mq-sched branch of
>>>> git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
>>>> booting happens much slower than usual and I/O hangs if I run the
>>>> srp-test software.
>>>
>>> Please don't run that, run for-4.11/block and merge it to master.
>>> Same behavior?
>>
>> I have not yet had the chance to run the srp-test software against that
>> kernel. But I already see that booting takes more than ten times longer
>> than usual. Note: as far as I know the dm-mpath driver is not involved
>> in the boot process of my test system.
> 
> What's your boot device? I've been booting this on a variety of setups,
> no problems observed. It's booting my laptop, and on SCSI and SATA as
> well. What is your root drive? What is the queue depth of it?
> Controller?

Are you using dm for your root device?

I think I see what is going on. The scheduler framework put the
insertion of flushes on the side, whereas it's integrated "nicely"
on the legacy side.

Can you try with this applied? This is on top of the previous two that
we already went through. Or, you can just pull:

git://git.kernel.dk/linux-block for-4.11/next

which is for-4.11/block with the next set of fixes on top that I haven't
pulled in yet.


commit 995447bfd14dd871e0c8771261ed7d1f2b5b4c86
Author: Jens Axboe <axboe@fb.com>
Date:   Thu Jan 26 23:34:56 2017 -0700

    blk-mq-sched: integrate flush insertion into blk_mq_sched_get_request()
    
    Instead of letting the caller check this and handle the details
    of inserting a flush request, put the logic in the scheduler
    insertion function.
    
    Outside of cleaning up the code, this handles the case where
    outside callers insert a flush, like through
    blk_insert_cloned_request().
    
    Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index a61f1407f4f6..78daf5b6d7cb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2129,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
-		blk_mq_sched_insert_request(rq, false, true, false);
+		blk_mq_sched_insert_request(rq, false, true, false, false);
 		return 0;
 	}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 86656fdfa637..ed1f10165268 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -66,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_sched_insert_request(rq, at_head, true, false);
+		blk_mq_sched_insert_request(rq, at_head, true, false, false);
 		return;
 	}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index d7de34ee39c2..4427896641ac 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -456,7 +456,7 @@ void blk_insert_flush(struct request *rq)
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 		if (q->mq_ops)
-			blk_mq_sched_insert_request(rq, false, true, false);
+			blk_mq_sched_insert_request(rq, false, true, false, false);
 		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c27613de80c5..fa2ff0f458fa 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -336,6 +336,64 @@ void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * Add flush/fua to the queue. If we fail getting a driver tag, then
+ * punt to the requeue list. Requeue will re-invoke us from a context
+ * that's safe to block from.
+ */
+static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
+				      struct request *rq, bool can_block)
+{
+	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
+		blk_insert_flush(rq);
+		blk_mq_run_hw_queue(hctx, !can_block);
+	} else
+		blk_mq_add_to_requeue_list(rq, true, true);
+}
+
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+
+	if (rq->tag == -1 && (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))) {
+		blk_mq_sched_insert_flush(hctx, rq, can_block);
+		return;
+	}
+
+	if (e && e->type->ops.mq.insert_requests) {
+		LIST_HEAD(list);
+
+		list_add(&rq->queuelist, &list);
+		e->type->ops.mq.insert_requests(hctx, &list, at_head);
+	} else {
+		spin_lock(&ctx->lock);
+		__blk_mq_insert_request(hctx, rq, at_head);
+		spin_unlock(&ctx->lock);
+	}
+
+	if (run_queue)
+		blk_mq_run_hw_queue(hctx, async);
+}
+
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async)
+{
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (e && e->type->ops.mq.insert_requests)
+		e->type->ops.mq.insert_requests(hctx, list, false);
+	else
+		blk_mq_insert_requests(hctx, ctx, list);
+
+	blk_mq_run_hw_queue(hctx, run_queue_async);
+}
+
 static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 				   struct blk_mq_hw_ctx *hctx,
 				   unsigned int hctx_idx)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index becbc7840364..9478aaeb48c5 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -21,6 +21,12 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
 void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
 
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block);
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async);
+
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 			struct list_head *rq_list,
@@ -62,45 +68,6 @@ static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
 		e->type->ops.mq.put_rq_priv(q, rq);
 }
 
-static inline void
-blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue,
-			    bool async)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	if (e && e->type->ops.mq.insert_requests) {
-		LIST_HEAD(list);
-
-		list_add(&rq->queuelist, &list);
-		e->type->ops.mq.insert_requests(hctx, &list, at_head);
-	} else {
-		spin_lock(&ctx->lock);
-		__blk_mq_insert_request(hctx, rq, at_head);
-		spin_unlock(&ctx->lock);
-	}
-
-	if (run_queue)
-		blk_mq_run_hw_queue(hctx, async);
-}
-
-static inline void
-blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx,
-			     struct list_head *list, bool run_queue_async)
-{
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-	struct elevator_queue *e = hctx->queue->elevator;
-
-	if (e && e->type->ops.mq.insert_requests)
-		e->type->ops.mq.insert_requests(hctx, list, false);
-	else
-		blk_mq_insert_requests(hctx, ctx, list);
-
-	blk_mq_run_hw_queue(hctx, run_queue_async);
-}
-
 static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1b156ca79af6..78bbacd129c9 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -106,6 +106,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	struct sbq_wait_state *ws;
 	DEFINE_WAIT(wait);
 	unsigned int tag_offset;
+	bool drop_ctx;
 	int tag;
 
 	if (data->flags & BLK_MQ_REQ_RESERVED) {
@@ -128,6 +129,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		return BLK_MQ_TAG_FAIL;
 
 	ws = bt_wait_ptr(bt, data->hctx);
+	drop_ctx = data->ctx == NULL;
 	do {
 		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
 
@@ -150,7 +152,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		if (tag != -1)
 			break;
 
-		blk_mq_put_ctx(data->ctx);
+		if (data->ctx)
+			blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
 
@@ -166,6 +169,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
+	if (drop_ctx && data->ctx)
+		blk_mq_put_ctx(data->ctx);
+
 	finish_wait(&ws->wait, &wait);
 
 found_tag:
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4df397910251..9046f7802de3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -568,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
 
 		rq->rq_flags &= ~RQF_SOFTBARRIER;
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, true, false, false);
+		blk_mq_sched_insert_request(rq, true, false, false, true);
 	}
 
 	while (!list_empty(&rq_list)) {
 		rq = list_entry(rq_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, false, false, false);
+		blk_mq_sched_insert_request(rq, false, false, false, true);
 	}
 
 	blk_mq_run_hw_queues(q, false);
@@ -847,12 +847,11 @@ static inline unsigned int queued_to_index(unsigned int queued)
 	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
-static bool blk_mq_get_driver_tag(struct request *rq,
-				  struct blk_mq_hw_ctx **hctx, bool wait)
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+			   bool wait)
 {
 	struct blk_mq_alloc_data data = {
 		.q = rq->q,
-		.ctx = rq->mq_ctx,
 		.hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
 		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
 	};
@@ -1395,7 +1394,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 	}
 
 insert:
-	blk_mq_sched_insert_request(rq, false, true, true);
+	blk_mq_sched_insert_request(rq, false, true, true, false);
 }
 
 /*
@@ -1445,12 +1444,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	cookie = request_to_qc_t(data.hctx, rq);
 
-	if (unlikely(is_flush_fua)) {
-		blk_mq_bio_to_request(rq, bio);
-		blk_mq_get_driver_tag(rq, NULL, true);
-		blk_insert_flush(rq);
-		goto run_queue;
-	}
+	if (unlikely(is_flush_fua))
+		goto insert;
 
 	plug = current->plug;
 	/*
@@ -1499,10 +1494,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	if (q->elevator) {
+insert:
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true,
-						!is_sync || is_flush_fua);
+						!is_sync || is_flush_fua, true);
 		goto done;
 	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1512,7 +1508,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 	blk_mq_put_ctx(data.ctx);
@@ -1567,12 +1562,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 
 	cookie = request_to_qc_t(data.hctx, rq);
 
-	if (unlikely(is_flush_fua)) {
-		blk_mq_bio_to_request(rq, bio);
-		blk_mq_get_driver_tag(rq, NULL, true);
-		blk_insert_flush(rq);
-		goto run_queue;
-	}
+	if (unlikely(is_flush_fua))
+		goto insert;
 
 	/*
 	 * A task plug currently exists. Since this is completely lockless,
@@ -1609,10 +1600,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	if (q->elevator) {
+insert:
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true,
-						!is_sync || is_flush_fua);
+						!is_sync || is_flush_fua, true);
 		goto done;
 	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1622,7 +1614,6 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d19b0e75a129..d34929968071 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -34,6 +34,8 @@ void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+				bool wait);
 
 /*
  * Internal helpers for allocating/freeing the request map

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27  6:40                                         ` Jens Axboe
@ 2017-01-27  8:04                                             ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  8:04 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/26/2017 11:40 PM, Jens Axboe wrote:
> On 01/26/2017 06:22 PM, Jens Axboe wrote:
>> On 01/26/2017 06:15 PM, Bart Van Assche wrote:
>>> On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
>>>> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
>>>>> I see similar behavior with the blk-mq-sched branch of
>>>>> git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
>>>>> booting happens much slower than usual and I/O hangs if I run the
>>>>> srp-test software.
>>>>
>>>> Please don't run that, run for-4.11/block and merge it to master.
>>>> Same behavior?
>>>
>>> I have not yet had the chance to run the srp-test software against that
>>> kernel. But I already see that booting takes more than ten times longer
>>> than usual. Note: as far as I know the dm-mpath driver is not involved
>>> in the boot process of my test system.
>>
>> What's your boot device? I've been booting this on a variety of setups,
>> no problems observed. It's booting my laptop, and on SCSI and SATA as
>> well. What is your root drive? What is the queue depth of it?
>> Controller?
> 
> Are you using dm for your root device?
> 
> I think I see what is going on. The scheduler framework put the
> insertion of flushes on the side, whereas it's integrated "nicely"
> on the legacy side.
> 
> Can you try with this applied? This is on top of the previous two that
> we already went through. Or, you can just pull:
> 
> git://git.kernel.dk/linux-block for-4.11/next
> 
> which is for-4.11/block with the next set of fixes on top that I haven't
> pulled in yet.

The previous patch had a bug if you didn't use a scheduler, here's a
version that should work fine in both cases. I've also updated the
above mentioned branch, so feel free to pull that as well and merge to
master like before.

commit 2f54ba92a274a7c1a5ceb34a56565f84f7b994b7
Author: Jens Axboe <axboe@fb.com>
Date:   Fri Jan 27 01:00:47 2017 -0700

    blk-mq-sched: add flush insertion into blk_mq_sched_insert_request()
    
    Instead of letting the caller check this and handle the details
    of inserting a flush request, put the logic in the scheduler
    insertion function. This fixes direct flush insertion outside
    of the usual make_request_fn calls, like from dm via
    blk_insert_cloned_request().
    
    Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index a61f1407f4f6..78daf5b6d7cb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2129,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
-		blk_mq_sched_insert_request(rq, false, true, false);
+		blk_mq_sched_insert_request(rq, false, true, false, false);
 		return 0;
 	}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 86656fdfa637..ed1f10165268 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -66,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_sched_insert_request(rq, at_head, true, false);
+		blk_mq_sched_insert_request(rq, at_head, true, false, false);
 		return;
 	}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index d7de34ee39c2..4427896641ac 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -456,7 +456,7 @@ void blk_insert_flush(struct request *rq)
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 		if (q->mq_ops)
-			blk_mq_sched_insert_request(rq, false, true, false);
+			blk_mq_sched_insert_request(rq, false, true, false, false);
 		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c27613de80c5..5e91743e193a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -336,6 +336,64 @@ void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * Add flush/fua to the queue. If we fail getting a driver tag, then
+ * punt to the requeue list. Requeue will re-invoke us from a context
+ * that's safe to block from.
+ */
+static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
+				      struct request *rq, bool can_block)
+{
+	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
+		blk_insert_flush(rq);
+		blk_mq_run_hw_queue(hctx, true);
+	} else
+		blk_mq_add_to_requeue_list(rq, true, true);
+}
+
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+
+	if (rq->tag == -1 && (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))) {
+		blk_mq_sched_insert_flush(hctx, rq, can_block);
+		return;
+	}
+
+	if (e && e->type->ops.mq.insert_requests) {
+		LIST_HEAD(list);
+
+		list_add(&rq->queuelist, &list);
+		e->type->ops.mq.insert_requests(hctx, &list, at_head);
+	} else {
+		spin_lock(&ctx->lock);
+		__blk_mq_insert_request(hctx, rq, at_head);
+		spin_unlock(&ctx->lock);
+	}
+
+	if (run_queue)
+		blk_mq_run_hw_queue(hctx, async);
+}
+
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async)
+{
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (e && e->type->ops.mq.insert_requests)
+		e->type->ops.mq.insert_requests(hctx, list, false);
+	else
+		blk_mq_insert_requests(hctx, ctx, list);
+
+	blk_mq_run_hw_queue(hctx, run_queue_async);
+}
+
 static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 				   struct blk_mq_hw_ctx *hctx,
 				   unsigned int hctx_idx)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index becbc7840364..9478aaeb48c5 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -21,6 +21,12 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
 void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
 
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block);
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async);
+
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 			struct list_head *rq_list,
@@ -62,45 +68,6 @@ static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
 		e->type->ops.mq.put_rq_priv(q, rq);
 }
 
-static inline void
-blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue,
-			    bool async)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	if (e && e->type->ops.mq.insert_requests) {
-		LIST_HEAD(list);
-
-		list_add(&rq->queuelist, &list);
-		e->type->ops.mq.insert_requests(hctx, &list, at_head);
-	} else {
-		spin_lock(&ctx->lock);
-		__blk_mq_insert_request(hctx, rq, at_head);
-		spin_unlock(&ctx->lock);
-	}
-
-	if (run_queue)
-		blk_mq_run_hw_queue(hctx, async);
-}
-
-static inline void
-blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx,
-			     struct list_head *list, bool run_queue_async)
-{
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-	struct elevator_queue *e = hctx->queue->elevator;
-
-	if (e && e->type->ops.mq.insert_requests)
-		e->type->ops.mq.insert_requests(hctx, list, false);
-	else
-		blk_mq_insert_requests(hctx, ctx, list);
-
-	blk_mq_run_hw_queue(hctx, run_queue_async);
-}
-
 static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1b156ca79af6..78bbacd129c9 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -106,6 +106,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	struct sbq_wait_state *ws;
 	DEFINE_WAIT(wait);
 	unsigned int tag_offset;
+	bool drop_ctx;
 	int tag;
 
 	if (data->flags & BLK_MQ_REQ_RESERVED) {
@@ -128,6 +129,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		return BLK_MQ_TAG_FAIL;
 
 	ws = bt_wait_ptr(bt, data->hctx);
+	drop_ctx = data->ctx == NULL;
 	do {
 		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
 
@@ -150,7 +152,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		if (tag != -1)
 			break;
 
-		blk_mq_put_ctx(data->ctx);
+		if (data->ctx)
+			blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
 
@@ -166,6 +169,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
+	if (drop_ctx && data->ctx)
+		blk_mq_put_ctx(data->ctx);
+
 	finish_wait(&ws->wait, &wait);
 
 found_tag:
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4df397910251..888868b62018 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -568,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
 
 		rq->rq_flags &= ~RQF_SOFTBARRIER;
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, true, false, false);
+		blk_mq_sched_insert_request(rq, true, false, false, true);
 	}
 
 	while (!list_empty(&rq_list)) {
 		rq = list_entry(rq_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, false, false, false);
+		blk_mq_sched_insert_request(rq, false, false, false, true);
 	}
 
 	blk_mq_run_hw_queues(q, false);
@@ -847,12 +847,11 @@ static inline unsigned int queued_to_index(unsigned int queued)
 	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
-static bool blk_mq_get_driver_tag(struct request *rq,
-				  struct blk_mq_hw_ctx **hctx, bool wait)
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+			   bool wait)
 {
 	struct blk_mq_alloc_data data = {
 		.q = rq->q,
-		.ctx = rq->mq_ctx,
 		.hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
 		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
 	};
@@ -1395,7 +1394,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 	}
 
 insert:
-	blk_mq_sched_insert_request(rq, false, true, true);
+	blk_mq_sched_insert_request(rq, false, true, true, false);
 }
 
 /*
@@ -1446,10 +1445,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	cookie = request_to_qc_t(data.hctx, rq);
 
 	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_get_driver_tag(rq, NULL, true);
 		blk_insert_flush(rq);
-		goto run_queue;
+		blk_mq_run_hw_queue(data.hctx, true);
+		goto done;
 	}
 
 	plug = current->plug;
@@ -1502,7 +1503,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true,
-						!is_sync || is_flush_fua);
+						!is_sync || is_flush_fua, true);
 		goto done;
 	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1512,7 +1513,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 	blk_mq_put_ctx(data.ctx);
@@ -1568,10 +1568,12 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	cookie = request_to_qc_t(data.hctx, rq);
 
 	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_get_driver_tag(rq, NULL, true);
 		blk_insert_flush(rq);
-		goto run_queue;
+		blk_mq_run_hw_queue(data.hctx, true);
+		goto done;
 	}
 
 	/*
@@ -1612,7 +1614,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true,
-						!is_sync || is_flush_fua);
+						!is_sync || is_flush_fua, true);
 		goto done;
 	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1622,7 +1624,6 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d19b0e75a129..d34929968071 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -34,6 +34,8 @@ void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+				bool wait);
 
 /*
  * Internal helpers for allocating/freeing the request map

-- 
Jens Axboe

^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27  8:04                                             ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27  8:04 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/26/2017 11:40 PM, Jens Axboe wrote:
> On 01/26/2017 06:22 PM, Jens Axboe wrote:
>> On 01/26/2017 06:15 PM, Bart Van Assche wrote:
>>> On Thu, 2017-01-26 at 17:41 -0700, Jens Axboe wrote:
>>>> On 01/26/2017 05:38 PM, Bart Van Assche wrote:
>>>>> I see similar behavior with the blk-mq-sched branch of
>>>>> git://git.kernel.dk/linux-block.git (git commit ID 0efe27068ecf):
>>>>> booting happens much slower than usual and I/O hangs if I run the
>>>>> srp-test software.
>>>>
>>>> Please don't run that, run for-4.11/block and merge it to master.
>>>> Same behavior?
>>>
>>> I have not yet had the chance to run the srp-test software against that
>>> kernel. But I already see that booting takes more than ten times longer
>>> than usual. Note: as far as I know the dm-mpath driver is not involved
>>> in the boot process of my test system.
>>
>> What's your boot device? I've been booting this on a variety of setups,
>> no problems observed. It's booting my laptop, and on SCSI and SATA as
>> well. What is your root drive? What is the queue depth of it?
>> Controller?
> 
> Are you using dm for your root device?
> 
> I think I see what is going on. The scheduler framework put the
> insertion of flushes on the side, whereas it's integrated "nicely"
> on the legacy side.
> 
> Can you try with this applied? This is on top of the previous two that
> we already went through. Or, you can just pull:
> 
> git://git.kernel.dk/linux-block for-4.11/next
> 
> which is for-4.11/block with the next set of fixes on top that I haven't
> pulled in yet.

The previous patch had a bug if you didn't use a scheduler, here's a
version that should work fine in both cases. I've also updated the
above mentioned branch, so feel free to pull that as well and merge to
master like before.

commit 2f54ba92a274a7c1a5ceb34a56565f84f7b994b7
Author: Jens Axboe <axboe@fb.com>
Date:   Fri Jan 27 01:00:47 2017 -0700

    blk-mq-sched: add flush insertion into blk_mq_sched_insert_request()
    
    Instead of letting the caller check this and handle the details
    of inserting a flush request, put the logic in the scheduler
    insertion function. This fixes direct flush insertion outside
    of the usual make_request_fn calls, like from dm via
    blk_insert_cloned_request().
    
    Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index a61f1407f4f6..78daf5b6d7cb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2129,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 	if (q->mq_ops) {
 		if (blk_queue_io_stat(q))
 			blk_account_io_start(rq, true);
-		blk_mq_sched_insert_request(rq, false, true, false);
+		blk_mq_sched_insert_request(rq, false, true, false, false);
 		return 0;
 	}
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 86656fdfa637..ed1f10165268 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -66,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_sched_insert_request(rq, at_head, true, false);
+		blk_mq_sched_insert_request(rq, at_head, true, false, false);
 		return;
 	}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index d7de34ee39c2..4427896641ac 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -456,7 +456,7 @@ void blk_insert_flush(struct request *rq)
 	if ((policy & REQ_FSEQ_DATA) &&
 	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
 		if (q->mq_ops)
-			blk_mq_sched_insert_request(rq, false, true, false);
+			blk_mq_sched_insert_request(rq, false, true, false, false);
 		else
 			list_add_tail(&rq->queuelist, &q->queue_head);
 		return;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c27613de80c5..5e91743e193a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -336,6 +336,64 @@ void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * Add flush/fua to the queue. If we fail getting a driver tag, then
+ * punt to the requeue list. Requeue will re-invoke us from a context
+ * that's safe to block from.
+ */
+static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
+				      struct request *rq, bool can_block)
+{
+	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
+		blk_insert_flush(rq);
+		blk_mq_run_hw_queue(hctx, true);
+	} else
+		blk_mq_add_to_requeue_list(rq, true, true);
+}
+
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block)
+{
+	struct request_queue *q = rq->q;
+	struct elevator_queue *e = q->elevator;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+
+	if (rq->tag == -1 && (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))) {
+		blk_mq_sched_insert_flush(hctx, rq, can_block);
+		return;
+	}
+
+	if (e && e->type->ops.mq.insert_requests) {
+		LIST_HEAD(list);
+
+		list_add(&rq->queuelist, &list);
+		e->type->ops.mq.insert_requests(hctx, &list, at_head);
+	} else {
+		spin_lock(&ctx->lock);
+		__blk_mq_insert_request(hctx, rq, at_head);
+		spin_unlock(&ctx->lock);
+	}
+
+	if (run_queue)
+		blk_mq_run_hw_queue(hctx, async);
+}
+
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async)
+{
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (e && e->type->ops.mq.insert_requests)
+		e->type->ops.mq.insert_requests(hctx, list, false);
+	else
+		blk_mq_insert_requests(hctx, ctx, list);
+
+	blk_mq_run_hw_queue(hctx, run_queue_async);
+}
+
 static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 				   struct blk_mq_hw_ctx *hctx,
 				   unsigned int hctx_idx)
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index becbc7840364..9478aaeb48c5 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -21,6 +21,12 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
 void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
 
+void blk_mq_sched_insert_request(struct request *rq, bool at_head,
+				 bool run_queue, bool async, bool can_block);
+void blk_mq_sched_insert_requests(struct request_queue *q,
+				  struct blk_mq_ctx *ctx,
+				  struct list_head *list, bool run_queue_async);
+
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 			struct list_head *rq_list,
@@ -62,45 +68,6 @@ static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
 		e->type->ops.mq.put_rq_priv(q, rq);
 }
 
-static inline void
-blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue,
-			    bool async)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-
-	if (e && e->type->ops.mq.insert_requests) {
-		LIST_HEAD(list);
-
-		list_add(&rq->queuelist, &list);
-		e->type->ops.mq.insert_requests(hctx, &list, at_head);
-	} else {
-		spin_lock(&ctx->lock);
-		__blk_mq_insert_request(hctx, rq, at_head);
-		spin_unlock(&ctx->lock);
-	}
-
-	if (run_queue)
-		blk_mq_run_hw_queue(hctx, async);
-}
-
-static inline void
-blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx,
-			     struct list_head *list, bool run_queue_async)
-{
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-	struct elevator_queue *e = hctx->queue->elevator;
-
-	if (e && e->type->ops.mq.insert_requests)
-		e->type->ops.mq.insert_requests(hctx, list, false);
-	else
-		blk_mq_insert_requests(hctx, ctx, list);
-
-	blk_mq_run_hw_queue(hctx, run_queue_async);
-}
-
 static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1b156ca79af6..78bbacd129c9 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -106,6 +106,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 	struct sbq_wait_state *ws;
 	DEFINE_WAIT(wait);
 	unsigned int tag_offset;
+	bool drop_ctx;
 	int tag;
 
 	if (data->flags & BLK_MQ_REQ_RESERVED) {
@@ -128,6 +129,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		return BLK_MQ_TAG_FAIL;
 
 	ws = bt_wait_ptr(bt, data->hctx);
+	drop_ctx = data->ctx == NULL;
 	do {
 		prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
 
@@ -150,7 +152,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		if (tag != -1)
 			break;
 
-		blk_mq_put_ctx(data->ctx);
+		if (data->ctx)
+			blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
 
@@ -166,6 +169,9 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
 		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
+	if (drop_ctx && data->ctx)
+		blk_mq_put_ctx(data->ctx);
+
 	finish_wait(&ws->wait, &wait);
 
 found_tag:
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4df397910251..888868b62018 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -568,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
 
 		rq->rq_flags &= ~RQF_SOFTBARRIER;
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, true, false, false);
+		blk_mq_sched_insert_request(rq, true, false, false, true);
 	}
 
 	while (!list_empty(&rq_list)) {
 		rq = list_entry(rq_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, false, false, false);
+		blk_mq_sched_insert_request(rq, false, false, false, true);
 	}
 
 	blk_mq_run_hw_queues(q, false);
@@ -847,12 +847,11 @@ static inline unsigned int queued_to_index(unsigned int queued)
 	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
-static bool blk_mq_get_driver_tag(struct request *rq,
-				  struct blk_mq_hw_ctx **hctx, bool wait)
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+			   bool wait)
 {
 	struct blk_mq_alloc_data data = {
 		.q = rq->q,
-		.ctx = rq->mq_ctx,
 		.hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
 		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
 	};
@@ -1395,7 +1394,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
 	}
 
 insert:
-	blk_mq_sched_insert_request(rq, false, true, true);
+	blk_mq_sched_insert_request(rq, false, true, true, false);
 }
 
 /*
@@ -1446,10 +1445,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	cookie = request_to_qc_t(data.hctx, rq);
 
 	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_get_driver_tag(rq, NULL, true);
 		blk_insert_flush(rq);
-		goto run_queue;
+		blk_mq_run_hw_queue(data.hctx, true);
+		goto done;
 	}
 
 	plug = current->plug;
@@ -1502,7 +1503,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true,
-						!is_sync || is_flush_fua);
+						!is_sync || is_flush_fua, true);
 		goto done;
 	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1512,7 +1513,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 	blk_mq_put_ctx(data.ctx);
@@ -1568,10 +1568,12 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	cookie = request_to_qc_t(data.hctx, rq);
 
 	if (unlikely(is_flush_fua)) {
+		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_get_driver_tag(rq, NULL, true);
 		blk_insert_flush(rq);
-		goto run_queue;
+		blk_mq_run_hw_queue(data.hctx, true);
+		goto done;
 	}
 
 	/*
@@ -1612,7 +1614,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_sched_insert_request(rq, false, true,
-						!is_sync || is_flush_fua);
+						!is_sync || is_flush_fua, true);
 		goto done;
 	}
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1622,7 +1624,6 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 		 * latter allows for merging opportunities and more efficient
 		 * dispatching.
 		 */
-run_queue:
 		blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
 	}
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d19b0e75a129..d34929968071 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -34,6 +34,8 @@ void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
+bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
+				bool wait);
 
 /*
  * Internal helpers for allocating/freeing the request map

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
@ 2017-01-27 16:11   ` Jens Axboe
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
                     ` (19 subsequent siblings)
  20 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:11 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

On Wed, Jan 25 2017, Christoph Hellwig wrote:
> Hi all,
> 
> this series splits the support for SCSI passthrough commands from the
> main struct request used all over the block layer into a separate
> scsi_request structure that drivers that want to support SCSI passthrough
> need to embed as the first thing into their request-private data,
> similar to how we handle NVMe passthrough commands.
> 
> To support this I've added support for the private data after the
> request structure to the legacy request path instead, so that it can
> be treated the same way as the blk-mq path.  Compared to the current
> scsi_cmnd allocator this actually is a major simplification.
> 
> Changes since V1:
>  - fix handling of a NULL sense pointer in __scsi_execute
>  - clean up handling of the flush flags in the block layer and MD
>  - additional small cleanup in dm-rq

I've queued this up for 4.11. Since some of the patches had dependencies
on changes in master since for-4.11/block was forked, they are sitting
in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
in first. for-next has everything, as usual.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:11   ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:11 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

On Wed, Jan 25 2017, Christoph Hellwig wrote:
> Hi all,
> 
> this series splits the support for SCSI passthrough commands from the
> main struct request used all over the block layer into a separate
> scsi_request structure that drivers that want to support SCSI passthrough
> need to embed as the first thing into their request-private data,
> similar to how we handle NVMe passthrough commands.
> 
> To support this I've added support for the private data after the
> request structure to the legacy request path instead, so that it can
> be treated the same way as the blk-mq path.  Compared to the current
> scsi_cmnd allocator this actually is a major simplification.
> 
> Changes since V1:
>  - fix handling of a NULL sense pointer in __scsi_execute
>  - clean up handling of the flush flags in the block layer and MD
>  - additional small cleanup in dm-rq

I've queued this up for 4.11. Since some of the patches had dependencies
on changes in master since for-4.11/block was forked, they are sitting
in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
in first. for-next has everything, as usual.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-26  3:15   ` Martin K. Petersen
@ 2017-01-27 16:12       ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:12 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: linux-block, linux-raid, linux-scsi, Mike Snitzer, Jens Axboe,
	dm-devel, Junichi Nomura, Christoph Hellwig

On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
> +static void *alloc_request_size(gfp_t gfp_mask, void *data)
> 
> I like alloc_request_simple() but alloc_request_size() seems a bit
> contrived. _reserve? _extra? _special? Don't have any good suggestions,
> I'm afraid.

Not that I'm a fan of _size, but I like the other suggestions even less.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
@ 2017-01-27 16:12       ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:12 UTC (permalink / raw)
  To: Martin K. Petersen
  Cc: Christoph Hellwig, Jens Axboe, Mike Snitzer, Junichi Nomura,
	linux-block, linux-scsi, linux-raid, dm-devel

On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
> +static void *alloc_request_size(gfp_t gfp_mask, void *data)
> 
> I like alloc_request_simple() but alloc_request_size() seems a bit
> contrived. _reserve? _extra? _special? Don't have any good suggestions,
> I'm afraid.

Not that I'm a fan of _size, but I like the other suggestions even less.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:11   ` Jens Axboe
@ 2017-01-27 16:17     ` Christoph Hellwig
  -1 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:17 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-raid, linux-scsi, Mike Snitzer, linux-block, dm-devel,
	Junichi Nomura, Christoph Hellwig

On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
> I've queued this up for 4.11. Since some of the patches had dependencies
> on changes in master since for-4.11/block was forked, they are sitting
> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
> in first. for-next has everything, as usual.

Eww.  I just had a couple non-trivial updates that I now do again.
In case you haven't pushed it out yet can you let me repost first?

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:17     ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:17 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
> I've queued this up for 4.11. Since some of the patches had dependencies
> on changes in master since for-4.11/block was forked, they are sitting
> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
> in first. for-next has everything, as usual.

Eww.  I just had a couple non-trivial updates that I now do again.
In case you haven't pushed it out yet can you let me repost first?

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:17     ` Christoph Hellwig
@ 2017-01-27 16:21       ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-raid, Mike Snitzer, linux-scsi, linux-block, dm-devel,
	Junichi Nomura

On 01/27/2017 09:17 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
>> I've queued this up for 4.11. Since some of the patches had dependencies
>> on changes in master since for-4.11/block was forked, they are sitting
>> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
>> in first. for-next has everything, as usual.
> 
> Eww.  I just had a couple non-trivial updates that I now do again.
> In case you haven't pushed it out yet can you let me repost first?

Why the eww?! You can't fix this with a repost.

It's fine, I'll just ship off for-4.11/block first (as usual), then
for-4.11/rq-refactor.

The two issues are in virtio_blk and raid1. For some reason, raid1
included a refactor of a function later in the cycle (hrmpf). So there's
really no good way to solve this, unless I pull in v4.10-rc5 into
for-4.11/block.  And I don't want to do that. Hence the topic branch for
this work.

I have pushed it out, but it's not merged into for-next yet, it's just
standalone. When I've done some sanity testing, I'll push it out.


-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:21       ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

On 01/27/2017 09:17 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
>> I've queued this up for 4.11. Since some of the patches had dependencies
>> on changes in master since for-4.11/block was forked, they are sitting
>> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
>> in first. for-next has everything, as usual.
> 
> Eww.  I just had a couple non-trivial updates that I now do again.
> In case you haven't pushed it out yet can you let me repost first?

Why the eww?! You can't fix this with a repost.

It's fine, I'll just ship off for-4.11/block first (as usual), then
for-4.11/rq-refactor.

The two issues are in virtio_blk and raid1. For some reason, raid1
included a refactor of a function later in the cycle (hrmpf). So there's
really no good way to solve this, unless I pull in v4.10-rc5 into
for-4.11/block.  And I don't want to do that. Hence the topic branch for
this work.

I have pushed it out, but it's not merged into for-next yet, it's just
standalone. When I've done some sanity testing, I'll push it out.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:21       ` Jens Axboe
@ 2017-01-27 16:23         ` Christoph Hellwig
  -1 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:23 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-raid, linux-scsi, Mike Snitzer, linux-block, dm-devel,
	Junichi Nomura, Christoph Hellwig

On Fri, Jan 27, 2017 at 09:21:46AM -0700, Jens Axboe wrote:
> On 01/27/2017 09:17 AM, Christoph Hellwig wrote:
> > On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
> >> I've queued this up for 4.11. Since some of the patches had dependencies
> >> on changes in master since for-4.11/block was forked, they are sitting
> >> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
> >> in first. for-next has everything, as usual.
> > 
> > Eww.  I just had a couple non-trivial updates that I now do again.
> > In case you haven't pushed it out yet can you let me repost first?
> 
> Why the eww?! You can't fix this with a repost.

Not because of the merge, mostly because I just spent quite some
time adding all the ACKs, fixing typos and adding the removal of
the ->cmd tracing to the series and was getting ready for a repost.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:23         ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:23 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

On Fri, Jan 27, 2017 at 09:21:46AM -0700, Jens Axboe wrote:
> On 01/27/2017 09:17 AM, Christoph Hellwig wrote:
> > On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
> >> I've queued this up for 4.11. Since some of the patches had dependencies
> >> on changes in master since for-4.11/block was forked, they are sitting
> >> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
> >> in first. for-next has everything, as usual.
> > 
> > Eww.  I just had a couple non-trivial updates that I now do again.
> > In case you haven't pushed it out yet can you let me repost first?
> 
> Why the eww?! You can't fix this with a repost.

Not because of the merge, mostly because I just spent quite some
time adding all the ACKs, fixing typos and adding the removal of
the ->cmd tracing to the series and was getting ready for a repost.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:23         ` Christoph Hellwig
@ 2017-01-27 16:27           ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:27 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-raid, Mike Snitzer, linux-scsi, linux-block, dm-devel,
	Junichi Nomura

On 01/27/2017 09:23 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:21:46AM -0700, Jens Axboe wrote:
>> On 01/27/2017 09:17 AM, Christoph Hellwig wrote:
>>> On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
>>>> I've queued this up for 4.11. Since some of the patches had dependencies
>>>> on changes in master since for-4.11/block was forked, they are sitting
>>>> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
>>>> in first. for-next has everything, as usual.
>>>
>>> Eww.  I just had a couple non-trivial updates that I now do again.
>>> In case you haven't pushed it out yet can you let me repost first?
>>
>> Why the eww?! You can't fix this with a repost.
> 
> Not because of the merge, mostly because I just spent two same
> time adding all the ACKs, fixing typos and adding the removal of
> the ->cmd tracing to the series and was getting ready for a repost.

Feel free to repost it, I have no problem rebasing that branch as it's
standalone for now.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:27           ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:27 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

On 01/27/2017 09:23 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:21:46AM -0700, Jens Axboe wrote:
>> On 01/27/2017 09:17 AM, Christoph Hellwig wrote:
>>> On Fri, Jan 27, 2017 at 09:11:14AM -0700, Jens Axboe wrote:
>>>> I've queued this up for 4.11. Since some of the patches had dependencies
>>>> on changes in master since for-4.11/block was forked, they are sitting
>>>> in a separate branch that has both for-4.11/block and v4.10-rc5 pulled
>>>> in first. for-next has everything, as usual.
>>>
>>> Eww.  I just had a couple non-trivial updates that I now do again.
>>> In case you haven't pushed it out yet can you let me repost first?
>>
>> Why the eww?! You can't fix this with a repost.
> 
> Not because of the merge, mostly because I just spent two same
> time adding all the ACKs, fixing typos and adding the removal of
> the ->cmd tracing to the series and was getting ready for a repost.

Feel free to repost it, I have no problem rebasing that branch as it's
standalone for now.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
  2017-01-25 17:25 ` [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue Christoph Hellwig
@ 2017-01-27 16:34     ` Mike Snitzer
  0 siblings, 0 replies; 172+ messages in thread
From: Mike Snitzer @ 2017-01-27 16:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-block, linux-raid, linux-scsi, Jens Axboe, dm-devel,
	Junichi Nomura

On Wed, Jan 25 2017 at 12:25pm -0500,
Christoph Hellwig <hch@lst.de> wrote:

> DM already calls blk_mq_alloc_request on the request_queue of the
> underlying device if it is a blk-mq device.  But now that we allow drivers
> to allocate additional data and initialize it ahead of time we need to do
> the same for all drivers.   Doing so and using the new cmd_size
> infrastructure in the block layer greatly simplifies the dm-rq and mpath
> code, and should also make arbitrary combinations of SQ and MQ devices
> with SQ or MQ device mapper tables easily possible as a further step.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Hannes Reinecke <hare@suse.com>
> Reviewed-by: Mike Snitzer <snitzer@redhat.com>
...
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index 3f12916..8d06834 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -185,7 +163,7 @@ static void end_clone_bio(struct bio *clone)
>  
>  static struct dm_rq_target_io *tio_from_request(struct request *rq)
>  {
> -	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
> +	return blk_mq_rq_to_pdu(rq);
>  }

Noticed after further review that it seems a bit weird to have the non
blk-mq support in drivers calling blk_mq_rq_to_pdu().  But I'm not sure
a blk_rq_to_pdu() macro aliasing blk_mq_rq_to_pdu() is the right thing.  What
do you guys think?

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
@ 2017-01-27 16:34     ` Mike Snitzer
  0 siblings, 0 replies; 172+ messages in thread
From: Mike Snitzer @ 2017-01-27 16:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Junichi Nomura, linux-block, linux-scsi, linux-raid,
	dm-devel

On Wed, Jan 25 2017 at 12:25pm -0500,
Christoph Hellwig <hch@lst.de> wrote:

> DM already calls blk_mq_alloc_request on the request_queue of the
> underlying device if it is a blk-mq device.  But now that we allow drivers
> to allocate additional data and initialize it ahead of time we need to do
> the same for all drivers.   Doing so and using the new cmd_size
> infrastructure in the block layer greatly simplifies the dm-rq and mpath
> code, and should also make arbitrary combinations of SQ and MQ devices
> with SQ or MQ device mapper tables easily possible as a further step.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Hannes Reinecke <hare@suse.com>
> Reviewed-by: Mike Snitzer <snitzer@redhat.com>
...
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index 3f12916..8d06834 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -185,7 +163,7 @@ static void end_clone_bio(struct bio *clone)
>  
>  static struct dm_rq_target_io *tio_from_request(struct request *rq)
>  {
> -	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
> +	return blk_mq_rq_to_pdu(rq);
>  }

Noticed after further review that it seems a bit weird to have the non
blk-mq support in drivers calling blk_mq_rq_to_pdu().  But I'm not sure
a blk_rq_to_pdu() macro to blk_mq_rq_to_pdu() is the right thing.  What
do you guys think?

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:27           ` Jens Axboe
@ 2017-01-27 16:34             ` Christoph Hellwig
  -1 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:34 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-raid, linux-scsi, Mike Snitzer, linux-block, dm-devel,
	Junichi Nomura, Christoph Hellwig

On Fri, Jan 27, 2017 at 09:27:02AM -0700, Jens Axboe wrote:
> Feel free to repost it, I have no problem rebasing that branch as it's
> standalone for now.

Ok, I'll repost what I have right now, which is on top of a merge
of your block/for-4.11/next and your for-next from this morning
my time.

Btw, I disagreed with your patch to use op_is_flush in
generic_make_request_checks - given that we clear these flags just
below I think using the helper obfuscates what's really going on.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:34             ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:34 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

On Fri, Jan 27, 2017 at 09:27:02AM -0700, Jens Axboe wrote:
> Feel free to repost it, I have no problem rebasing that branch as it's
> standalone for now.

Ok, I'll repost what I have right now, which is on top of a merge
of your block/for-4.11/next and your for-next from this morning
my time.

Btw, I disagreed with your patch to use op_is_flush in
generic_make_request_checks - given that we clear these flags just
below I think using the helper obfuscates what's really going on.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
  2017-01-27 16:34     ` Mike Snitzer
@ 2017-01-27 16:36       ` Christoph Hellwig
  -1 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:36 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: linux-block, linux-raid, linux-scsi, Jens Axboe, dm-devel,
	Junichi Nomura, Christoph Hellwig

On Fri, Jan 27, 2017 at 11:34:34AM -0500, Mike Snitzer wrote:
> Noticed after further review that it seems a bit weird to have the non
> blk-mq support in drivers calling blk_mq_rq_to_pdu().  But I'm not sure
> a blk_rq_to_pdu() macro to blk_mq_rq_to_pdu() is the right thing.  What
> do you guys think?

My first version had an additional name for it, but it caused more
confusion than help.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
@ 2017-01-27 16:36       ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:36 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Christoph Hellwig, Jens Axboe, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

On Fri, Jan 27, 2017 at 11:34:34AM -0500, Mike Snitzer wrote:
> Noticed after further review that it seems a bit weird to have the non
> blk-mq support in drivers calling blk_mq_rq_to_pdu().  But I'm not sure
> a blk_rq_to_pdu() macro to blk_mq_rq_to_pdu() is the right thing.  What
> do you guys think?

My first version had an additional name for it, but it caused more
confusion than help.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:34             ` Christoph Hellwig
@ 2017-01-27 16:38               ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-raid, Mike Snitzer, linux-scsi, linux-block, dm-devel,
	Junichi Nomura

On 01/27/2017 09:34 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:27:02AM -0700, Jens Axboe wrote:
>> Feel free to repost it, I have no problem rebasing that branch as it's
>> standalone for now.
> 
> Ok, I'll repost what I have right now, which is on top of a merge
> of your block/for-4.11/next and your for-next from this morning
> my time.

Perfect.

> Btw, I disagreed with your patch to use op_is_flush in
> generic_make_request_checks - given that we clear these flags just
> below I think using the helper obfuscates what's really going on.

Why? It's the exact same check. The ugly part is the fact that
we strip the flags later on, imho.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:38               ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

On 01/27/2017 09:34 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:27:02AM -0700, Jens Axboe wrote:
>> Feel free to repost it, I have no problem rebasing that branch as it's
>> standalone for now.
> 
> Ok, I'll repost what I have right now, which is on top of a merge
> of your block/for-4.11/next and your for-next from this morning
> my time.

Perfect.

> Btw, I disagreed with your patch to use op_is_flush in
> generic_make_request_checks - given that we clear these flags just
> below I think using the helper obfuscates what's really going on.

Why? It's the exact same check. The ugly part is the fact that
we strip the flags later on, imho.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:38               ` Jens Axboe
@ 2017-01-27 16:42                 ` Christoph Hellwig
  -1 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:42 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-raid, linux-scsi, Mike Snitzer, linux-block, dm-devel,
	Junichi Nomura, Christoph Hellwig

On Fri, Jan 27, 2017 at 09:38:40AM -0700, Jens Axboe wrote:
> > Ok, I'll repost what I have right now, which is on top of a merge
> > of your block/for-4.11/next and your for-next from this morning
> > my time.
> 
> Perfect.

At least I tried, looks like the mail server is overloaded and crapped
out three mails into it.  For now there is a git tree here:

http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/block-pc-refactor

> 
> > Btw, I disagreed with your patch to use op_is_flush in
> > generic_make_request_checks - given that we clear these flags just
> > below I think using the helper obfuscates what's really going on.
> 
> Why? It's the exact same check. The ugly part is the fact that
> we strip the flags later on, imho.

But before it was pretty obvious that it clears exactly the flags checked
two lines earlier.  Now it's not as obvious.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:42                 ` Christoph Hellwig
  0 siblings, 0 replies; 172+ messages in thread
From: Christoph Hellwig @ 2017-01-27 16:42 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Christoph Hellwig, Mike Snitzer, Junichi Nomura, linux-block,
	linux-scsi, linux-raid, dm-devel

On Fri, Jan 27, 2017 at 09:38:40AM -0700, Jens Axboe wrote:
> > Ok, I'll repost what I have right now, which is on top of a merge
> > of your block/for-4.11/next and your for-next from this morning
> > my time.
> 
> Perfect.

At least I tried, looks like the mail server is overloaded and crapped
out three mails into it.  For now there is a git tree here:

http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/block-pc-refactor

> 
> > Btw, I disagreed with your patch to use op_is_flush in
> > generic_make_request_checks - given that we clear these flags just
> > below I think using the helper obfuscates what's really going on.
> 
> Why? It's the exact same check. The ugly part is the fact that
> we strip the flags later on, imho.

But before it was pretty obvious that it clears exactly the flags checked
two lines earlier.  Now it's not as obvious.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
  2017-01-27 16:36       ` Christoph Hellwig
@ 2017-01-27 16:44         ` Mike Snitzer
  -1 siblings, 0 replies; 172+ messages in thread
From: Mike Snitzer @ 2017-01-27 16:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-raid, linux-scsi, Jens Axboe, linux-block, dm-devel,
	Junichi Nomura

On Fri, Jan 27 2017 at 11:36am -0500,
Christoph Hellwig <hch@lst.de> wrote:

> On Fri, Jan 27, 2017 at 11:34:34AM -0500, Mike Snitzer wrote:
> > Noticed after further review that it seems a bit weird to have the non
> > blk-mq support in drivers calling blk_mq_rq_to_pdu().  But I'm not sure
> > a blk_rq_to_pdu() macro to blk_mq_rq_to_pdu() is the right thing.  What
> > do you guys think?
> 
> My first version had an additional name for it, but it caused more
> confusion than help.

And renaming blk_mq_rq_to_pdu() to blk_rq_to_pdu() tree-wide would be
too much churn?

I can live with blk_mq_rq_to_pdu(); just figured I'd ask.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue
@ 2017-01-27 16:44         ` Mike Snitzer
  0 siblings, 0 replies; 172+ messages in thread
From: Mike Snitzer @ 2017-01-27 16:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-block, linux-raid, linux-scsi, Jens Axboe, dm-devel,
	Junichi Nomura

On Fri, Jan 27 2017 at 11:36am -0500,
Christoph Hellwig <hch@lst.de> wrote:

> On Fri, Jan 27, 2017 at 11:34:34AM -0500, Mike Snitzer wrote:
> > Noticed after further review that it seems a bit weird to have the non
> > blk-mq support in drivers calling blk_mq_rq_to_pdu().  But I'm not sure
> > a blk_rq_to_pdu() macro to blk_mq_rq_to_pdu() is the right thing.  What
> > do you guys think?
> 
> My first version had an additional name for it, but it caused more
> confusion than help.

And renaming blk_mq_rq_to_pdu() to blk_rq_to_pdu() tree-wide would be
too much churn?

I can live with blk_mq_rq_to_pdu(); just figured I'd ask.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27  8:04                                             ` [dm-devel] " Jens Axboe
@ 2017-01-27 16:52                                               ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 16:52 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Fri, 2017-01-27 at 01:04 -0700, Jens Axboe wrote:
> The previous patch had a bug if you didn't use a scheduler, here's a
> version that should work fine in both cases. I've also updated the
> above mentioned branch, so feel free to pull that as well and merge to
> master like before.

Booting time is back to normal with commit f3a8ab7d55bc merged with
v4.10-rc5. That's a great improvement. However, running the srp-test
software triggers now a new complaint:

[  215.600386] sd 11:0:0:0: [sdh] Attached SCSI disk
[  215.609485] sd 11:0:0:0: alua: port group 00 state A non-preferred supports TOlUSNA
[  215.722900] scsi 13:0:0:0: alua: Detached
[  215.724452] general protection fault: 0000 [#1] SMP
[  215.724484] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm msr configfs ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp ipmi_ssif coretemp kvm_intel hid_generic kvm usbhid irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel mlx4_core ghash_clmulni_intel iTCO_wdt dcdbas pcbc tg3
[  215.724629]  iTCO_vendor_support ptp aesni_intel pps_core aes_x86_64 pcspkr crypto_simd libphy ipmi_si glue_helper cryptd ipmi_devintf tpm_tis devlink fjes ipmi_msghandler tpm_tis_core tpm mei_me lpc_ich mei mfd_core button shpchp wmi mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm sr_mod cdrom ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
[  215.724719] CPU: 9 PID: 8043 Comm: multipathd Not tainted 4.10.0-rc5-dbg+ #1
[  215.724748] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
[  215.724775] task: ffff8801717998c0 task.stack: ffffc90002a9c000
[  215.724804] RIP: 0010:scsi_device_put+0xb/0x30
[  215.724829] RSP: 0018:ffffc90002a9faa0 EFLAGS: 00010246
[  215.724855] RAX: 6b6b6b6b6b6b6b6b RBX: ffff88038bf85698 RCX: 0000000000000006
[  215.724880] RDX: 0000000000000006 RSI: ffff88017179a108 RDI: ffff88038bf85698
[  215.724906] RBP: ffffc90002a9faa8 R08: ffff880384786008 R09: 0000000100170007
[  215.724932] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88038bf85698
[  215.724958] R13: ffff88038919f090 R14: dead000000000100 R15: ffff88038a41dd28
[  215.724983] FS:  00007fbf8c6cf700(0000) GS:ffff88046f440000(0000) knlGS:0000000000000000
[  215.725010] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  215.725035] CR2: 00007f1262ef3ee0 CR3: 000000044f6cc000 CR4: 00000000001406e0
[  215.725060] Call Trace:
[  215.725086]  scsi_disk_put+0x2d/0x40
[  215.725110]  sd_release+0x3d/0xb0
[  215.725137]  __blkdev_put+0x29e/0x360
[  215.725163]  blkdev_put+0x49/0x170
[  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
[  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
[  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
[  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
[  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
[  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
[  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
[  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
[  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
[  215.725515]  do_vfs_ioctl+0x8f/0x700
[  215.725589]  SyS_ioctl+0x3c/0x70
[  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
[  215.725641] RIP: 0033:0x7fbf8aca0667
[  215.725665] RSP: 002b:00007fbf8c6cd668 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  215.725692] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fbf8aca0667
[  215.725716] RDX: 00007fbf8006b940 RSI: 00000000c138fd06 RDI: 0000000000000007
[  215.725743] RBP: 0000000000000009 R08: 00007fbf8c6cb3c0 R09: 00007fbf8b68d8d8
[  215.725768] R10: 0000000000000075 R11: 0000000000000246 R12: 00007fbf8c6cd770
[  215.725793] R13: 0000000000000013 R14: 00000000006168f0 R15: 0000000000f74780
[  215.725820] Code: bc 24 b8 00 00 00 e8 55 c8 1c 00 48 83 c4 08 48 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 1f 00 55 48 89 e5 53 48 8b 07 48 89 fb <48> 8b 80 a8 01 00 00 48 8b 38 e8 f6 68 c5 ff 48 8d bb 38 02 00 
[  215.725903] RIP: scsi_device_put+0xb/0x30 RSP: ffffc90002a9faa0

(gdb) list *(scsi_device_put+0xb)
0xffffffff8149fc2b is in scsi_device_put (drivers/scsi/scsi.c:957).
952      * count of the underlying LLDD module.  The device is freed once the last
953      * user vanishes.
954      */
955     void scsi_device_put(struct scsi_device *sdev)
956     {
957             module_put(sdev->host->hostt->module);
958             put_device(&sdev->sdev_gendev);
959     }
960     EXPORT_SYMBOL(scsi_device_put);
961
(gdb) disas scsi_device_put
Dump of assembler code for function scsi_device_put:
   0xffffffff8149fc20 <+0>:     push   %rbp
   0xffffffff8149fc21 <+1>:     mov    %rsp,%rbp
   0xffffffff8149fc24 <+4>:     push   %rbx
   0xffffffff8149fc25 <+5>:     mov    (%rdi),%rax
   0xffffffff8149fc28 <+8>:     mov    %rdi,%rbx
   0xffffffff8149fc2b <+11>:    mov    0x1a8(%rax),%rax
   0xffffffff8149fc32 <+18>:    mov    (%rax),%rdi
   0xffffffff8149fc35 <+21>:    callq  0xffffffff810f6530 <module_put>
   0xffffffff8149fc3a <+26>:    lea    0x238(%rbx),%rdi
   0xffffffff8149fc41 <+33>:    callq  0xffffffff814714b0 <put_device>
   0xffffffff8149fc46 <+38>:    pop    %rbx
   0xffffffff8149fc47 <+39>:    pop    %rbp
   0xffffffff8149fc48 <+40>:    retq    
End of assembler dump.
(gdb) print &((struct Scsi_Host *)0)->hostt  
$2 = (struct scsi_host_template **) 0x1a8 <irq_stack_union+424>

Apparently scsi_device_put() was called for a SCSI device that was already
freed (memory poisoning was enabled in my test). This is something I had
not yet seen before.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:52                                               ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 16:52 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Fri, 2017-01-27 at 01:04 -0700, Jens Axboe wrote:
> The previous patch had a bug if you didn't use a scheduler, here's a
> version that should work fine in both cases. I've also updated the
> above mentioned branch, so feel free to pull that as well and merge to
> master like before.

Booting time is back to normal with commit f3a8ab7d55bc merged with
v4.10-rc5. That's a great improvement. However, running the srp-test
software triggers now a new complaint:

[  215.600386] sd 11:0:0:0: [sdh] Attached SCSI disk
[  215.609485] sd 11:0:0:0: alua: port group 00 state A non-preferred supports TOlUSNA
[  215.722900] scsi 13:0:0:0: alua: Detached
[  215.724452] general protection fault: 0000 [#1] SMP
[  215.724484] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm msr configfs ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp ipmi_ssif coretemp kvm_intel hid_generic kvm usbhid irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel mlx4_core ghash_clmulni_intel iTCO_wdt dcdbas pcbc tg3
[  215.724629]  iTCO_vendor_support ptp aesni_intel pps_core aes_x86_64 pcspkr crypto_simd libphy ipmi_si glue_helper cryptd ipmi_devintf tpm_tis devlink fjes ipmi_msghandler tpm_tis_core tpm mei_me lpc_ich mei mfd_core button shpchp wmi mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm sr_mod cdrom ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
[  215.724719] CPU: 9 PID: 8043 Comm: multipathd Not tainted 4.10.0-rc5-dbg+ #1
[  215.724748] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
[  215.724775] task: ffff8801717998c0 task.stack: ffffc90002a9c000
[  215.724804] RIP: 0010:scsi_device_put+0xb/0x30
[  215.724829] RSP: 0018:ffffc90002a9faa0 EFLAGS: 00010246
[  215.724855] RAX: 6b6b6b6b6b6b6b6b RBX: ffff88038bf85698 RCX: 0000000000000006
[  215.724880] RDX: 0000000000000006 RSI: ffff88017179a108 RDI: ffff88038bf85698
[  215.724906] RBP: ffffc90002a9faa8 R08: ffff880384786008 R09: 0000000100170007
[  215.724932] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88038bf85698
[  215.724958] R13: ffff88038919f090 R14: dead000000000100 R15: ffff88038a41dd28
[  215.724983] FS:  00007fbf8c6cf700(0000) GS:ffff88046f440000(0000) knlGS:0000000000000000
[  215.725010] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  215.725035] CR2: 00007f1262ef3ee0 CR3: 000000044f6cc000 CR4: 00000000001406e0
[  215.725060] Call Trace:
[  215.725086]  scsi_disk_put+0x2d/0x40
[  215.725110]  sd_release+0x3d/0xb0
[  215.725137]  __blkdev_put+0x29e/0x360
[  215.725163]  blkdev_put+0x49/0x170
[  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
[  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
[  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
[  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
[  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
[  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
[  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
[  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
[  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
[  215.725515]  do_vfs_ioctl+0x8f/0x700
[  215.725589]  SyS_ioctl+0x3c/0x70
[  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
[  215.725641] RIP: 0033:0x7fbf8aca0667
[  215.725665] RSP: 002b:00007fbf8c6cd668 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  215.725692] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fbf8aca0667
[  215.725716] RDX: 00007fbf8006b940 RSI: 00000000c138fd06 RDI: 0000000000000007
[  215.725743] RBP: 0000000000000009 R08: 00007fbf8c6cb3c0 R09: 00007fbf8b68d8d8
[  215.725768] R10: 0000000000000075 R11: 0000000000000246 R12: 00007fbf8c6cd770
[  215.725793] R13: 0000000000000013 R14: 00000000006168f0 R15: 0000000000f74780
[  215.725820] Code: bc 24 b8 00 00 00 e8 55 c8 1c 00 48 83 c4 08 48 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 1f 00 55 48 89 e5 53 48 8b 07 48 89 fb <48> 8b 80 a8 01 00 00 48 8b 38 e8 f6 68 c5 ff 48 8d bb 38 02 00 
[  215.725903] RIP: scsi_device_put+0xb/0x30 RSP: ffffc90002a9faa0

(gdb) list *(scsi_device_put+0xb)
0xffffffff8149fc2b is in scsi_device_put (drivers/scsi/scsi.c:957).
952      * count of the underlying LLDD module.  The device is freed once the last
953      * user vanishes.
954      */
955     void scsi_device_put(struct scsi_device *sdev)
956     {
957             module_put(sdev->host->hostt->module);
958             put_device(&sdev->sdev_gendev);
959     }
960     EXPORT_SYMBOL(scsi_device_put);
961
(gdb) disas scsi_device_put
Dump of assembler code for function scsi_device_put:
   0xffffffff8149fc20 <+0>:     push   %rbp
   0xffffffff8149fc21 <+1>:     mov    %rsp,%rbp
   0xffffffff8149fc24 <+4>:     push   %rbx
   0xffffffff8149fc25 <+5>:     mov    (%rdi),%rax
   0xffffffff8149fc28 <+8>:     mov    %rdi,%rbx
   0xffffffff8149fc2b <+11>:    mov    0x1a8(%rax),%rax
   0xffffffff8149fc32 <+18>:    mov    (%rax),%rdi
   0xffffffff8149fc35 <+21>:    callq  0xffffffff810f6530 <module_put>
   0xffffffff8149fc3a <+26>:    lea    0x238(%rbx),%rdi
   0xffffffff8149fc41 <+33>:    callq  0xffffffff814714b0 <put_device>
   0xffffffff8149fc46 <+38>:    pop    %rbx
   0xffffffff8149fc47 <+39>:    pop    %rbp
   0xffffffff8149fc48 <+40>:    retq
End of assembler dump.
(gdb) print &((struct Scsi_Host *)0)->hostt
$2 = (struct scsi_host_template **) 0x1a8 <irq_stack_union+424>

Apparently scsi_device_put() was called for a SCSI device that was already
freed (memory poisoning was enabled in my test). This is something I had
not yet seen before.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:52                                               ` [dm-devel] " Bart Van Assche
@ 2017-01-27 16:56                                                 ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:56 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/27/2017 09:52 AM, Bart Van Assche wrote:
> On Fri, 2017-01-27 at 01:04 -0700, Jens Axboe wrote:
>> The previous patch had a bug if you didn't use a scheduler, here's a
>> version that should work fine in both cases. I've also updated the
>> above mentioned branch, so feel free to pull that as well and merge to
>> master like before.
> 
> Booting time is back to normal with commit f3a8ab7d55bc merged with
> v4.10-rc5. That's a great improvement. However, running the srp-test
> software triggers now a new complaint:
> 
> [  215.600386] sd 11:0:0:0: [sdh] Attached SCSI disk
> [  215.609485] sd 11:0:0:0: alua: port group 00 state A non-preferred supports TOlUSNA
> [  215.722900] scsi 13:0:0:0: alua: Detached
> [  215.724452] general protection fault: 0000 [#1] SMP
> [  215.724484] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm msr configfs ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp ipmi_ssif coretemp kvm_intel hid_generic kvm usbhid irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel mlx4_core ghash_clmulni_intel iTCO_wdt d
 cdbas pcbc tg3
> [  215.724629]  iTCO_vendor_support ptp aesni_intel pps_core aes_x86_64 pcspkr crypto_simd libphy ipmi_si glue_helper cryptd ipmi_devintf tpm_tis devlink fjes ipmi_msghandler tpm_tis_core tpm mei_me lpc_ich mei mfd_core button shpchp wmi mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm sr_mod cdrom ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
> [  215.724719] CPU: 9 PID: 8043 Comm: multipathd Not tainted 4.10.0-rc5-dbg+ #1
> [  215.724748] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
> [  215.724775] task: ffff8801717998c0 task.stack: ffffc90002a9c000
> [  215.724804] RIP: 0010:scsi_device_put+0xb/0x30
> [  215.724829] RSP: 0018:ffffc90002a9faa0 EFLAGS: 00010246
> [  215.724855] RAX: 6b6b6b6b6b6b6b6b RBX: ffff88038bf85698 RCX: 0000000000000006
> [  215.724880] RDX: 0000000000000006 RSI: ffff88017179a108 RDI: ffff88038bf85698
> [  215.724906] RBP: ffffc90002a9faa8 R08: ffff880384786008 R09: 0000000100170007
> [  215.724932] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88038bf85698
> [  215.724958] R13: ffff88038919f090 R14: dead000000000100 R15: ffff88038a41dd28
> [  215.724983] FS:  00007fbf8c6cf700(0000) GS:ffff88046f440000(0000) knlGS:0000000000000000
> [  215.725010] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  215.725035] CR2: 00007f1262ef3ee0 CR3: 000000044f6cc000 CR4: 00000000001406e0
> [  215.725060] Call Trace:
> [  215.725086]  scsi_disk_put+0x2d/0x40
> [  215.725110]  sd_release+0x3d/0xb0
> [  215.725137]  __blkdev_put+0x29e/0x360
> [  215.725163]  blkdev_put+0x49/0x170
> [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
> [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
> [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
> [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
> [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
> [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
> [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
> [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
> [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
> [  215.725515]  do_vfs_ioctl+0x8f/0x700
> [  215.725589]  SyS_ioctl+0x3c/0x70
> [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
> [  215.725641] RIP: 0033:0x7fbf8aca0667
> [  215.725665] RSP: 002b:00007fbf8c6cd668 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [  215.725692] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fbf8aca0667
> [  215.725716] RDX: 00007fbf8006b940 RSI: 00000000c138fd06 RDI: 0000000000000007
> [  215.725743] RBP: 0000000000000009 R08: 00007fbf8c6cb3c0 R09: 00007fbf8b68d8d8
> [  215.725768] R10: 0000000000000075 R11: 0000000000000246 R12: 00007fbf8c6cd770
> [  215.725793] R13: 0000000000000013 R14: 00000000006168f0 R15: 0000000000f74780
> [  215.725820] Code: bc 24 b8 00 00 00 e8 55 c8 1c 00 48 83 c4 08 48 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 1f 00 55 48 89 e5 53 48 8b 07 48 89 fb <48> 8b 80 a8 01 00 00 48 8b 38 e8 f6 68 c5 ff 48 8d bb 38 02 00 
> [  215.725903] RIP: scsi_device_put+0xb/0x30 RSP: ffffc90002a9faa0
> 
> (gdb) list *(scsi_device_put+0xb)
> 0xffffffff8149fc2b is in scsi_device_put (drivers/scsi/scsi.c:957).
> 952      * count of the underlying LLDD module.  The device is freed once the last
> 953      * user vanishes.
> 954      */
> 955     void scsi_device_put(struct scsi_device *sdev)
> 956     {
> 957             module_put(sdev->host->hostt->module);
> 958             put_device(&sdev->sdev_gendev);
> 959     }
> 960     EXPORT_SYMBOL(scsi_device_put);
> 961
> (gdb) disas scsi_device_put
> Dump of assembler code for function scsi_device_put:
>    0xffffffff8149fc20 <+0>:     push   %rbp
>    0xffffffff8149fc21 <+1>:     mov    %rsp,%rbp
>    0xffffffff8149fc24 <+4>:     push   %rbx
>    0xffffffff8149fc25 <+5>:     mov    (%rdi),%rax
>    0xffffffff8149fc28 <+8>:     mov    %rdi,%rbx
>    0xffffffff8149fc2b <+11>:    mov    0x1a8(%rax),%rax
>    0xffffffff8149fc32 <+18>:    mov    (%rax),%rdi
>    0xffffffff8149fc35 <+21>:    callq  0xffffffff810f6530 <module_put>
>    0xffffffff8149fc3a <+26>:    lea    0x238(%rbx),%rdi
>    0xffffffff8149fc41 <+33>:    callq  0xffffffff814714b0 <put_device>
>    0xffffffff8149fc46 <+38>:    pop    %rbx
>    0xffffffff8149fc47 <+39>:    pop    %rbp
>    0xffffffff8149fc48 <+40>:    retq    
> End of assembler dump.
> (gdb) print &((struct Scsi_Host *)0)->hostt  
> $2 = (struct scsi_host_template **) 0x1a8 <irq_stack_union+424>
> 
> Apparently scsi_device_put() was called for a SCSI device that was already
> freed (memory poisoning was enabled in my test). This is something I had
> not yet seen before.

I have no idea what this is, I haven't messed with life time or devices
or queues at all in that branch.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:56                                                 ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:56 UTC (permalink / raw)
  To: Bart Van Assche, hch
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/27/2017 09:52 AM, Bart Van Assche wrote:
> On Fri, 2017-01-27 at 01:04 -0700, Jens Axboe wrote:
>> The previous patch had a bug if you didn't use a scheduler, here's a
>> version that should work fine in both cases. I've also updated the
>> above mentioned branch, so feel free to pull that as well and merge to
>> master like before.
> 
> Booting time is back to normal with commit f3a8ab7d55bc merged with
> v4.10-rc5. That's a great improvement. However, running the srp-test
> software triggers now a new complaint:
> 
> [  215.600386] sd 11:0:0:0: [sdh] Attached SCSI disk
> [  215.609485] sd 11:0:0:0: alua: port group 00 state A non-preferred supports TOlUSNA
> [  215.722900] scsi 13:0:0:0: alua: Detached
> [  215.724452] general protection fault: 0000 [#1] SMP
> [  215.724484] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm msr configfs ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp ipmi_ssif coretemp kvm_intel hid_generic kvm usbhid irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel mlx4_core ghash_clmulni_intel iTCO_wdt dcdbas pcbc tg3
> [  215.724629]  iTCO_vendor_support ptp aesni_intel pps_core aes_x86_64 pcspkr crypto_simd libphy ipmi_si glue_helper cryptd ipmi_devintf tpm_tis devlink fjes ipmi_msghandler tpm_tis_core tpm mei_me lpc_ich mei mfd_core button shpchp wmi mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm sr_mod cdrom ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
> [  215.724719] CPU: 9 PID: 8043 Comm: multipathd Not tainted 4.10.0-rc5-dbg+ #1
> [  215.724748] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
> [  215.724775] task: ffff8801717998c0 task.stack: ffffc90002a9c000
> [  215.724804] RIP: 0010:scsi_device_put+0xb/0x30
> [  215.724829] RSP: 0018:ffffc90002a9faa0 EFLAGS: 00010246
> [  215.724855] RAX: 6b6b6b6b6b6b6b6b RBX: ffff88038bf85698 RCX: 0000000000000006
> [  215.724880] RDX: 0000000000000006 RSI: ffff88017179a108 RDI: ffff88038bf85698
> [  215.724906] RBP: ffffc90002a9faa8 R08: ffff880384786008 R09: 0000000100170007
> [  215.724932] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88038bf85698
> [  215.724958] R13: ffff88038919f090 R14: dead000000000100 R15: ffff88038a41dd28
> [  215.724983] FS:  00007fbf8c6cf700(0000) GS:ffff88046f440000(0000) knlGS:0000000000000000
> [  215.725010] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  215.725035] CR2: 00007f1262ef3ee0 CR3: 000000044f6cc000 CR4: 00000000001406e0
> [  215.725060] Call Trace:
> [  215.725086]  scsi_disk_put+0x2d/0x40
> [  215.725110]  sd_release+0x3d/0xb0
> [  215.725137]  __blkdev_put+0x29e/0x360
> [  215.725163]  blkdev_put+0x49/0x170
> [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
> [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
> [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
> [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
> [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
> [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
> [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
> [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
> [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
> [  215.725515]  do_vfs_ioctl+0x8f/0x700
> [  215.725589]  SyS_ioctl+0x3c/0x70
> [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
> [  215.725641] RIP: 0033:0x7fbf8aca0667
> [  215.725665] RSP: 002b:00007fbf8c6cd668 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [  215.725692] RAX: ffffffffffffffda RBX: 0000000000000046 RCX: 00007fbf8aca0667
> [  215.725716] RDX: 00007fbf8006b940 RSI: 00000000c138fd06 RDI: 0000000000000007
> [  215.725743] RBP: 0000000000000009 R08: 00007fbf8c6cb3c0 R09: 00007fbf8b68d8d8
> [  215.725768] R10: 0000000000000075 R11: 0000000000000246 R12: 00007fbf8c6cd770
> [  215.725793] R13: 0000000000000013 R14: 00000000006168f0 R15: 0000000000f74780
> [  215.725820] Code: bc 24 b8 00 00 00 e8 55 c8 1c 00 48 83 c4 08 48 89 d8 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 1f 00 55 48 89 e5 53 48 8b 07 48 89 fb <48> 8b 80 a8 01 00 00 48 8b 38 e8 f6 68 c5 ff 48 8d bb 38 02 00 
> [  215.725903] RIP: scsi_device_put+0xb/0x30 RSP: ffffc90002a9faa0
> 
> (gdb) list *(scsi_device_put+0xb)
> 0xffffffff8149fc2b is in scsi_device_put (drivers/scsi/scsi.c:957).
> 952      * count of the underlying LLDD module.  The device is freed once the last
> 953      * user vanishes.
> 954      */
> 955     void scsi_device_put(struct scsi_device *sdev)
> 956     {
> 957             module_put(sdev->host->hostt->module);
> 958             put_device(&sdev->sdev_gendev);
> 959     }
> 960     EXPORT_SYMBOL(scsi_device_put);
> 961
> (gdb) disas scsi_device_put
> Dump of assembler code for function scsi_device_put:
>    0xffffffff8149fc20 <+0>:     push   %rbp
>    0xffffffff8149fc21 <+1>:     mov    %rsp,%rbp
>    0xffffffff8149fc24 <+4>:     push   %rbx
>    0xffffffff8149fc25 <+5>:     mov    (%rdi),%rax
>    0xffffffff8149fc28 <+8>:     mov    %rdi,%rbx
>    0xffffffff8149fc2b <+11>:    mov    0x1a8(%rax),%rax
>    0xffffffff8149fc32 <+18>:    mov    (%rax),%rdi
>    0xffffffff8149fc35 <+21>:    callq  0xffffffff810f6530 <module_put>
>    0xffffffff8149fc3a <+26>:    lea    0x238(%rbx),%rdi
>    0xffffffff8149fc41 <+33>:    callq  0xffffffff814714b0 <put_device>
>    0xffffffff8149fc46 <+38>:    pop    %rbx
>    0xffffffff8149fc47 <+39>:    pop    %rbp
>    0xffffffff8149fc48 <+40>:    retq    
> End of assembler dump.
> (gdb) print &((struct Scsi_Host *)0)->hostt  
> $2 = (struct scsi_host_template **) 0x1a8 <irq_stack_union+424>
> 
> Apparently scsi_device_put() was called for a SCSI device that was already
> freed (memory poisoning was enabled in my test). This is something I had
> not yet seen before.

I have no idea what this is, I haven't messed with life time or devices
or queues at all in that branch.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:42                 ` Christoph Hellwig
@ 2017-01-27 16:58                   ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:58 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-raid, Mike Snitzer, linux-scsi, linux-block, dm-devel,
	Junichi Nomura

On 01/27/2017 09:42 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:38:40AM -0700, Jens Axboe wrote:
>>> Ok, I'll repost what I have right now, which is on top of a merge
>>> of your block/for-4.11/next and your for-next from this morning
>>> my time.
>>
>> Perfect.
> 
> At least I tried, looks like the mail server is overloaded and crapped
> out three mails into it.  For now there is a git tree here:
> 
> http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/block-pc-refactor

I grabbed it all from there. for-4.11/rq-refactor has been rebased to v3.
Basic testing looks fine here, at least on v2. I'll repeat the same and
then merge it into for-next as well.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 16:58                   ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 16:58 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Junichi Nomura, linux-block, linux-scsi,
	linux-raid, dm-devel

On 01/27/2017 09:42 AM, Christoph Hellwig wrote:
> On Fri, Jan 27, 2017 at 09:38:40AM -0700, Jens Axboe wrote:
>>> Ok, I'll repost what I have right now, which is on top of a merge
>>> of your block/for-4.11/next and your for-next from this morning
>>> my time.
>>
>> Perfect.
> 
> At least I tried, looks like the mail server is overloaded and crapped
> out three mails into it.  For now there is a git tree here:
> 
> http://git.infradead.org/users/hch/block.git/shortlog/refs/heads/block-pc-refactor

I grabbed it all from there. for-4.11/rq-refactor has been rebased to v3.
Basic testing looks fine here, at least on v2. I'll repeat the same and
then merge it into for-next as well.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-27  1:22                                         ` [dm-devel] " Jens Axboe
@ 2017-01-27 17:02                                           ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:02 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 18:22 -0700, Jens Axboe wrote:
> What's your boot device? I've been booting this on a variety of setups,
> no problems observed. It's booting my laptop, and on SCSI and SATA as
> well. What is your root drive? What is the queue depth of it?
> Controller?

The boot device in my test setup is a SATA hard disk:

# cat /proc/cmdline  
BOOT_IMAGE=/boot/vmlinuz-4.10.0-rc5-dbg+ root=UUID=60a4b064-b3ef-4d28-96d3-3c13ecbec43e resume=/dev/sda2 showopts
# ls -l /dev/disk/by-uuid/60a4b064-b3ef-4d28-96d3-3c13ecbec43e
lrwxrwxrwx 1 root root 10 Jan 27 08:43 /dev/disk/by-uuid/60a4b064-b3ef-4d28-96d3-3c13ecbec43e -> ../../sda1
# cat /sys/block/sda/queue/nr_requests  
31
# lsscsi | grep sda
[0:0:0:0]    disk    ATA      ST1000NM0033-9ZM GA67  /dev/sda
# hdparm -i /dev/sda

/dev/sda:
 Model=ST1000NM0033-9ZM173, FwRev=GA67, SerialNo=Z1W2HM75
 Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs RotSpdTol>.5% }
 RawCHS=16383/16/63, TrkSize=0, SectSize=0, ECCbytes=0
 BuffType=unknown, BuffSize=unknown, MaxMultSect=16, MultSect=off
 CurCHS=16383/16/63, CurSects=16514064, LBA=yes, LBAsects=1953525168
 IORDY=on/off, tPIO={min:120,w/IORDY:120}, tDMA={min:120,rec:120}
 PIO modes:  pio0 pio1 pio2 pio3 pio4  
 DMA modes:  mdma0 mdma1 mdma2  
 UDMA modes: udma0 udma1 udma2 udma3 udma4 udma5 *udma6  
 AdvancedPM=no WriteCache=disabled
 Drive conforms to: Unspecified:  ATA/ATAPI-4,5,6,7

 * signifies the current active mode

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27 17:02                                           ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:02 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Thu, 2017-01-26 at 18:22 -0700, Jens Axboe wrote:
> What's your boot device? I've been booting this on a variety of setups,
> no problems observed. It's booting my laptop, and on SCSI and SATA as
> well. What is your root drive? What is the queue depth of it?
> Controller?

The boot device in my test setup is a SATA hard disk:

# cat /proc/cmdline  
BOOT_IMAGE=/boot/vmlinuz-4.10.0-rc5-dbg+ root=UUID=60a4b064-b3ef-4d28-96d3-3c13ecbec43e resume=/dev/sda2 showopts
# ls -l /dev/disk/by-uuid/60a4b064-b3ef-4d28-96d3-3c13ecbec43e
lrwxrwxrwx 1 root root 10 Jan 27 08:43 /dev/disk/by-uuid/60a4b064-b3ef-4d28-96d3-3c13ecbec43e -> ../../sda1
# cat /sys/block/sda/queue/nr_requests  
31
# lsscsi | grep sda
[0:0:0:0]    disk    ATA      ST1000NM0033-9ZM GA67  /dev/sda
# hdparm -i /dev/sda

/dev/sda:
 Model=ST1000NM0033-9ZM173, FwRev=GA67, SerialNo=Z1W2HM75
 Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs RotSpdTol>.5% }
 RawCHS=16383/16/63, TrkSize=0, SectSize=0, ECCbytes=0
 BuffType=unknown, BuffSize=unknown, MaxMultSect=16, MultSect=off
 CurCHS=16383/16/63, CurSects=16514064, LBA=yes, LBAsects=1953525168
 IORDY=on/off, tPIO={min:120,w/IORDY:120}, tDMA={min:120,rec:120}
 PIO modes:  pio0 pio1 pio2 pio3 pio4  
 DMA modes:  mdma0 mdma1 mdma2  
 UDMA modes: udma0 udma1 udma2 udma3 udma4 udma5 *udma6  
 AdvancedPM=no WriteCache=disabled
 Drive conforms to: Unspecified:  ATA/ATAPI-4,5,6,7

 * signifies the current active mode

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 16:56                                                 ` [dm-devel] " Jens Axboe
@ 2017-01-27 17:03                                                   ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:03 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
> I have no idea what this is, I haven't messed with life time or devices
> or queues at all in that branch.

The srp-test software passes with kernel v4.9. Something must have changed.
I'll see whether I can find some time to look further into this.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-27 17:03                                                   ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:03 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
> I have no idea what this is, I haven't messed with life time or devices
> or queues at all in that branch.

The srp-test software passes with kernel v4.9. Something must have changed.
I'll see whether I can find some time to look further into this.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-27 16:12       ` Christoph Hellwig
@ 2017-01-27 17:21         ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:21 UTC (permalink / raw)
  To: hch, martin.petersen
  Cc: linux-block, linux-scsi, snitzer, axboe, linux-raid, dm-devel, j-nomura

On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
> On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
> > +static void *alloc_request_size(gfp_t gfp_mask, void *data)
> > 
> > I like alloc_request_simple() but alloc_request_size() seems a bit
> > contrived. _reserve? _extra? _special? Don't have any good suggestions,
> > I'm afraid.
> 
> Not that I'm a fan of _size, but I like the other suggestions even less.

Hello Christoph and Martin,

How about using the function names alloc_full_request() / free_full_request()
together with a comment that mentions that cmd_size is set by the LLD?

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
@ 2017-01-27 17:21         ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:21 UTC (permalink / raw)
  To: hch, martin.petersen
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, axboe, snitzer, j-nomura

On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
> On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
> > +static void *alloc_request_size(gfp_t gfp_mask, void *data)
> > 
> > I like alloc_request_simple() but alloc_request_size() seems a bit
> > contrived. _reserve? _extra? _special? Don't have any good suggestions,
> > I'm afraid.
> 
> Not that I'm a fan of _size, but I like the other suggestions even less.

Hello Christoph and Martin,

How about using the function names alloc_full_request() / free_full_request()
together with a comment that mentions that cmd_size is set by the LLD?

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-27 17:21         ` Bart Van Assche
@ 2017-01-27 17:26           ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 17:26 UTC (permalink / raw)
  To: Bart Van Assche, hch, martin.petersen
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On 01/27/2017 10:21 AM, Bart Van Assche wrote:
> On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
>> On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
>>> +static void *alloc_request_size(gfp_t gfp_mask, void *data)
>>>
>>> I like alloc_request_simple() but alloc_request_size() seems a bit
>>> contrived. _reserve? _extra? _special? Don't have any good suggestions,
>>> I'm afraid.
>>
>> Not that I'm a fan of _size, but I like the other suggestions even less.
> 
> Hello Christoph and Martin,
> 
> How about using the function names alloc_full_request() / free_full_request()
> together with a comment that mentions that cmd_size is set by the LLD?

Since we use pdu in other places, how about alloc_request_pdu() or
alloc_request_with_pdu()?

And since it's all queued up, any bike shedding changes will have to be
incremental.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
@ 2017-01-27 17:26           ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 17:26 UTC (permalink / raw)
  To: Bart Van Assche, hch, martin.petersen
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/27/2017 10:21 AM, Bart Van Assche wrote:
> On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
>> On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
>>> +static void *alloc_request_size(gfp_t gfp_mask, void *data)
>>>
>>> I like alloc_request_simple() but alloc_request_size() seems a bit
>>> contrived. _reserve? _extra? _special? Don't have any good suggestions,
>>> I'm afraid.
>>
>> Not that I'm a fan of _size, but I like the other suggestions even less.
> 
> Hello Christoph and Martin,
> 
> How about using the function names alloc_full_request() / free_full_request()
> together with a comment that mentions that cmd_size is set by the LLD?

Since we use pdu in other places, how about alloc_request_pdu() or
alloc_request_with_pdu()?

And since it's all queued up, any bike shedding changes will have to be
incremental.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-27 17:26           ` Jens Axboe
@ 2017-01-27 17:30             ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:30 UTC (permalink / raw)
  To: hch, axboe, martin.petersen
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Fri, 2017-01-27 at 10:26 -0700, Jens Axboe wrote:
> On 01/27/2017 10:21 AM, Bart Van Assche wrote:
> > On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
> > > On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
> > > > +static void *alloc_request_size(gfp_t gfp_mask, void *data)
> > > > 
> > > > I like alloc_request_simple() but alloc_request_size() seems a bit
> > > > contrived. _reserve? _extra? _special? Don't have any good suggestions,
> > > > I'm afraid.
> > > 
> > > Not that I'm a fan of _size, but I like the other suggestions even less.
> > 
> > Hello Christoph and Martin,
> > 
> > How about using the function names alloc_full_request() / free_full_request()
> > together with a comment that mentions that cmd_size is set by the LLD?
> 
> Since we use pdu in other places, how about alloc_request_pdu() or
> alloc_request_with_pdu()?
> 
> And since it's all queued up, any bike shedding changes will have to be
> incremental.

Hello Jens,

Other Linux subsystems use the term "private data" instead of PDU. How about
modifying the block layer such that it uses the same terminology? I'm
referring to function names like blk_mq_rq_from_pdu() and blk_mq_rq_to_pdu()

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
@ 2017-01-27 17:30             ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:30 UTC (permalink / raw)
  To: hch, axboe, martin.petersen
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Fri, 2017-01-27 at 10:26 -0700, Jens Axboe wrote:
> On 01/27/2017 10:21 AM, Bart Van Assche wrote:
> > On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
> > > On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
> > > > +static void *alloc_request_size(gfp_t gfp_mask, void *data)
> > > > 
> > > > I like alloc_request_simple() but alloc_request_size() seems a bit
> > > > contrived. _reserve? _extra? _special? Don't have any good suggestions,
> > > > I'm afraid.
> > > 
> > > Not that I'm a fan of _size, but I like the other suggestions even less.
> > 
> > Hello Christoph and Martin,
> > 
> > How about using the function names alloc_full_request() / free_full_request()
> > together with a comment that mentions that cmd_size is set by the LLD?
> 
> Since we use pdu in other places, how about alloc_request_pdu() or
> alloc_request_with_pdu()?
> 
> And since it's all queued up, any bike shedding changes will have to be
> incremental.

Hello Jens,

Other Linux subsystems use the term "private data" instead of PDU. How about
modifying the block layer such that it uses the same terminology? I'm
referring to function names like blk_mq_rq_from_pdu() and blk_mq_rq_to_pdu()

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 06/18] dm: remove incomple BLOCK_PC support
  2017-01-25 17:25 ` [PATCH 06/18] dm: remove incomple BLOCK_PC support Christoph Hellwig
@ 2017-01-27 17:32     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:32 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> DM tries to copy a few fields around for BLOCK_PC requests, but given
> that no dm-target ever wires up scsi_cmd_ioctl BLOCK_PC can't actually
> be sent to dm.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 06/18] dm: remove incomple BLOCK_PC support
@ 2017-01-27 17:32     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:32 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> DM tries to copy a few fields around for BLOCK_PC requests, but given
> that no dm-target ever wires up scsi_cmd_ioctl BLOCK_PC can't actually
> be sent to dm.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 05/18] block: allow specifying size for extra command data
  2017-01-27 17:30             ` Bart Van Assche
  (?)
@ 2017-01-27 17:33             ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-27 17:33 UTC (permalink / raw)
  To: Bart Van Assche, hch, martin.petersen
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/27/2017 10:30 AM, Bart Van Assche wrote:
> On Fri, 2017-01-27 at 10:26 -0700, Jens Axboe wrote:
>> On 01/27/2017 10:21 AM, Bart Van Assche wrote:
>>> On Fri, 2017-01-27 at 17:12 +0100, Christoph Hellwig wrote:
>>>> On Wed, Jan 25, 2017 at 10:15:55PM -0500, Martin K. Petersen wrote:
>>>>> +static void *alloc_request_size(gfp_t gfp_mask, void *data)
>>>>>
>>>>> I like alloc_request_simple() but alloc_request_size() seems a bit
>>>>> contrived. _reserve? _extra? _special? Don't have any good suggestions,
>>>>> I'm afraid.
>>>>
>>>> Not that I'm a fan of _size, but I like the other suggestions even less.
>>>
>>> Hello Christoph and Martin,
>>>
>>> How about using the function names alloc_full_request() / free_full_request()
>>> together with a comment that mentions that cmd_size is set by the LLD?
>>
>> Since we use pdu in other places, how about alloc_request_pdu() or
>> alloc_request_with_pdu()?
>>
>> And since it's all queued up, any bike shedding changes will have to be
>> incremental.
> 
> Hello Jens,
> 
> Other Linux subsystems use the term "private data" instead of PDU. How about
> modifying the block layer such that it uses the same terminology? I'm
> referring to function names like blk_mq_rq_from_pdu() and blk_mq_rq_to_pdu()

It's been pdu since it was introduced in 3.13, I really don't see a good
reason to change it. At least pdu or payload means something, where as
private is just... Well, not a big fan.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool
  2017-01-25 17:25 ` [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool Christoph Hellwig
@ 2017-01-27 17:38     ` Bart Van Assche
  2017-01-27 17:38     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:38 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> When using the slab allocator we already decide at cache creation time if
> an allocation comes from a GFP_DMA pool using the SLAB_CACHE_DMA flag,
> and there is no point passing the kmalloc-family only GFP_DMA flag to
> kmem_cache_alloc.  Drop all the infrastructure for doing so.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool
@ 2017-01-27 17:38     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:38 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> When using the slab allocator we already decide at cache creation time if
> an allocation comes from a GFP_DMA pool using the SLAB_CACHE_DMA flag,
> and there is no point passing the kmalloc-family only GFP_DMA flag to
> kmem_cache_alloc.  Drop all the infrastructure for doing so.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq
  2017-01-25 17:25 ` [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq Christoph Hellwig
@ 2017-01-27 17:45     ` Bart Van Assche
  2017-01-27 17:45     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:45 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Currently blk-mq always allocates the sense buffer using normal GFP_KERNEL
> allocation.  Refactor the cmd pool code to split the cmd and sense allocation
> and share the code to allocate the sense buffers as well as the sense buffer
> slab caches between the legacy and blk-mq path.
> 
> Note that this switches to lazy allocation of the sense slab caches - the
> slab caches (not the actual allocations) won't be destroy until the scsi
> module is unloaded instead of keeping track of hosts using them.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq
@ 2017-01-27 17:45     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:45 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Currently blk-mq always allocates the sense buffer using normal GFP_KERNEL
> allocation.  Refactor the cmd pool code to split the cmd and sense allocation
> and share the code to allocate the sense buffers as well as the sense buffer
> slab caches between the legacy and blk-mq path.
> 
> Note that this switches to lazy allocation of the sense slab caches - the
> slab caches (not the actual allocations) won't be destroy until the scsi
> module is unloaded instead of keeping track of hosts using them.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 13/18] scsi: remove scsi_cmd_dma_pool
  2017-01-25 17:25 ` [PATCH 13/18] scsi: remove scsi_cmd_dma_pool Christoph Hellwig
@ 2017-01-27 17:51     ` Bart Van Assche
  2017-01-27 17:51     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:51 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> There is no need for GFP_DMA allocations of the scsi_cmnd structures
> themselves, all that might be DMAed to or from is the actual payload,
> or the sense buffers.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 13/18] scsi: remove scsi_cmd_dma_pool
@ 2017-01-27 17:51     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:51 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> There is no need for GFP_DMA allocations of the scsi_cmnd structures
> themselves, all that might be DMAed to or from is the actual payload,
> or the sense buffers.

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 14/18] scsi: remove __scsi_alloc_queue
  2017-01-25 17:25 ` [PATCH 14/18] scsi: remove __scsi_alloc_queue Christoph Hellwig
@ 2017-01-27 17:58     ` Bart Van Assche
  2017-01-27 17:58     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:58 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
> index 8129239..b6e07b5 100644
> --- a/include/scsi/scsi_transport.h
> +++ b/include/scsi/scsi_transport.h
> @@ -119,4 +119,6 @@ scsi_transport_device_data(struct scsi_device *sdev)
>  		+ shost->transportt->device_private_offset;
>  }
>  
> +void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q);
> +
>  #endif /* SCSI_TRANSPORT_H */

Hello Christoph,

Since __scsi_init_queue() modifies data in the Scsi_Host structure, have you
considered to add the declaration for this function to <scsi/scsi_host.h>?
If you want to keep this declaration in <scsi/scsi_transport.h> please add a
direct include of that header file to drivers/scsi/scsi_lib.c such that the
declaration remains visible to the compiler if someone would minimize the
number of #include directives in SCSI header files.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 14/18] scsi: remove __scsi_alloc_queue
@ 2017-01-27 17:58     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 17:58 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.=
h
> index 8129239..b6e07b5 100644
> --- a/include/scsi/scsi_transport.h
> +++ b/include/scsi/scsi_transport.h
> @@ -119,4 +119,6 @@ scsi_transport_device_data(struct scsi_device *sdev)
>  		+ shost->transportt->device_private_offset;
>  }
> =20
> +void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)=
;
> +
>  #endif /* SCSI_TRANSPORT_H */

Hello Christoph,

Since __scsi_init_queue() modifies data in the Scsi_Host structure, have yo=
u
considered to add the declaration for this function to <scsi/scsi_host.h>?
If you want to keep this declaration in <scsi/scsi_transport.h> please add =
a
direct include of that header file to drivers/scsi/scsi_lib.c such that the
declaration remains visible to the compiler if someone would minimize the
number of #include directives in SCSI header files.

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request
  2017-01-25 17:25 ` [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request Christoph Hellwig
@ 2017-01-27 18:39     ` Bart Van Assche
  2017-01-27 18:39     ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 18:39 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> -unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, gfp_t gfp_mask,
> -               int numa_node)
> +static unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost,
> +       gfp_t gfp_mask, int numa_node)
>  {
>         return kmem_cache_alloc_node(scsi_select_sense_cache(shost), gfp_mask,
>                         numa_node);
> @@ -697,14 +696,13 @@ static bool scsi_end_request(struct request *req, int error,
>  
>                 if (bidi_bytes)
>                         scsi_release_bidi_buffers(cmd);
> +               scsi_release_buffers(cmd);
> +               scsi_put_command(cmd);
>  
>                 spin_lock_irqsave(q->queue_lock, flags);
>                 blk_finish_request(req, error);
>                 spin_unlock_irqrestore(q->queue_lock, flags);
>  
> -               scsi_release_buffers(cmd);
> -
> -               scsi_put_command(cmd);
>                 scsi_run_queue(q);
>         }

Hello Christoph,

Why have the scsi_release_buffers() and scsi_put_command(cmd) calls been
moved up? I haven't found an explanation for this change in the patch
description.

Please also consider to remove the cmd->request->special = NULL assignments
via this patch. Since this patch makes the lifetime of struct scsi_cmnd and
struct request identical these assignments are no longer needed.

This patch introduces the function scsi_exit_rq(). Having two functions
for the single-queue path that release resources (scsi_release_buffers()
and scsi_exit_rq()) is confusing. Since every scsi_release_buffers() call
is followed by a blk_unprep_request() call, have you considered to move
the scsi_release_buffers() call into scsi_unprep_fn() via an additional
patch?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request
@ 2017-01-27 18:39     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 18:39 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> -unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost, gfp_t gf=
p_mask,
> -=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0int numa_node)
> +static unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost,
> +=A0=A0=A0=A0=A0=A0=A0gfp_t gfp_mask, int numa_node)
> =A0{
> =A0=A0=A0=A0=A0=A0=A0=A0return kmem_cache_alloc_node(scsi_select_sense_ca=
che(shost), gfp_mask,
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0n=
uma_node);
> @@ -697,14 +696,13 @@ static bool scsi_end_request(struct request *req, i=
nt error,
> =A0
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0if (bidi_bytes)
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0s=
csi_release_bidi_buffers(cmd);
> +=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0scsi_release_buffers(cmd);
> +=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0scsi_put_command(cmd);
> =A0
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0spin_lock_irqsave(q->queu=
e_lock, flags);
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0blk_finish_request(req, e=
rror);
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0spin_unlock_irqrestore(q-=
>queue_lock, flags);
> =A0
> -=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0scsi_release_buffers(cmd);
> -
> -=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0scsi_put_command(cmd);
> =A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0scsi_run_queue(q);
> =A0=A0=A0=A0=A0=A0=A0=A0}

Hello Christoph,

Why=A0have the=A0scsi_release_buffers() and scsi_put_command(cmd) calls bee=
n
moved up? I haven't found an explanation for this change in the patch
description.

Please also consider to remove the cmd->request->special =3D NULL assignmen=
ts
via this patch. Since this patch makes the lifetime of struct scsi_cmnd and
struct request identical these assignments are no longer needed.

This patch introduces the function scsi_exit_rq(). Having two functions
for the single-queue path that release resources (scsi_release_buffers()
and scsi_exit_rq()) is confusing. Since every scsi_release_buffers() call
is followed by a blk_unprep_request() call, have you considered to move
the scsi_release_buffers() call into scsi_unprep_fn() via an additional
patch?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue
  2017-01-25 17:25 ` [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue Christoph Hellwig
@ 2017-01-27 18:48     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 18:48 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Simply the boilerplate code needed for bsg nodes a bit.

Did you perhaps mean "Simplify"? Anyway, nice work!

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue
@ 2017-01-27 18:48     ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 18:48 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> Simply the boilerplate code needed for bsg nodes a bit.

Did you perhaps mean "Simplify"? Anyway, nice work!

Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
@ 2017-01-27 21:27   ` Bart Van Assche
  2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
                     ` (19 subsequent siblings)
  20 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 21:27 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> this series splits the support for SCSI passthrough commands from the
> main struct request used all over the block layer into a separate
> scsi_request structure that drivers that want to support SCSI passthough
> need to embedded as the first thing into their request-private data,
> similar to how we handle NVMe passthrough commands.
> 
> To support this I've added support for that the private data after
> request structure to the legacy request path instead, so that it can
> be treated the same way as the blk-mq path.  Compare to the current
> scsi_cmnd allocator that actually is a major simplification.
> 
> Changes since V1:
>  - fix handling of a NULL sense pointer in __scsi_execute
>  - clean up handling of the flush flags in the block layer and MD
>  - additional small cleanup in dm-rq

Hello Christoph,

A general comment: patch "block: allow specifying size for extra
command data" is a very welcome improvement but unfortunately also
introduces an inconsistency among block drivers. This patch series
namely creates two kinds of block drivers:
- Block drivers that use the block layer core to allocate
  request-private data. These block drivers set request.cmd_size
  to a non-zero value and do not need request.special.
- Block drivers that allocate request-private data themselves.
  These block drivers set request.cmd_size to zero and use
  request.special to translate a request pointer into the private
  data pointer.

Have you considered to convert all block drivers to the new
approach and to get rid of request.special? If so, do you already
have plans to start working on this? I'm namely wondering whether I
should start working on this myself.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-27 21:27   ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-27 21:27 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
> this series splits the support for SCSI passthrough commands from the
> main struct request used all over the block layer into a separate
> scsi_request structure that drivers that want to support SCSI passthough
> need to embedded as the first thing into their request-private data,
> similar to how we handle NVMe passthrough commands.
>=20
> To support this I've added support for that the private data after
> request structure to the legacy request path instead, so that it can
> be treated the same way as the blk-mq path.  Compare to the current
> scsi_cmnd allocator that actually is a major simplification.
>=20
> Changes since V1:
>  - fix handling of a NULL sense pointer in __scsi_execute
>  - clean up handling of the flush flags in the block layer and MD
>  - additional small cleanup in dm-rq

Hello Christoph,

A general comment: patch "block: allow specifying size for extra
command data" is a very welcome improvement but unfortunately also
introduces an inconsistency among block drivers. This patch series
namely creates two kinds of block drivers:
- Block drivers that use the block layer core to allocate
  request-private data. These block drivers set request.cmd_size
  to a non-zero value and do not need request.special.
- Block drivers that allocate request-private data themselves.
  These block drivers set request.cmd_size to zero and use
  request.special to translate a request pointer into the private
  data pointer.

Have you considered to convert all block drivers to the new
approach and to get rid of request.special? If so, do you already
have plans to start working on this? I'm namely wondering whether I
should start working on this myself.

Thanks,

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 14/18] scsi: remove __scsi_alloc_queue
  2017-01-27 17:58     ` Bart Van Assche
  (?)
@ 2017-01-28  8:23     ` hch
  -1 siblings, 0 replies; 172+ messages in thread
From: hch @ 2017-01-28  8:23 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: hch, axboe, linux-scsi, linux-raid, dm-devel, linux-block,
	snitzer, j-nomura

On Fri, Jan 27, 2017 at 05:58:02PM +0000, Bart Van Assche wrote:
> Since __scsi_init_queue() modifies data in the Scsi_Host structure, have you
> considered to add the declaration for this function to <scsi/scsi_host.h>?
> If you want to keep this declaration in <scsi/scsi_transport.h> please add a
> direct include of that header file to drivers/scsi/scsi_lib.c such that the
> declaration remains visible to the compiler if someone would minimize the
> number of #include directives in SCSI header files.

Feel free to send an incremental patch either way.  In the long run
I'd really like to kill off __scsi_init_queue and remove the transport
BSG queue abuse of SCSI internals, though.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request
  2017-01-27 18:39     ` Bart Van Assche
@ 2017-01-28  8:25       ` hch
  -1 siblings, 0 replies; 172+ messages in thread
From: hch @ 2017-01-28  8:25 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-raid, snitzer, linux-scsi, axboe, dm-devel, linux-block,
	j-nomura, hch

On Fri, Jan 27, 2017 at 06:39:46PM +0000, Bart Van Assche wrote:
> Why have the scsi_release_buffers() and scsi_put_command(cmd) calls been
> moved up? I haven't found an explanation for this change in the patch
> description.

Because they reference the scsi_cmnd, which are now part of the request
and thus freed by blk_finish_request.  And yes, I should have mentioned
it in the changelog, sorry.

> Please also consider to remove the cmd->request->special = NULL assignments
> via this patch. Since this patch makes the lifetime of struct scsi_cmnd and
> struct request identical these assignments are no longer needed.

True.  If I had to resend again I would have fixed it up, but it's probably
not worth the churn now.

> This patch introduces the function scsi_exit_rq(). Having two functions
> for the single-queue path that release resources (scsi_release_buffers()
> and scsi_exit_rq()) is confusing. Since every scsi_release_buffers() call
> is followed by a blk_unprep_request() call, have you considered to move
> the scsi_release_buffers() call into scsi_unprep_fn() via an additional
> patch?

We could have done that.  But it's just more change for a code path
that I hope won't survive this calendar year.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request
@ 2017-01-28  8:25       ` hch
  0 siblings, 0 replies; 172+ messages in thread
From: hch @ 2017-01-28  8:25 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: hch, axboe, linux-scsi, linux-raid, dm-devel, linux-block,
	snitzer, j-nomura

On Fri, Jan 27, 2017 at 06:39:46PM +0000, Bart Van Assche wrote:
> Why have the scsi_release_buffers() and scsi_put_command(cmd) calls been
> moved up? I haven't found an explanation for this change in the patch
> description.

Because they reference the scsi_cmnd, which are now part of the request
and thus freed by blk_finish_request.  And yes, I should have mentioned
it in the changelog, sorry.

> Please also consider to remove the cmd->request->special = NULL assignments
> via this patch. Since this patch makes the lifetime of struct scsi_cmnd and
> struct request identical these assignments are no longer needed.

True.  If I had to resend again I would have fixed it up, but it's probably
not worth the churn now.

> This patch introduces the function scsi_exit_rq(). Having two functions
> for the single-queue path that release resources (scsi_release_buffers()
> and scsi_exit_rq()) is confusing. Since every scsi_release_buffers() call
> is followed by a blk_unprep_request() call, have you considered to move
> the scsi_release_buffers() call into scsi_unprep_fn() via an additional
> patch?

We could have done that.  But it's just more change for a code path
that I hope won't survive this calendar year.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 21:27   ` Bart Van Assche
  (?)
@ 2017-01-28  8:29   ` hch
  -1 siblings, 0 replies; 172+ messages in thread
From: hch @ 2017-01-28  8:29 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: hch, axboe, linux-scsi, linux-raid, dm-devel, linux-block,
	snitzer, j-nomura

On Fri, Jan 27, 2017 at 09:27:53PM +0000, Bart Van Assche wrote:
> Have you considered to convert all block drivers to the new
> approach and to get rid of request.special? If so, do you already
> have plans to start working on this? I'm namely wondering wheter I
> should start working on this myself.

Hi Bart,

I'd love to have all drivers move of using .special (and thus reducing
request size further).  I think the general way to do that is to convert
them to blk-mq and not using the legacy cmd_size field.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-27 21:27   ` Bart Van Assche
@ 2017-01-30  6:58     ` Hannes Reinecke
  -1 siblings, 0 replies; 172+ messages in thread
From: Hannes Reinecke @ 2017-01-30  6:58 UTC (permalink / raw)
  To: Bart Van Assche, hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/27/2017 10:27 PM, Bart Van Assche wrote:
> On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
>> this series splits the support for SCSI passthrough commands from the
>> main struct request used all over the block layer into a separate
>> scsi_request structure that drivers that want to support SCSI passthough
>> need to embedded as the first thing into their request-private data,
>> similar to how we handle NVMe passthrough commands.
>>
>> To support this I've added support for that the private data after
>> request structure to the legacy request path instead, so that it can
>> be treated the same way as the blk-mq path.  Compare to the current
>> scsi_cmnd allocator that actually is a major simplification.
>>
>> Changes since V1:
>>  - fix handling of a NULL sense pointer in __scsi_execute
>>  - clean up handling of the flush flags in the block layer and MD
>>  - additional small cleanup in dm-rq
> 
> Hello Christoph,
> 
> A general comment: patch "block: allow specifying size for extra
> command data" is a very welcome improvement but unfortunately also
> introduces an inconsistency among block drivers. This patch series
> namely creates two kinds of block drivers:
> - Block drivers that use the block layer core to allocate
>   request-private data. These block drivers set request.cmd_size
>   to a non-zero value and do not need request.special.
> - Block drivers that allocate request-private data themselves.
>   These block drivers set request.cmd_size to zero and use
>   request.special to translate a request pointer into the private
>   data pointer.
> 
> Have you considered to convert all block drivers to the new
> approach and to get rid of request.special? If so, do you already
> have plans to start working on this? I'm namely wondering whether I
> should start working on this myself.
> 
I was actually looking into it, too.
Once scsi passthrough is removed from struct request there is no
reasonable need to rely on '->special' for anything, and we should just
ditch it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
@ 2017-01-30  6:58     ` Hannes Reinecke
  0 siblings, 0 replies; 172+ messages in thread
From: Hannes Reinecke @ 2017-01-30  6:58 UTC (permalink / raw)
  To: Bart Van Assche, hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/27/2017 10:27 PM, Bart Van Assche wrote:
> On Wed, 2017-01-25 at 18:25 +0100, Christoph Hellwig wrote:
>> this series splits the support for SCSI passthrough commands from the
>> main struct request used all over the block layer into a separate
>> scsi_request structure that drivers that want to support SCSI passthough
>> need to embedded as the first thing into their request-private data,
>> similar to how we handle NVMe passthrough commands.
>>
>> To support this I've added support for that the private data after
>> request structure to the legacy request path instead, so that it can
>> be treated the same way as the blk-mq path.  Compare to the current
>> scsi_cmnd allocator that actually is a major simplification.
>>
>> Changes since V1:
>>  - fix handling of a NULL sense pointer in __scsi_execute
>>  - clean up handling of the flush flags in the block layer and MD
>>  - additional small cleanup in dm-rq
> 
> Hello Christoph,
> 
> A general comment: patch "block: allow specifying size for extra
> command data" is a very welcome improvement but unfortunately also
> introduces an inconsistency among block drivers. This patch series
> namely creates two kinds of block drivers:
> - Block drivers that use the block layer core to allocate
>   request-private data. These block drivers set request.cmd_size
>   to a non-zero value and do not need request.special.
> - Block drivers that allocate request-private data themselves.
>   These block drivers set request.cmd_size to zero and use
>   request.special to translate a request pointer into the private
>   data pointer.
> 
> Have you considered to convert all block drivers to the new
> approach and to get rid of request.special? If so, do you already
> have plans to start working on this? I'm namely wondering whether I
> should start working on this myself.
> 
I was actually looking into it, too.
Once scsi passthrough is removed from struct request there is no
reasonable need to rely on '->special' for anything, and we should just
ditch it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-27 16:56                                                 ` [dm-devel] " Jens Axboe
@ 2017-01-31  1:12                                                   ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-31  1:12 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
> On 01/27/2017 09:52 AM, Bart Van Assche wrote:
> > [  215.724452] general protection fault: 0000 [#1] SMP
> > [  215.725060] Call Trace:
> > [  215.725086]  scsi_disk_put+0x2d/0x40
> > [  215.725110]  sd_release+0x3d/0xb0
> > [  215.725137]  __blkdev_put+0x29e/0x360
> > [  215.725163]  blkdev_put+0x49/0x170
> > [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
> > [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
> > [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
> > [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
> > [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
> > [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
> > [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
> > [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
> > [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
> > [  215.725515]  do_vfs_ioctl+0x8f/0x700
> > [  215.725589]  SyS_ioctl+0x3c/0x70
> > [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
> > 
> 
> I have no idea what this is, I haven't messed with life time or devices
> or queues at all in that branch.

Hello Jens,

Running the srp-test software against kernel 4.9.6 and kernel 4.10-rc5
went fine. With your for-4.11/block branch (commit 400f73b23f457a) however
I just ran into the following:

[  214.555527] ------------[ cut here ]------------
[  214.555565] WARNING: CPU: 5 PID: 13201 at kernel/locking/lockdep.c:3514 lock_release+0x346/0x480
[  214.555588] DEBUG_LOCKS_WARN_ON(depth <= 0)
[  214.555824] CPU: 5 PID: 13201 Comm: fio Not tainted 4.10.0-rc3-dbg+ #1
[  214.555846] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
[  214.555867] Call Trace:
[  214.555889]  dump_stack+0x68/0x93
[  214.555911]  __warn+0xc6/0xe0
[  214.555953]  warn_slowpath_fmt+0x4a/0x50
[  214.555973]  lock_release+0x346/0x480
[  214.556021]  aio_write+0x106/0x140
[  214.556067]  do_io_submit+0x37d/0x900
[  214.556108]  SyS_io_submit+0xb/0x10
[  214.556131]  entry_SYSCALL_64_fastpath+0x18/0xad

I will continue to try to figure out what is causing this behavior.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-31  1:12                                                   ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-31  1:12 UTC (permalink / raw)
  To: hch, axboe
  Cc: linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
> On 01/27/2017 09:52 AM, Bart Van Assche wrote:
> > [  215.724452] general protection fault: 0000 [#1] SMP
> > [  215.725060] Call Trace:
> > [  215.725086]  scsi_disk_put+0x2d/0x40
> > [  215.725110]  sd_release+0x3d/0xb0
> > [  215.725137]  __blkdev_put+0x29e/0x360
> > [  215.725163]  blkdev_put+0x49/0x170
> > [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
> > [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
> > [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
> > [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
> > [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
> > [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
> > [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
> > [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
> > [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
> > [  215.725515]  do_vfs_ioctl+0x8f/0x700
> > [  215.725589]  SyS_ioctl+0x3c/0x70
> > [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
> >=20
>=20
> I have no idea what this is, I haven't messed with life time or devices
> or queues at all in that branch.

Hello Jens,

Running the srp-test software against kernel 4.9.6 and kernel 4.10-rc5
went fine. With your for-4.11/block branch (commit 400f73b23f457a) however
I just ran into the following:

[ =A0214.555527] ------------[ cut here ]------------
[ =A0214.555565] WARNING: CPU: 5 PID: 13201 at kernel/locking/lockdep.c:351=
4 lock_release+0x346/0x480
[ =A0214.555588] DEBUG_LOCKS_WARN_ON(depth <=3D 0)
[ =A0214.555824] CPU: 5 PID: 13201 Comm: fio Not tainted 4.10.0-rc3-dbg+ #1
[ =A0214.555846] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2=
 11/17/2014
[ =A0214.555867] Call Trace:
[ =A0214.555889] =A0dump_stack+0x68/0x93
[ =A0214.555911] =A0__warn+0xc6/0xe0
[ =A0214.555953] =A0warn_slowpath_fmt+0x4a/0x50
[ =A0214.555973] =A0lock_release+0x346/0x480
[ =A0214.556021] =A0aio_write+0x106/0x140
[ =A0214.556067] =A0do_io_submit+0x37d/0x900
[ =A0214.556108] =A0SyS_io_submit+0xb/0x10
[ =A0214.556131] =A0entry_SYSCALL_64_fastpath+0x18/0xad

I will continue to try to figure out what is causing this behavior.

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-31  1:12                                                   ` Bart Van Assche
@ 2017-01-31  1:38                                                     ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-31  1:38 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: hch, axboe, linux-scsi, linux-raid, dm-devel, linux-block,
	snitzer, j-nomura



> On Jan 30, 2017, at 5:12 PM, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
> 
>> On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
>>> On 01/27/2017 09:52 AM, Bart Van Assche wrote:
>>> [  215.724452] general protection fault: 0000 [#1] SMP
>>> [  215.725060] Call Trace:
>>> [  215.725086]  scsi_disk_put+0x2d/0x40
>>> [  215.725110]  sd_release+0x3d/0xb0
>>> [  215.725137]  __blkdev_put+0x29e/0x360
>>> [  215.725163]  blkdev_put+0x49/0x170
>>> [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
>>> [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
>>> [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
>>> [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
>>> [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
>>> [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
>>> [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
>>> [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
>>> [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
>>> [  215.725515]  do_vfs_ioctl+0x8f/0x700
>>> [  215.725589]  SyS_ioctl+0x3c/0x70
>>> [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
>>> 
>> 
>> I have no idea what this is, I haven't messed with life time or devices
>> or queues at all in that branch.
> 
> Hello Jens,
> 
> Running the srp-test software against kernel 4.9.6 and kernel 4.10-rc5
> went fine. With your for-4.11/block branch (commit 400f73b23f457a) however
> I just ran into the following:
> 
> [  214.555527] ------------[ cut here ]------------
> [  214.555565] WARNING: CPU: 5 PID: 13201 at kernel/locking/lockdep.c:3514 lock_release+0x346/0x480
> [  214.555588] DEBUG_LOCKS_WARN_ON(depth <= 0)
> [  214.555824] CPU: 5 PID: 13201 Comm: fio Not tainted 4.10.0-rc3-dbg+ #1
> [  214.555846] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
> [  214.555867] Call Trace:
> [  214.555889]  dump_stack+0x68/0x93
> [  214.555911]  __warn+0xc6/0xe0
> [  214.555953]  warn_slowpath_fmt+0x4a/0x50
> [  214.555973]  lock_release+0x346/0x480
> [  214.556021]  aio_write+0x106/0x140
> [  214.556067]  do_io_submit+0x37d/0x900
> [  214.556108]  SyS_io_submit+0xb/0x10
> [  214.556131]  entry_SYSCALL_64_fastpath+0x18/0xad
> 
> I will continue to try to figure out what is causing this behavior.

That's a known bug in mainline. Pull it into 4.10-rc6,
or use my for-next where everything is already merged. 



^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-31  1:38                                                     ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-31  1:38 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: hch, axboe, linux-scsi, linux-raid, dm-devel, linux-block,
	snitzer, j-nomura



> On Jan 30, 2017, at 5:12 PM, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
> 
>> On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
>>> On 01/27/2017 09:52 AM, Bart Van Assche wrote:
>>> [  215.724452] general protection fault: 0000 [#1] SMP
>>> [  215.725060] Call Trace:
>>> [  215.725086]  scsi_disk_put+0x2d/0x40
>>> [  215.725110]  sd_release+0x3d/0xb0
>>> [  215.725137]  __blkdev_put+0x29e/0x360
>>> [  215.725163]  blkdev_put+0x49/0x170
>>> [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
>>> [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
>>> [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
>>> [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
>>> [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
>>> [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
>>> [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
>>> [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
>>> [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
>>> [  215.725515]  do_vfs_ioctl+0x8f/0x700
>>> [  215.725589]  SyS_ioctl+0x3c/0x70
>>> [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
>>> 
>> 
>> I have no idea what this is, I haven't messed with life time or devices
>> or queues at all in that branch.
> 
> Hello Jens,
> 
> Running the srp-test software against kernel 4.9.6 and kernel 4.10-rc5
> went fine. With your for-4.11/block branch (commit 400f73b23f457a) however
> I just ran into the following:
> 
> [  214.555527] ------------[ cut here ]------------
> [  214.555565] WARNING: CPU: 5 PID: 13201 at kernel/locking/lockdep.c:3514 lock_release+0x346/0x480
> [  214.555588] DEBUG_LOCKS_WARN_ON(depth <= 0)
> [  214.555824] CPU: 5 PID: 13201 Comm: fio Not tainted 4.10.0-rc3-dbg+ #1
> [  214.555846] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
> [  214.555867] Call Trace:
> [  214.555889]  dump_stack+0x68/0x93
> [  214.555911]  __warn+0xc6/0xe0
> [  214.555953]  warn_slowpath_fmt+0x4a/0x50
> [  214.555973]  lock_release+0x346/0x480
> [  214.556021]  aio_write+0x106/0x140
> [  214.556067]  do_io_submit+0x37d/0x900
> [  214.556108]  SyS_io_submit+0xb/0x10
> [  214.556131]  entry_SYSCALL_64_fastpath+0x18/0xad
> 
> I will continue to try to figure out what is causing this behavior.

That's a known bug in mainline. Pull it into 4.10-rc6,
or use my for-next where everything is already merged.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-01-31  1:38                                                     ` Jens Axboe
@ 2017-01-31  4:13                                                       ` Jens Axboe
  -1 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-31  4:13 UTC (permalink / raw)
  To: Jens Axboe, Bart Van Assche
  Cc: linux-block, linux-scsi, snitzer, linux-raid, dm-devel, j-nomura, hch

On 01/30/2017 05:38 PM, Jens Axboe wrote:
> 
> 
>> On Jan 30, 2017, at 5:12 PM, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
>>
>>> On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
>>>> On 01/27/2017 09:52 AM, Bart Van Assche wrote:
>>>> [  215.724452] general protection fault: 0000 [#1] SMP
>>>> [  215.725060] Call Trace:
>>>> [  215.725086]  scsi_disk_put+0x2d/0x40
>>>> [  215.725110]  sd_release+0x3d/0xb0
>>>> [  215.725137]  __blkdev_put+0x29e/0x360
>>>> [  215.725163]  blkdev_put+0x49/0x170
>>>> [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
>>>> [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
>>>> [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
>>>> [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
>>>> [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
>>>> [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
>>>> [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
>>>> [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
>>>> [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
>>>> [  215.725515]  do_vfs_ioctl+0x8f/0x700
>>>> [  215.725589]  SyS_ioctl+0x3c/0x70
>>>> [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
>>>>
>>>
>>> I have no idea what this is, I haven't messed with life time or devices
>>> or queues at all in that branch.
>>
>> Hello Jens,
>>
>> Running the srp-test software against kernel 4.9.6 and kernel 4.10-rc5
>> went fine. With your for-4.11/block branch (commit 400f73b23f457a) however
>> I just ran into the following:
>>
>> [  214.555527] ------------[ cut here ]------------
>> [  214.555565] WARNING: CPU: 5 PID: 13201 at kernel/locking/lockdep.c:3514 lock_release+0x346/0x480
>> [  214.555588] DEBUG_LOCKS_WARN_ON(depth <= 0)
>> [  214.555824] CPU: 5 PID: 13201 Comm: fio Not tainted 4.10.0-rc3-dbg+ #1
>> [  214.555846] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
>> [  214.555867] Call Trace:
>> [  214.555889]  dump_stack+0x68/0x93
>> [  214.555911]  __warn+0xc6/0xe0
>> [  214.555953]  warn_slowpath_fmt+0x4a/0x50
>> [  214.555973]  lock_release+0x346/0x480
>> [  214.556021]  aio_write+0x106/0x140
>> [  214.556067]  do_io_submit+0x37d/0x900
>> [  214.556108]  SyS_io_submit+0xb/0x10
>> [  214.556131]  entry_SYSCALL_64_fastpath+0x18/0xad
>>
>> I will continue to try to figure out what is causing this behavior.
> 
> That's a known bug in mainline. Pull it into 4.10-rc6,
> or use my for-next where everything is already merged.

Since I'm not on the phone anymore, this is the commit that was
merged after my for-4.11/block was forked, which fixes this issue:

commit a12f1ae61c489076a9aeb90bddca7722bf330df3
Author: Shaohua Li <shli@fb.com>
Date:   Tue Dec 13 12:09:56 2016 -0800

    aio: fix lock dep warning

So you can just pull that in, if you want, or do what I suggested above.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-31  4:13                                                       ` Jens Axboe
  0 siblings, 0 replies; 172+ messages in thread
From: Jens Axboe @ 2017-01-31  4:13 UTC (permalink / raw)
  To: Jens Axboe, Bart Van Assche
  Cc: hch, linux-scsi, linux-raid, dm-devel, linux-block, snitzer, j-nomura

On 01/30/2017 05:38 PM, Jens Axboe wrote:
> 
> 
>> On Jan 30, 2017, at 5:12 PM, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
>>
>>> On Fri, 2017-01-27 at 09:56 -0700, Jens Axboe wrote:
>>>> On 01/27/2017 09:52 AM, Bart Van Assche wrote:
>>>> [  215.724452] general protection fault: 0000 [#1] SMP
>>>> [  215.725060] Call Trace:
>>>> [  215.725086]  scsi_disk_put+0x2d/0x40
>>>> [  215.725110]  sd_release+0x3d/0xb0
>>>> [  215.725137]  __blkdev_put+0x29e/0x360
>>>> [  215.725163]  blkdev_put+0x49/0x170
>>>> [  215.725192]  dm_put_table_device+0x58/0xc0 [dm_mod]
>>>> [  215.725219]  dm_put_device+0x70/0xc0 [dm_mod]
>>>> [  215.725269]  free_priority_group+0x92/0xc0 [dm_multipath]
>>>> [  215.725295]  free_multipath+0x70/0xc0 [dm_multipath]
>>>> [  215.725320]  multipath_dtr+0x19/0x20 [dm_multipath]
>>>> [  215.725348]  dm_table_destroy+0x67/0x120 [dm_mod]
>>>> [  215.725379]  dev_suspend+0xde/0x240 [dm_mod]
>>>> [  215.725434]  ctl_ioctl+0x1f5/0x520 [dm_mod]
>>>> [  215.725489]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
>>>> [  215.725515]  do_vfs_ioctl+0x8f/0x700
>>>> [  215.725589]  SyS_ioctl+0x3c/0x70
>>>> [  215.725614]  entry_SYSCALL_64_fastpath+0x18/0xad
>>>>
>>>
>>> I have no idea what this is, I haven't messed with life time or devices
>>> or queues at all in that branch.
>>
>> Hello Jens,
>>
>> Running the srp-test software against kernel 4.9.6 and kernel 4.10-rc5
>> went fine. With your for-4.11/block branch (commit 400f73b23f457a) however
>> I just ran into the following:
>>
>> [  214.555527] ------------[ cut here ]------------
>> [  214.555565] WARNING: CPU: 5 PID: 13201 at kernel/locking/lockdep.c:3514 lock_release+0x346/0x480
>> [  214.555588] DEBUG_LOCKS_WARN_ON(depth <= 0)
>> [  214.555824] CPU: 5 PID: 13201 Comm: fio Not tainted 4.10.0-rc3-dbg+ #1
>> [  214.555846] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
>> [  214.555867] Call Trace:
>> [  214.555889]  dump_stack+0x68/0x93
>> [  214.555911]  __warn+0xc6/0xe0
>> [  214.555953]  warn_slowpath_fmt+0x4a/0x50
>> [  214.555973]  lock_release+0x346/0x480
>> [  214.556021]  aio_write+0x106/0x140
>> [  214.556067]  do_io_submit+0x37d/0x900
>> [  214.556108]  SyS_io_submit+0xb/0x10
>> [  214.556131]  entry_SYSCALL_64_fastpath+0x18/0xad
>>
>> I will continue to try to figure out what is causing this behavior.
> 
> That's a known bug in mainline. Pull it into 4.10-rc6,
> or use my for-next where everything is already merged.

Since I'm not on the phone anymore, this is the commit that was
merged after my for-4.11/block was forked, which fixes this issue:

commit a12f1ae61c489076a9aeb90bddca7722bf330df3
Author: Shaohua Li <shli@fb.com>
Date:   Tue Dec 13 12:09:56 2016 -0800

    aio: fix lock dep warning

So you can just pull that in, if you want, or do what I suggested above.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-31  1:38                                                     ` Jens Axboe
@ 2017-01-31 21:35                                                       ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-31 21:35 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-raid, snitzer, hch, linux-scsi, axboe,
	j-nomura, dm-devel

On Mon, 2017-01-30 at 17:38 -0800, Jens Axboe wrote:
> That's a known bug in mainline. Pull it into 4.10-rc6,
> or use my for-next where everything is already merged. 

Hello Jens,

With your for-next branch (commit c2e60b3a2602) I haven't hit any block
layer crashes so far. The only issue I encountered that is new is a
memory leak triggered by the SG-IO code. These memory leak reports
started to appear after I started testing the mq-deadline scheduler.
kmemleak reported the following call stack multiple times after my tests
had finished:

unreferenced object 0xffff88041119e528 (size 192):
  comm "multipathd", pid 2353, jiffies 4295128020 (age 1332.440s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 12 01 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8165e3b5>] kmemleak_alloc+0x45/0xa0
    [<ffffffff811cc23d>] __kmalloc+0x15d/0x2f0
    [<ffffffff81310e35>] bio_alloc_bioset+0x185/0x1f0
    [<ffffffff813117f4>] bio_map_user_iov+0x124/0x400
    [<ffffffff81320b7a>] blk_rq_map_user_iov+0x11a/0x210
    [<ffffffff81320cbd>] blk_rq_map_user+0x4d/0x60
    [<ffffffff81336694>] sg_io+0x3d4/0x410
    [<ffffffff813369d0>] scsi_cmd_ioctl+0x300/0x490
    [<ffffffff81336b9d>] scsi_cmd_blk_ioctl+0x3d/0x50
    [<ffffffff814b4360>] sd_ioctl+0x80/0x100
    [<ffffffff8132ddde>] blkdev_ioctl+0x51e/0x9f0
    [<ffffffff8122f388>] block_ioctl+0x38/0x40
    [<ffffffff8120097f>] do_vfs_ioctl+0x8f/0x700
    [<ffffffff8120102c>] SyS_ioctl+0x3c/0x70
    [<ffffffff8166c4aa>] entry_SYSCALL_64_fastpath+0x18/0xad

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-31 21:35                                                       ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-31 21:35 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-raid, snitzer, hch, linux-scsi, axboe,
	j-nomura, dm-devel

On Mon, 2017-01-30 at 17:38 -0800, Jens Axboe wrote:
> That's a known bug in mainline. Pull it into 4.10-rc6,
> or use my for-next where everything is already merged. 

Hello Jens,

With your for-next branch (commit c2e60b3a2602) I haven't hit any block
layer crashes so far. The only issue I encountered that is new is a
memory leak triggered by the SG-IO code. These memory leak reports
started to appear after I started testing the mq-deadline scheduler.
kmemleak reported the following call stack multiple times after my tests
had finished:

unreferenced object 0xffff88041119e528 (size 192):
  comm "multipathd", pid 2353, jiffies 4295128020 (age 1332.440s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 12 01 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8165e3b5>] kmemleak_alloc+0x45/0xa0
    [<ffffffff811cc23d>] __kmalloc+0x15d/0x2f0
    [<ffffffff81310e35>] bio_alloc_bioset+0x185/0x1f0
    [<ffffffff813117f4>] bio_map_user_iov+0x124/0x400
    [<ffffffff81320b7a>] blk_rq_map_user_iov+0x11a/0x210
    [<ffffffff81320cbd>] blk_rq_map_user+0x4d/0x60
    [<ffffffff81336694>] sg_io+0x3d4/0x410
    [<ffffffff813369d0>] scsi_cmd_ioctl+0x300/0x490
    [<ffffffff81336b9d>] scsi_cmd_blk_ioctl+0x3d/0x50
    [<ffffffff814b4360>] sd_ioctl+0x80/0x100
    [<ffffffff8132ddde>] blkdev_ioctl+0x51e/0x9f0
    [<ffffffff8122f388>] block_ioctl+0x38/0x40
    [<ffffffff8120097f>] do_vfs_ioctl+0x8f/0x700
    [<ffffffff8120102c>] SyS_ioctl+0x3c/0x70
    [<ffffffff8166c4aa>] entry_SYSCALL_64_fastpath+0x18/0xad

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-31 21:35                                                       ` Bart Van Assche
@ 2017-01-31 21:55                                                         ` Bart Van Assche
  -1 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-31 21:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-raid, snitzer, hch, linux-scsi, axboe,
	j-nomura, dm-devel

On Tue, 2017-01-31 at 13:34 -0800, Bart Van Assche wrote:
> On Mon, 2017-01-30 at 17:38 -0800, Jens Axboe wrote:
> > That's a known bug in mainline. Pull it into 4.10-rc6,
> > or use my for-next where everything is already merged. 
> 
> Hello Jens,
> 
> With your for-next branch (commit c2e60b3a2602) I haven't hit any block
> layer crashes so far. The only issue I encountered that is new is a
> memory leak triggered by the SG-IO code. These memory leak reports
> started to appear after I started testing the mq-deadline scheduler.
> kmemleak reported the following call stack multiple times after my tests
> had finished:
> 
> unreferenced object 0xffff88041119e528 (size 192):
>   comm "multipathd", pid 2353, jiffies 4295128020 (age 1332.440s)
>   hex dump (first 32 bytes):
>     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>     00 00 00 00 00 00 00 00 12 01 00 00 00 00 00 00  ................
>   backtrace:
>     [<ffffffff8165e3b5>] kmemleak_alloc+0x45/0xa0
>     [<ffffffff811cc23d>] __kmalloc+0x15d/0x2f0
>     [<ffffffff81310e35>] bio_alloc_bioset+0x185/0x1f0
>     [<ffffffff813117f4>] bio_map_user_iov+0x124/0x400
>     [<ffffffff81320b7a>] blk_rq_map_user_iov+0x11a/0x210
>     [<ffffffff81320cbd>] blk_rq_map_user+0x4d/0x60
>     [<ffffffff81336694>] sg_io+0x3d4/0x410
>     [<ffffffff813369d0>] scsi_cmd_ioctl+0x300/0x490
>     [<ffffffff81336b9d>] scsi_cmd_blk_ioctl+0x3d/0x50
>     [<ffffffff814b4360>] sd_ioctl+0x80/0x100
>     [<ffffffff8132ddde>] blkdev_ioctl+0x51e/0x9f0
>     [<ffffffff8122f388>] block_ioctl+0x38/0x40
>     [<ffffffff8120097f>] do_vfs_ioctl+0x8f/0x700
>     [<ffffffff8120102c>] SyS_ioctl+0x3c/0x70
>     [<ffffffff8166c4aa>] entry_SYSCALL_64_fastpath+0x18/0xad

After I repeated my test the above findings were confirmed: no memory leaks
were reported by kmemleak after a test with I/O scheduler "none" and the
above call stack was reported 44 times by kmemleak after a test with I/O
scheduler "mq-deadline".

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
@ 2017-01-31 21:55                                                         ` Bart Van Assche
  0 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-01-31 21:55 UTC (permalink / raw)
  To: axboe
  Cc: linux-block, linux-raid, snitzer, hch, linux-scsi, axboe,
	j-nomura, dm-devel

On Tue, 2017-01-31 at 13:34 -0800, Bart Van Assche wrote:
> On Mon, 2017-01-30 at 17:38 -0800, Jens Axboe wrote:
> > That's a known bug in mainline. Pull it into 4.10-rc6,
> > or use my for-next where everything is already merged. 
> 
> Hello Jens,
> 
> With your for-next branch (commit c2e60b3a2602) I haven't hit any block
> layer crashes so far. The only issue I encountered that is new is a
> memory leak triggered by the SG-IO code. These memory leak reports
> started to appear after I started testing the mq-deadline scheduler.
> kmemleak reported the following call stack multiple times after my tests
> had finished:
> 
> unreferenced object 0xffff88041119e528 (size 192):
>   comm "multipathd", pid 2353, jiffies 4295128020 (age 1332.440s)
>   hex dump (first 32 bytes):
>     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>     00 00 00 00 00 00 00 00 12 01 00 00 00 00 00 00  ................
>   backtrace:
>     [<ffffffff8165e3b5>] kmemleak_alloc+0x45/0xa0
>     [<ffffffff811cc23d>] __kmalloc+0x15d/0x2f0
>     [<ffffffff81310e35>] bio_alloc_bioset+0x185/0x1f0
>     [<ffffffff813117f4>] bio_map_user_iov+0x124/0x400
>     [<ffffffff81320b7a>] blk_rq_map_user_iov+0x11a/0x210
>     [<ffffffff81320cbd>] blk_rq_map_user+0x4d/0x60
>     [<ffffffff81336694>] sg_io+0x3d4/0x410
>     [<ffffffff813369d0>] scsi_cmd_ioctl+0x300/0x490
>     [<ffffffff81336b9d>] scsi_cmd_blk_ioctl+0x3d/0x50
>     [<ffffffff814b4360>] sd_ioctl+0x80/0x100
>     [<ffffffff8132ddde>] blkdev_ioctl+0x51e/0x9f0
>     [<ffffffff8122f388>] block_ioctl+0x38/0x40
>     [<ffffffff8120097f>] do_vfs_ioctl+0x8f/0x700
>     [<ffffffff8120102c>] SyS_ioctl+0x3c/0x70
>     [<ffffffff8166c4aa>] entry_SYSCALL_64_fastpath+0x18/0xad

After I repeated my test the above findings were confirmed: no memory leaks
were reported by kmemleak after a test with I/O scheduler "none" and the
above call stack was reported 44 times by kmemleak after a test with I/O
scheduler "mq-deadline".

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-31 21:55                                                         ` Bart Van Assche
  (?)
@ 2017-01-31 21:58                                                         ` Jens Axboe
  2017-02-01  1:01                                                           ` Bart Van Assche
  -1 siblings, 1 reply; 172+ messages in thread
From: Jens Axboe @ 2017-01-31 21:58 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-block, linux-raid, snitzer, hch, linux-scsi, axboe,
	j-nomura, dm-devel

On 01/31/2017 01:55 PM, Bart Van Assche wrote:
> On Tue, 2017-01-31 at 13:34 -0800, Bart Van Assche wrote:
>> On Mon, 2017-01-30 at 17:38 -0800, Jens Axboe wrote:
>>> That's a known bug in mainline. Pull it into 4.10-rc6,
>>> or use my for-next where everything is already merged. 
>>
>> Hello Jens,
>>
>> With your for-next branch (commit c2e60b3a2602) I haven't hit any block
>> layer crashes so far. The only issue I encountered that is new is a
>> memory leak triggered by the SG-IO code. These memory leak reports
>> started to appear after I started testing the mq-deadline scheduler.
>> kmemleak reported the following call stack multiple times after my tests
>> had finished:
>>
>> unreferenced object 0xffff88041119e528 (size 192):
>>   comm "multipathd", pid 2353, jiffies 4295128020 (age 1332.440s)
>>   hex dump (first 32 bytes):
>>     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>>     00 00 00 00 00 00 00 00 12 01 00 00 00 00 00 00  ................
>>   backtrace:
>>     [<ffffffff8165e3b5>] kmemleak_alloc+0x45/0xa0
>>     [<ffffffff811cc23d>] __kmalloc+0x15d/0x2f0
>>     [<ffffffff81310e35>] bio_alloc_bioset+0x185/0x1f0
>>     [<ffffffff813117f4>] bio_map_user_iov+0x124/0x400
>>     [<ffffffff81320b7a>] blk_rq_map_user_iov+0x11a/0x210
>>     [<ffffffff81320cbd>] blk_rq_map_user+0x4d/0x60
>>     [<ffffffff81336694>] sg_io+0x3d4/0x410
>>     [<ffffffff813369d0>] scsi_cmd_ioctl+0x300/0x490
>>     [<ffffffff81336b9d>] scsi_cmd_blk_ioctl+0x3d/0x50
>>     [<ffffffff814b4360>] sd_ioctl+0x80/0x100
>>     [<ffffffff8132ddde>] blkdev_ioctl+0x51e/0x9f0
>>     [<ffffffff8122f388>] block_ioctl+0x38/0x40
>>     [<ffffffff8120097f>] do_vfs_ioctl+0x8f/0x700
>>     [<ffffffff8120102c>] SyS_ioctl+0x3c/0x70
>>     [<ffffffff8166c4aa>] entry_SYSCALL_64_fastpath+0x18/0xad
> 
> After I repeated my test the above findings were confirmed: no memory leaks
> were reported by kmemleak after a test with I/O scheduler "none" and the
> above call stack was reported 44 times by kmemleak after a test with I/O
> scheduler "mq-deadline".

Interesting, I'll check this. Doesn't make any sense why the scheduler
would be implicated in that, given how we run completions now. But if
it complains, then something must be up.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-01-31 21:58                                                         ` Jens Axboe
@ 2017-02-01  1:01                                                           ` Bart Van Assche
  2017-02-01  6:38                                                             ` Jens Axboe
  0 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-01  1:01 UTC (permalink / raw)
  To: axboe; +Cc: linux-block

On Tue, 2017-01-31 at 13:58 -0800, Jens Axboe wrote:
> Interesting, I'll check this. Doesn't make any sense why the scheduler
> would be implicated in that, given how we run completions now. But if
> it complains, then something must be up.

(reduced CC-list)

There is another issue that needs further attention and for which I have
not yet had the time to root-cause it: several processes became unkillable
while I was running srp-test (with scheduler "none"). Apparently the dm
queues got stuck in state BLK_MQ_S_SCHED_RESTART:

# grep -aH '' dm*/*/0/{state,dispatch,*/rq_list}
dm-0/mq/0/state:0x4
dm-1/mq/0/state:0x4
dm-0/mq/0/dispatch:ffff880386d1ca00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1637, .internal_tag=-1}
dm-1/mq/0/dispatch:ffff880388ee5400 {.cmd_type=1, .cmd_flags=0x4000, .rq_flags=0x2000, .tag=42, .internal_tag=-1}
dm-1/mq/0/dispatch:ffff880388ee5600 {.cmd_type=1, .cmd_flags=0x4000, .rq_flags=0x2000, .tag=43, .internal_tag=-1}
dm-1/mq/0/dispatch:ffff880388ee5800 {.cmd_type=1, .cmd_flags=0x4000, .rq_flags=0x2000, .tag=44, .internal_tag=-1}
dm-1/mq/0/dispatch:ffff880388ee5a00 {.cmd_type=1, .cmd_flags=0x0, .rq_flags=0x2000, .tag=45, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1cc00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1638, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ce00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1639, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1d000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1640, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1d200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1641, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1b600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1627, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1b800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1628, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1d400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1642, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1bc00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1630, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ae00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1623, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1c200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1633, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1d600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1643, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1d800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1644, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1c400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1634, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1a400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1618, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1c600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1635, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1c800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1636, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1da00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1645, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1dc00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1646, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1de00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1647, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1e000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1648, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1e200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1649, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1e400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1650, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1e600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1651, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1e800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1652, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ea00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1653, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ec00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1654, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ee00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1655, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1f000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1656, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1f200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1657, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1f400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1658, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1f600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1659, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1f800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1660, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1fa00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1661, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1fc00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1662, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1fe00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1663, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1600, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1601, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1602, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1603, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1604, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18a00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1605, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18c00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1606, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d18e00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1607, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1608, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1609, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1610, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1611, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1612, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19a00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1613, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19c00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1614, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d19e00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1615, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1a000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1616, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1a200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1617, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1a600 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1619, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1a800 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1620, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1aa00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1621, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ac00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1622, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1b000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1624, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1b200 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1625, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1b400 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1626, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1ba00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1629, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1be00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1631, .internal_tag=-1}
dm-0/mq/0/cpu1/rq_list:ffff880386d1c000 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1632, .internal_tag=-1}
dm-0/mq/0/cpu5/rq_list:ffff880385b97e00 {.cmd_type=1, .cmd_flags=0x0, .rq_flags=0x2000, .tag=447, .internal_tag=-1}

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01  1:01                                                           ` Bart Van Assche
@ 2017-02-01  6:38                                                             ` Jens Axboe
  2017-02-01 16:46                                                               ` Bart Van Assche
  0 siblings, 1 reply; 172+ messages in thread
From: Jens Axboe @ 2017-02-01  6:38 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: linux-block

On 01/31/2017 05:01 PM, Bart Van Assche wrote:
> On Tue, 2017-01-31 at 13:58 -0800, Jens Axboe wrote:
>> Interesting, I'll check this. Doesn't make any sense why the scheduler
>> would be implicated in that, given how we run completions now. But if
>> it complains, then something must be up.
> 
> (reduced CC-list)
> 
> There is another issue that needs further attention and for which I have
> not yet had the time to root-cause it: several processes became unkillable
> while I was running srp-test (with scheduler "none"). Apparently the dm
> queues got stuck in state BLK_MQ_S_SCHED_RESTART:
> 
> # grep -aH '' dm*/*/0/{state,dispatch,*/rq_list}
> dm-0/mq/0/state:0x4
> dm-1/mq/0/state:0x4
> dm-0/mq/0/dispatch:ffff880386d1ca00 {.cmd_type=1, .cmd_flags=0x8801, .rq_flags=0x2000, .tag=1637, .internal_tag=-1}
> dm-1/mq/0/dispatch:ffff880388ee5400 {.cmd_type=1, .cmd_flags=0x4000, .rq_flags=0x2000, .tag=42, .internal_tag=-1}
> dm-1/mq/0/dispatch:ffff880388ee5600 {.cmd_type=1, .cmd_flags=0x4000, .rq_flags=0x2000, .tag=43, .internal_tag=-1}

I think this patch:

http://git.kernel.dk/cgit/linux-block/commit/?h=for-4.11/block&id=12d70958a2e8d587acaa51dafd5d6620e00b7543

should fix it for you. I just ran into the same thing tonight, testing
an unrelated thing. It's the only reason that state should be 0x4 for
you, so it has the same finger print.

The patch has been merged into for-next.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01  6:38                                                             ` Jens Axboe
@ 2017-02-01 16:46                                                               ` Bart Van Assche
  2017-02-01 17:13                                                                 ` Jens Axboe
  0 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-01 16:46 UTC (permalink / raw)
  To: axboe; +Cc: linux-block

On Tue, 2017-01-31 at 22:38 -0800, Jens Axboe wrote:
> I think this patch:
>
> http://git.kernel.dk/cgit/linux-block/commit/?h=for-4.11/block&id=12d70958a2e8d587acaa51dafd5d6620e00b7543
>
> should fix it for you. I just ran into the same thing tonight, testing
> an unrelated thing. It's the only reason that state should be 0x4 for
> you, so it has the same finger print.
>
> The patch has been merged into for-next.

Hello Jens,

Thanks for having looked into this. However, after having pulled the latest
block for-next tree (dbb85b06229f) another lockup was triggered soon (02-sq
is the name of a shell script of the srp-test suite):

[  243.021265] sysrq: SysRq : Show Blocked State
[  243.021301]   task                        PC stack   pid father
[  243.022909] 02-sq           D    0 10864  10509 0x00000000
[  243.022933] Call Trace:
[  243.022956]  __schedule+0x2da/0xb00
[  243.022979]  schedule+0x38/0x90
[  243.023002]  blk_mq_freeze_queue_wait+0x51/0xa0
[  243.023025]  ? remove_wait_queue+0x70/0x70
[  243.023047]  blk_mq_freeze_queue+0x15/0x20
[  243.023070]  elevator_switch+0x24/0x220
[  243.023093]  __elevator_change+0xd3/0x110
[  243.023115]  elv_iosched_store+0x21/0x60
[  243.023140]  queue_attr_store+0x54/0x90
[  243.023164]  sysfs_kf_write+0x40/0x50
[  243.023188]  kernfs_fop_write+0x137/0x1c0
[  243.023214]  __vfs_write+0x23/0x140
[  243.023242]  ? rcu_read_lock_sched_held+0x45/0x80
[  243.023265]  ? rcu_sync_lockdep_assert+0x2a/0x50
[  243.023287]  ? __sb_start_write+0xde/0x200
[  243.023308]  ? vfs_write+0x190/0x1e0
[  243.023329]  vfs_write+0xc3/0x1e0
[  243.023351]  SyS_write+0x44/0xa0
[  243.023373]  entry_SYSCALL_64_fastpath+0x18/0xad

My attempt to query the state of the blk-mq queues triggered the
following hang:

[  243.023555] grep            D    0 11010  11008 0x00000000
[  243.023578] Call Trace:
[  243.023599]  __schedule+0x2da/0xb00
[  243.023619]  schedule+0x38/0x90
[  243.023640]  schedule_preempt_disabled+0x10/0x20
[  243.023662]  mutex_lock_nested+0x23a/0x650
[  243.023683]  ? hctx_tags_show+0x2c/0x60
[  243.023703]  hctx_tags_show+0x2c/0x60
[  243.023725]  seq_read+0xf2/0x3d0
[  243.023746]  ? full_proxy_poll+0xb0/0xb0
[  243.023776]  full_proxy_read+0x83/0xb0
[  243.023798]  ? full_proxy_poll+0xb0/0xb0
[  243.023821]  __vfs_read+0x23/0x130
[  243.023843]  vfs_read+0xa3/0x170
[  243.023865]  SyS_read+0x44/0xa0
[  243.023888]  entry_SYSCALL_64_fastpath+0x18/0xad

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01 16:46                                                               ` Bart Van Assche
@ 2017-02-01 17:13                                                                 ` Jens Axboe
  2017-02-01 17:28                                                                   ` Bart Van Assche
                                                                                     ` (2 more replies)
  0 siblings, 3 replies; 172+ messages in thread
From: Jens Axboe @ 2017-02-01 17:13 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: linux-block

On 02/01/2017 08:46 AM, Bart Van Assche wrote:
> On Tue, 2017-01-31 at 22:38 -0800, Jens Axboe wrote:
>> I think this patch:
>>
>> http://git.kernel.dk/cgit/linux-block/commit/?h=for-4.11/block&id=12d70958a2e8d587acaa51dafd5d6620e00b7543
>>
>> should fix it for you. I just ran into the same thing tonight, testing
>> an unrelated thing. It's the only reason that state should be 0x4 for
>> you, so it has the same finger print.
>>
>> The patch has been merged into for-next.
> 
> Hello Jens,
> 
> Thanks for having looked into this. However, after having pulled the latest
> block for-next tree (dbb85b06229f) another lockup was triggered soon (02-sq
> is the name of a shell script of the srp-test suite):
> 
> [  243.021265] sysrq: SysRq : Show Blocked State
> [  243.021301]   task                        PC stack   pid father
> [  243.022909] 02-sq           D    0 10864  10509 0x00000000
> [  243.022933] Call Trace:
> [  243.022956]  __schedule+0x2da/0xb00
> [  243.022979]  schedule+0x38/0x90
> [  243.023002]  blk_mq_freeze_queue_wait+0x51/0xa0
> [  243.023025]  ? remove_wait_queue+0x70/0x70
> [  243.023047]  blk_mq_freeze_queue+0x15/0x20
> [  243.023070]  elevator_switch+0x24/0x220
> [  243.023093]  __elevator_change+0xd3/0x110
> [  243.023115]  elv_iosched_store+0x21/0x60
> [  243.023140]  queue_attr_store+0x54/0x90
> [  243.023164]  sysfs_kf_write+0x40/0x50
> [  243.023188]  kernfs_fop_write+0x137/0x1c0
> [  243.023214]  __vfs_write+0x23/0x140
> [  243.023242]  ? rcu_read_lock_sched_held+0x45/0x80
> [  243.023265]  ? rcu_sync_lockdep_assert+0x2a/0x50
> [  243.023287]  ? __sb_start_write+0xde/0x200
> [  243.023308]  ? vfs_write+0x190/0x1e0
> [  243.023329]  vfs_write+0xc3/0x1e0
> [  243.023351]  SyS_write+0x44/0xa0
> [  243.023373]  entry_SYSCALL_64_fastpath+0x18/0xad

So that's changing the elevator - did this happen while heavy IO was
going to the drive, or was it idle?

> My attempt to query the state of the blk-mq queues triggered the
> following hang:
> 
> [  243.023555] grep            D    0 11010  11008 0x00000000
> [  243.023578] Call Trace:
> [  243.023599]  __schedule+0x2da/0xb00
> [  243.023619]  schedule+0x38/0x90
> [  243.023640]  schedule_preempt_disabled+0x10/0x20
> [  243.023662]  mutex_lock_nested+0x23a/0x650
> [  243.023683]  ? hctx_tags_show+0x2c/0x60
> [  243.023703]  hctx_tags_show+0x2c/0x60
> [  243.023725]  seq_read+0xf2/0x3d0
> [  243.023746]  ? full_proxy_poll+0xb0/0xb0
> [  243.023776]  full_proxy_read+0x83/0xb0
> [  243.023798]  ? full_proxy_poll+0xb0/0xb0
> [  243.023821]  __vfs_read+0x23/0x130
> [  243.023843]  vfs_read+0xa3/0x170
> [  243.023865]  SyS_read+0x44/0xa0
> [  243.023888]  entry_SYSCALL_64_fastpath+0x18/0xad

That's because the previous elevator switch is stalled in sysfs, and we
grab the queue sysfs lock for any of the show/store functions.  So if
one hangs, all of them will...

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01 17:13                                                                 ` Jens Axboe
@ 2017-02-01 17:28                                                                   ` Bart Van Assche
  2017-02-01 19:21                                                                   ` Bart Van Assche
  2017-02-01 22:01                                                                   ` Bart Van Assche
  2 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-02-01 17:28 UTC (permalink / raw)
  To: axboe; +Cc: linux-block

On Wed, 2017-02-01 at 09:13 -0800, Jens Axboe wrote:
> On 02/01/2017 08:46 AM, Bart Van Assche wrote:
> > Thanks for having looked into this. However, after having pulled the latest
> > block for-next tree (dbb85b06229f) another lockup was triggered soon (02-sq
> > is the name of a shell script of the srp-test suite):
> >=20
> > [  243.021265] sysrq: SysRq : Show Blocked State
> > [  243.021301]   task                        PC stack   pid father
> > [  243.022909] 02-sq           D    0 10864  10509 0x00000000
> > [  243.022933] Call Trace:
> > [  243.022956]  __schedule+0x2da/0xb00
> > [  243.022979]  schedule+0x38/0x90
> > [  243.023002]  blk_mq_freeze_queue_wait+0x51/0xa0
> > [  243.023025]  ? remove_wait_queue+0x70/0x70
> > [  243.023047]  blk_mq_freeze_queue+0x15/0x20
> > [  243.023070]  elevator_switch+0x24/0x220
> > [  243.023093]  __elevator_change+0xd3/0x110
> > [  243.023115]  elv_iosched_store+0x21/0x60
> > [  243.023140]  queue_attr_store+0x54/0x90
> > [  243.023164]  sysfs_kf_write+0x40/0x50
> > [  243.023188]  kernfs_fop_write+0x137/0x1c0
> > [  243.023214]  __vfs_write+0x23/0x140
> > [  243.023242]  ? rcu_read_lock_sched_held+0x45/0x80
> > [  243.023265]  ? rcu_sync_lockdep_assert+0x2a/0x50
> > [  243.023287]  ? __sb_start_write+0xde/0x200
> > [  243.023308]  ? vfs_write+0x190/0x1e0
> > [  243.023329]  vfs_write+0xc3/0x1e0
> > [  243.023351]  SyS_write+0x44/0xa0
> > [  243.023373]  entry_SYSCALL_64_fastpath+0x18/0xad
>=20
> So that's changing the elevator - did this happen while heavy IO was
> going to the drive, or was it idle?

Hello Jens,

The shell command that was used to set the elevator is the following
($realdev is a dm device):

echo none > /sys/class/block/$(basename "$realdev")/queue/scheduler

I'm not sure whether any I/O was ongoing when the scheduler was being
changed from "none" into "none". There are two other processes that got
stuck but running lsof against these processes did not reveal what block
device these two processes were trying to examine:

[  243.021672] systemd-udevd   D    0 10585    504 0x00000000
[  243.021700] Call Trace:
[  243.021726]  __schedule+0x2da/0xb00
[  243.021749]  schedule+0x38/0x90
[  243.021771]  schedule_timeout+0x2fe/0x640
[  243.021882]  io_schedule_timeout+0x9f/0x110
[  243.021930]  wait_on_page_bit_common+0x121/0x1e0
[  243.021977]  generic_file_read_iter+0x17c/0x790
[  243.022030]  blkdev_read_iter+0x30/0x40
[  243.022053]  __vfs_read+0xbb/0x130
[  243.022075]  vfs_read+0xa3/0x170
[  243.022098]  SyS_read+0x44/0xa0
[  243.022120]  entry_SYSCALL_64_fastpath+0x18/0xad
[  243.022298] systemd-udevd   D    0 10612    504 0x00000000
[  243.022320] Call Trace:
[  243.022341]  __schedule+0x2da/0xb00
[  243.022363]  schedule+0x38/0x90
[  243.022383]  schedule_timeout+0x2fe/0x640
[  243.022490]  io_schedule_timeout+0x9f/0x110
[  243.022543]  wait_on_page_bit_common+0x121/0x1e0
[  243.022595]  generic_file_read_iter+0x17c/0x790
[  243.022640]  blkdev_read_iter+0x30/0x40
[  243.022663]  __vfs_read+0xbb/0x130
[  243.022685]  vfs_read+0xa3/0x170
[  243.022707]  SyS_read+0x44/0xa0
[  243.022729]  entry_SYSCALL_64_fastpath+0x18/0xad

# lsof -p10585
COMMAND     PID USER  FD      TYPE DEVICE SIZE/OFF NODE NAME
systemd-u 10585 root  cwd  unknown                      /proc/10585/cwd (readlink: No such file or directory)
systemd-u 10585 root  rtd  unknown                      /proc/10585/root (readlink: No such file or directory)
systemd-u 10585 root  txt  unknown                      /proc/10585/exe
# lsof -p10612
COMMAND     PID USER  FD      TYPE DEVICE SIZE/OFF NODE NAME
systemd-u 10612 root  cwd  unknown                      /proc/10612/cwd (readlink: No such file or directory)
systemd-u 10612 root  rtd  unknown                      /proc/10612/root (readlink: No such file or directory)
systemd-u 10612 root  txt  unknown                      /proc/10612/exe

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01 17:13                                                                 ` Jens Axboe
  2017-02-01 17:28                                                                   ` Bart Van Assche
@ 2017-02-01 19:21                                                                   ` Bart Van Assche
  2017-02-01 22:01                                                                   ` Bart Van Assche
  2 siblings, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-02-01 19:21 UTC (permalink / raw)
  To: axboe; +Cc: linux-block

On Wed, 2017-02-01 at 09:13 -0800, Jens Axboe wrote:
> So that's changing the elevator - did this happen while heavy IO was
> going to the drive, or was it idle?

I just ran into an I/O hang while running test 02-sq on top of kernel
v4.9.6. I will have a closer look at the dm code to see whether I can
find anything that is wrong in the dm code.

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01 17:13                                                                 ` Jens Axboe
  2017-02-01 17:28                                                                   ` Bart Van Assche
  2017-02-01 19:21                                                                   ` Bart Van Assche
@ 2017-02-01 22:01                                                                   ` Bart Van Assche
  2017-02-02 17:27                                                                     ` Bart Van Assche
  2 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-01 22:01 UTC (permalink / raw)
  To: axboe; +Cc: hch, linux-block

On Wed, 2017-02-01 at 09:13 -0800, Jens Axboe wrote:
> So that's changing the elevator - did this happen while heavy IO was
> going to the drive, or was it idle?

Hello Jens,

I think I figured out what was going on:
* Test 02-mq created scsi-mq SRP paths and multipathd created dm-mq device
  nodes on top of these SRP paths.
* Test 02-sq started with removing the SRP paths and with switching the
  SCSI and DM cores from mq to sq but did not remove the dm-mq device
  nodes. When that test script next performed an SRP log in, multipathd
  failed to add the single queue SCSI devices to an existing dm-mq
  device node. In other words, the dm-mq devices had queue_if_no_path
  set but did not have any paths. Hence the lockup for requests sent
  to these dm devices.

I have modified my test scripts such that the dm device nodes from a
previous test are removed before a new test starts. Since I made that
change I haven't seen any I/O lockup. However, a new issue shows up
sporadically, an issue that I had not yet seen during any test with
a kernel tree from Linus:

[  227.613440] general protection fault: 0000 [#1] SMP
[  227.613495] Modules linked in: dm_service_time ib_srp scsi_transport_srp=
 target_core_user uio target_core_pscsi target_core_file ib_srpt target_cor=
e_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQ=
UERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_c=
onntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject=
_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter =
ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib msr rdma_uc=
m ib_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm mlx4_ib ib_core sb_=
edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipm=
i_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmul=
ni_intel pcbc tg3 aesni_intel iTCO_wdt mlx4_core ptp iTCO_vendor_support dc=
dbas aes_x86_64 crypto_simd glue_helper pps_core cryptd pcspkr devlink ipmi=
_si libphy ipmi_devintf fjes ipmi_msghandler tpm_tis tpm_tis_core lpc_ich m=
ei_me mfd_core mei shpchp wmi tpm button hid_generic usbhid mgag200 i2c_alg=
o_bit drm_kms_helper syscopyarea sysfillrect sr_mod sysimgblt fb_sys_fops c=
drom ttm drm ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod sc=
si_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
[  227.613774] CPU: 3 PID: 28 Comm: ksoftirqd/3 Not tainted 4.10.0-rc5-dbg+=
 #1
[  227.613840] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 1=
1/17/2014
[  227.613893] task: ffff880172a249c0 task.stack: ffffc90001aa8000
[  227.613932] RIP: 0010:rq_completed+0x12/0x90 [dm_mod]
[  227.613965] RSP: 0018:ffffc90001aabda8 EFLAGS: 00010246
[  227.614006] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 00000000000=
00000
[  227.614043] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6b6=
b6b6b
[  227.614074] RBP: ffffc90001aabdc0 R08: ffff8803825f4c38 R09: 00000000000=
00000
[  227.614105] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000=
00000
[  227.614137] R13: 0000000000000000 R14: ffffffff81c05120 R15: 00000000000=
00004
[  227.614170] FS:  0000000000000000(0000) GS:ffff88046f2c0000(0000) knlGS:=
0000000000000000
[  227.614209] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  227.614239] CR2: 0000557e28bc20d0 CR3: 000000038594e000 CR4: 00000000001=
406e0
[  227.614268] Call Trace:
[  227.614301]  dm_softirq_done+0xe6/0x1e0 [dm_mod]
[  227.614337]  blk_done_softirq+0x88/0xa0
[  227.614369]  __do_softirq+0xba/0x4c0
[  227.614470]  run_ksoftirqd+0x1a/0x50
[  227.614499]  smpboot_thread_fn+0x123/0x1e0
[  227.614529]  kthread+0x107/0x140
[  227.614624]  ret_from_fork+0x2e/0x40
[  227.614648] Code: ff ff 31 f6 48 89 c7 e8 cd 0e 2f e1 5d c3 90 66 2e 0f =
1f 84 00 00 00 00 00 55 48 63 f6 48 89 e5 41 55 41 89 d5 41 54 53 48 89 fb =
<4c> 8b a7 88 02 00 00 f0 ff 8c b7 50 03 00 00 e8 ba 43 ff ff 85=20
[  227.614738] RIP: rq_completed+0x12/0x90 [dm_mod] RSP: ffffc90001aabda8

(gdb) list *(rq_completed+0x12)
0xdd12 is in rq_completed (drivers/md/dm-rq.c:187).
182      * the md may be freed in dm_put() at the end of this function.
183      * Or do dm_get() before calling this function and dm_put() later.
184      */
185     static void rq_completed(struct mapped_device *md, int rw, bool run=
_queue)
186     {
187             struct request_queue *q =3D md->queue;
188             unsigned long flags;
189
190             atomic_dec(&md->pending[rw]);
191

(gdb) disas rq_completed
Dump of assembler code for function rq_completed:
   0x000000000000dd00 <+0>:     push   %rbp
   0x000000000000dd01 <+1>:     movslq %esi,%rsi
   0x000000000000dd04 <+4>:     mov    %rsp,%rbp
   0x000000000000dd07 <+7>:     push   %r13
   0x000000000000dd09 <+9>:     mov    %edx,%r13d
   0x000000000000dd0c <+12>:    push   %r12
   0x000000000000dd0e <+14>:    push   %rbx
   0x000000000000dd0f <+15>:    mov    %rdi,%rbx
   0x000000000000dd12 <+18>:    mov    0x288(%rdi),%r12
   0x000000000000dd19 <+25>:    lock decl 0x350(%rdi,%rsi,4)

So this was caused by an attempt to dereference %rdi = 0x6b6b6b6b6b6b6b6b.
Hence this is probably a use-after-free of struct mapped_device.

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: [dm-devel] split scsi passthrough fields out of struct request V2
  2017-02-01 22:01                                                                   ` Bart Van Assche
@ 2017-02-02 17:27                                                                     ` Bart Van Assche
  2017-02-02 18:33                                                                       ` Mike Snitzer
  0 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-02 17:27 UTC (permalink / raw)
  To: hch, snitzer; +Cc: linux-block, axboe

On Wed, 2017-02-01 at 22:01 +0000, Bart Van Assche wrote:
> However, a new issue shows up sporadically, an issue that I had not yet seen
> during any test with a kernel tree from Linus:
>
> [  227.613440] general protection fault: 0000 [#1] SMP
> [  227.613495] Modules linked in: dm_service_time ib_srp scsi_transport_s=
rp target_core_user uio target_core_pscsi target_core_file ib_srpt target_c=
ore_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MA=
SQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf=
_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reje=
ct_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filte=
r ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib msr rdma_=
ucm ib_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm mlx4_ib ib_core s=
b_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel i=
pmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clm=
ulni_intel pcbc tg3 aesni_intel iTCO_wdt mlx4_core ptp iTCO_vendor_support =
dcdbas aes_x86_64 crypto_simd glue_helper pps_core cryptd pcspkr devlink ip=
mi_si libphy ipmi_devintf fjes ipmi_msghandler tpm_tis tpm_tis_core lpc_ich=
 mei_me mfd_core mei shpchp wmi tpm button hid_generic usbhid mgag200 i2c_a=
lgo_bit drm_kms_helper syscopyarea sysfillrect sr_mod sysimgblt fb_sys_fops=
 cdrom ttm drm ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod =
scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
> [  227.613774] CPU: 3 PID: 28 Comm: ksoftirqd/3 Not tainted 4.10.0-rc5-db=
g+ #1
> [  227.613840] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2=
 11/17/2014
> [  227.613893] task: ffff880172a249c0 task.stack: ffffc90001aa8000
> [  227.613932] RIP: 0010:rq_completed+0x12/0x90 [dm_mod]
> [  227.613965] RSP: 0018:ffffc90001aabda8 EFLAGS: 00010246
> [  227.614006] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 000000000=
0000000
> [  227.614043] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6=
b6b6b6b
> [  227.614074] RBP: ffffc90001aabdc0 R08: ffff8803825f4c38 R09: 000000000=
0000000
> [  227.614105] R10: 0000000000000000 R11: 0000000000000000 R12: 000000000=
0000000
> [  227.614137] R13: 0000000000000000 R14: ffffffff81c05120 R15: 000000000=
0000004
> [  227.614170] FS:  0000000000000000(0000) GS:ffff88046f2c0000(0000) knlG=
S:0000000000000000
> [  227.614209] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  227.614239] CR2: 0000557e28bc20d0 CR3: 000000038594e000 CR4: 000000000=
01406e0
> [  227.614268] Call Trace:
> [  227.614301]  dm_softirq_done+0xe6/0x1e0 [dm_mod]
> [  227.614337]  blk_done_softirq+0x88/0xa0
> [  227.614369]  __do_softirq+0xba/0x4c0
> [  227.614470]  run_ksoftirqd+0x1a/0x50
> [  227.614499]  smpboot_thread_fn+0x123/0x1e0
> [  227.614529]  kthread+0x107/0x140
> [  227.614624]  ret_from_fork+0x2e/0x40
> [  227.614648] Code: ff ff 31 f6 48 89 c7 e8 cd 0e 2f e1 5d c3 90 66 2e 0=
f 1f 84 00 00 00 00 00 55 48 63 f6 48 89 e5 41 55 41 89 d5 41 54 53 48 89 f=
b <4c> 8b a7 88 02 00 00 f0 ff 8c b7 50 03 00 00 e8 ba 43 ff ff 85=20
> [  227.614738] RIP: rq_completed+0x12/0x90 [dm_mod] RSP: ffffc90001aabda8
>=20
> (gdb) list *(rq_completed+0x12)
> 0xdd12 is in rq_completed (drivers/md/dm-rq.c:187).
> 182      * the md may be freed in dm_put() at the end of this function.
> 183      * Or do dm_get() before calling this function and dm_put() later=
.
> 184      */
> 185     static void rq_completed(struct mapped_device *md, int rw, bool r=
un_queue)
> 186     {
> 187             struct request_queue *q =3D md->queue;
> 188             unsigned long flags;
> 189
> 190             atomic_dec(&md->pending[rw]);
> 191
>=20
> (gdb) disas rq_completed
> Dump of assembler code for function rq_completed:
>    0x000000000000dd00 <+0>:     push   %rbp
>    0x000000000000dd01 <+1>:     movslq %esi,%rsi
>    0x000000000000dd04 <+4>:     mov    %rsp,%rbp
>    0x000000000000dd07 <+7>:     push   %r13
>    0x000000000000dd09 <+9>:     mov    %edx,%r13d
>    0x000000000000dd0c <+12>:    push   %r12
>    0x000000000000dd0e <+14>:    push   %rbx
>    0x000000000000dd0f <+15>:    mov    %rdi,%rbx
>    0x000000000000dd12 <+18>:    mov    0x288(%rdi),%r12
>    0x000000000000dd19 <+25>:    lock decl 0x350(%rdi,%rsi,4)
>=20
> So this was caused by an attempt to dereference %rdi =3D 0x6b6b6b6b6b6b6b=
6b.
> Hence this is probably a use-after-free of struct mapped_device.

Hello Christoph and Mike,

The above crash occurs with Jens' for-next branch but not with Jens'
for-4.11/block branch. Sorry but I think this means that the SCSI
passthrough refactoring code is not yet ready for prime time.

Bart.=

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 17:27                                                                     ` Bart Van Assche
@ 2017-02-02 18:33                                                                       ` Mike Snitzer
  2017-02-02 18:43                                                                         ` Bart Van Assche
  0 siblings, 1 reply; 172+ messages in thread
From: Mike Snitzer @ 2017-02-02 18:33 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 02 2017 at 12:27pm -0500,
Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:

> On Wed, 2017-02-01 at 22:01 +0000, Bart Van Assche wrote:
> > However, a new issue shows up sporadically, an issue that I had not yet seen
> > during any test with a kernel tree from Linus:
> >
> > [  227.613440] general protection fault: 0000 [#1] SMP
> > [  227.613495] Modules linked in: dm_service_time ib_srp scsi_transport_srp target_core_user uio target_core_pscsi target_core_file ib_srpt target_core_iblock target_core_mod brd netconsole xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat libcrc32c nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet ib_ipoib msr rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm configfs ib_cm iw_cm mlx4_ib ib_core sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc tg3 aesni_intel iTCO_wdt mlx4_core ptp iTCO_vendor_support dcdbas aes_x86_64 crypto_simd glue_helper pps_core cryptd pcspkr devlink ipmi_si libphy ipmi_devintf fjes ipmi_msghandler tpm_tis tpm_tis_core lpc_ich mei_me mfd_core mei shpchp wmi tpm button hid_generic usbhid mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sr_mod sysimgblt fb_sys_fops cdrom ttm drm ehci_pci ehci_hcd usbcore usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua autofs4
> > [  227.613774] CPU: 3 PID: 28 Comm: ksoftirqd/3 Not tainted 4.10.0-rc5-dbg+ #1
> > [  227.613840] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014
> > [  227.613893] task: ffff880172a249c0 task.stack: ffffc90001aa8000
> > [  227.613932] RIP: 0010:rq_completed+0x12/0x90 [dm_mod]
> > [  227.613965] RSP: 0018:ffffc90001aabda8 EFLAGS: 00010246
> > [  227.614006] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000000000000
> > [  227.614043] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 6b6b6b6b6b6b6b6b
> > [  227.614074] RBP: ffffc90001aabdc0 R08: ffff8803825f4c38 R09: 0000000000000000
> > [  227.614105] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> > [  227.614137] R13: 0000000000000000 R14: ffffffff81c05120 R15: 0000000000000004
> > [  227.614170] FS:  0000000000000000(0000) GS:ffff88046f2c0000(0000) knlGS:0000000000000000
> > [  227.614209] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [  227.614239] CR2: 0000557e28bc20d0 CR3: 000000038594e000 CR4: 00000000001406e0
> > [  227.614268] Call Trace:
> > [  227.614301]  dm_softirq_done+0xe6/0x1e0 [dm_mod]
> > [  227.614337]  blk_done_softirq+0x88/0xa0
> > [  227.614369]  __do_softirq+0xba/0x4c0
> > [  227.614470]  run_ksoftirqd+0x1a/0x50
> > [  227.614499]  smpboot_thread_fn+0x123/0x1e0
> > [  227.614529]  kthread+0x107/0x140
> > [  227.614624]  ret_from_fork+0x2e/0x40
> > [  227.614648] Code: ff ff 31 f6 48 89 c7 e8 cd 0e 2f e1 5d c3 90 66 2e 0f 1f 84 00 00 00 00 00 55 48 63 f6 48 89 e5 41 55 41 89 d5 41 54 53 48 89 fb <4c> 8b a7 88 02 00 00 f0 ff 8c b7 50 03 00 00 e8 ba 43 ff ff 85 
> > [  227.614738] RIP: rq_completed+0x12/0x90 [dm_mod] RSP: ffffc90001aabda8
> > 
> > (gdb) list *(rq_completed+0x12)
> > 0xdd12 is in rq_completed (drivers/md/dm-rq.c:187).
> > 182      * the md may be freed in dm_put() at the end of this function.
> > 183      * Or do dm_get() before calling this function and dm_put() later.
> > 184      */
> > 185     static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
> > 186     {
> > 187             struct request_queue *q = md->queue;
> > 188             unsigned long flags;
> > 189
> > 190             atomic_dec(&md->pending[rw]);
> > 191
> > 
> > (gdb) disas rq_completed
> > Dump of assembler code for function rq_completed:
> >    0x000000000000dd00 <+0>:     push   %rbp
> >    0x000000000000dd01 <+1>:     movslq %esi,%rsi
> >    0x000000000000dd04 <+4>:     mov    %rsp,%rbp
> >    0x000000000000dd07 <+7>:     push   %r13
> >    0x000000000000dd09 <+9>:     mov    %edx,%r13d
> >    0x000000000000dd0c <+12>:    push   %r12
> >    0x000000000000dd0e <+14>:    push   %rbx
> >    0x000000000000dd0f <+15>:    mov    %rdi,%rbx
> >    0x000000000000dd12 <+18>:    mov    0x288(%rdi),%r12
> >    0x000000000000dd19 <+25>:    lock decl 0x350(%rdi,%rsi,4)
> > 
> > So this was caused by an attempt to dereference %rdi = 0x6b6b6b6b6b6b6b6b.
> > Hence this is probably a use-after-free of struct mapped_device.
> 
> Hello Christoph and Mike,
> 
> The above crash occurs with Jens' for-next branch but not with Jens'
> for-4.11/block branch. Sorry but I think this means that the SCSI
> passthrough refactoring code is not yet ready for prime time.

I somehow missed your original report from yesterday.

I do agree that this would appear to be a use-after-free; certainly not
a fun bug to sort out.

I'll go back over hch's changes to see if I can spot anything.  But is
this testing using dm_mod.use_bk_mq=Y or are you testing old .request_fn
dm-multipath?

Mike

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 18:33                                                                       ` Mike Snitzer
@ 2017-02-02 18:43                                                                         ` Bart Van Assche
  2017-02-02 19:13                                                                           ` Mike Snitzer
  0 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-02 18:43 UTC (permalink / raw)
  To: snitzer; +Cc: hch, linux-block, axboe

On Thu, 2017-02-02 at 13:33 -0500, Mike Snitzer wrote:
> I'll go back over hch's changes to see if I can spot anything.  But is
> this testing using dm_mod.use_bk_mq=Y or are you testing old .request_fn
> dm-multipath?

Hello Mike,

The srp-test software tests multiple configurations: dm-mq on scsi-mq, dm-sq
on scsi-mq and dm-sq on scsi-sq. I have not yet checked which of these
three configurations triggers the kernel crash.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 18:43                                                                         ` Bart Van Assche
@ 2017-02-02 19:13                                                                           ` Mike Snitzer
  2017-02-02 19:46                                                                             ` Bart Van Assche
  0 siblings, 1 reply; 172+ messages in thread
From: Mike Snitzer @ 2017-02-02 19:13 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 02 2017 at  1:43pm -0500,
Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:

> On Thu, 2017-02-02 at 13:33 -0500, Mike Snitzer wrote:
> > I'll go back over hch's changes to see if I can spot anything.  But is
> > this testing using dm_mod.use_bk_mq=Y or are you testing old .request_fn
> > dm-multipath?
> 
> Hello Mike,
> 
> The srp-test software tests multiple configurations: dm-mq on scsi-mq, dm-sq
> on scsi-mq and dm-sq on scsi-sq. I have not yet checked which of these
> three configurations triggers the kernel crash.

OK, such info is important to provide for crashes like this.  Please let
me know once you do.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 19:13                                                                           ` Mike Snitzer
@ 2017-02-02 19:46                                                                             ` Bart Van Assche
  2017-02-02 21:04                                                                               ` Mike Snitzer
  0 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-02 19:46 UTC (permalink / raw)
  To: snitzer; +Cc: hch, linux-block, axboe

On Thu, 2017-02-02 at 14:13 -0500, Mike Snitzer wrote:
> On Thu, Feb 02 2017 at  1:43pm -0500, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
> > On Thu, 2017-02-02 at 13:33 -0500, Mike Snitzer wrote:
> > > I'll go back over hch's changes to see if I can spot anything.  But is
> > > this testing using dm_mod.use_bk_mq=Y or are you testing old .request_fn
> > > dm-multipath?
> > 
> > The srp-test software tests multiple configurations: dm-mq on scsi-mq, dm-sq
> > on scsi-mq and dm-sq on scsi-sq. I have not yet checked which of these
> > three configurations triggers the kernel crash.
> 
> OK, such info is important to provide for crashes like this.  Please let
> me know once you do.

Hello Mike,

Apparently it's the large I/O test (using dm-mq on scsi-mq) that triggers the
crash:

# ~bart/software/infiniband/srp-test/run_tests -r 10
[ ... ]
Test /home/bart/software/infiniband/srp-test/tests/02-sq-on-mq succeeded
Running test /home/bart/software/infiniband/srp-test/tests/03 ...
Test large transfer sizes with cmd_sg_entries=255
removing /dev/mapper/mpatht: [ CRASH ]

The source code of the test I ran is available at
https://github.com/bvanassche/srp-test.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 19:46                                                                             ` Bart Van Assche
@ 2017-02-02 21:04                                                                               ` Mike Snitzer
  2017-02-02 21:10                                                                                 ` Mike Snitzer
  2017-02-02 22:38                                                                                 ` Bart Van Assche
  0 siblings, 2 replies; 172+ messages in thread
From: Mike Snitzer @ 2017-02-02 21:04 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 02 2017 at  2:46pm -0500,
Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:

> On Thu, 2017-02-02 at 14:13 -0500, Mike Snitzer wrote:
> > On Thu, Feb 02 2017 at  1:43pm -0500, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
> > > On Thu, 2017-02-02 at 13:33 -0500, Mike Snitzer wrote:
> > > > I'll go back over hch's changes to see if I can spot anything.  But is
> > > > this testing using dm_mod.use_bk_mq=Y or are you testing old .request_fn
> > > > dm-multipath?
> > > 
> > > The srp-test software tests multiple configurations: dm-mq on scsi-mq, dm-sq
> > > on scsi-mq and dm-sq on scsi-sq. I have not yet checked which of these
> > > three configurations triggers the kernel crash.
> > 
> > OK, such info is important to provide for crashes like this.  Please let
> > me know once you do.
> 
> Hello Mike,
> 
> Apparently it's the large I/O test (using dm-mq on scsi-mq) that triggers the
> crash:

I've gone over Christoph's "dm: always defer request allocation to the
owner of the request_queue" commit yet again.  Most of that commit's
changes are just mechanical.  I didn't see any problems.

In general, dm_start_request() calls dm_get(md) to take a reference on
the mapped_device.  And rq_completed() calls dm_put(md) to drop the
reference.  The DM device's request_queue (md->queue) should _not_ ever
be torn down before all references on the md have been dropped. But I'll
have to look closer on how/if that is enforced anywhere by coordinating
with block core.

In any case, the crash you reported was that the mapped_device was being
dereferenced after it was freed (at line 187's md->queue).  Which seems
to imply a dm_get/dm_put reference count regression.  But I'm not seeing
where at this point.

> # ~bart/software/infiniband/srp-test/run_tests -r 10
> [ ... ]
> Test /home/bart/software/infiniband/srp-test/tests/02-sq-on-mq succeeded
> Running test /home/bart/software/infiniband/srp-test/tests/03 ...
> Test large transfer sizes with cmd_sg_entries=255
> removing /dev/mapper/mpatht: [ CRASH ]
> 
> The source code of the test I ran is available at
> https://github.com/bvanassche/srp-test.

Any progress on getting this to work without requiring infiniband HW?

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 21:04                                                                               ` Mike Snitzer
@ 2017-02-02 21:10                                                                                 ` Mike Snitzer
  2017-02-03  0:20                                                                                   ` Bart Van Assche
  2017-02-02 22:38                                                                                 ` Bart Van Assche
  1 sibling, 1 reply; 172+ messages in thread
From: Mike Snitzer @ 2017-02-02 21:10 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 02 2017 at  4:04pm -0500,
Mike Snitzer <snitzer@redhat.com> wrote:

> On Thu, Feb 02 2017 at  2:46pm -0500,
> Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
> 
> > On Thu, 2017-02-02 at 14:13 -0500, Mike Snitzer wrote:
> > > On Thu, Feb 02 2017 at  1:43pm -0500, Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:
> > > > On Thu, 2017-02-02 at 13:33 -0500, Mike Snitzer wrote:
> > > > > I'll go back over hch's changes to see if I can spot anything.  But is
> > > > > this testing using dm_mod.use_bk_mq=Y or are you testing old .request_fn
> > > > > dm-multipath?
> > > > 
> > > > The srp-test software tests multiple configurations: dm-mq on scsi-mq, dm-sq
> > > > on scsi-mq and dm-sq on scsi-sq. I have not yet checked which of these
> > > > three configurations triggers the kernel crash.
> > > 
> > > OK, such info is important to provide for crashes like this.  Please let
> > > me know once you do.
> > 
> > Hello Mike,
> > 
> > Apparently it's the large I/O test (using dm-mq on scsi-mq) that triggers the
> > crash:
> 
> I've gone over Christoph's "dm: always defer request allocation to the
> owner of the request_queue" commit yet again.  Most of that commit's
> changes are just mechanical.  I didn't see any problems.
> 
> In general, dm_start_request() calls dm_get(md) to take a reference on
> the mapped_device.  And rq_completed() calls dm_put(md) to drop the
> reference.  The DM device's request_queue (md->queue) should _not_ ever
> be torn down before all references on the md have been dropped. But I'll
> have to look closer on how/if that is enforced anywhere by coordinating
> with block core.
> 
> In any case, the crash you reported was that the mapped_device was being
> dereferenced after it was freed (at line 187's md->queue).  Which seems
> to imply a dm_get/dm_put reference count regression.  But I'm not seeing
> where at this point.

Maybe it isn't a regression but something about Christoph's changes
causes a race to present itself?

Care to try moving the dm_get(md) at the end of dm_start_request() to
the beginning of dm_start_request() and report back on whether it helps
at all?

Thanks,
Mike

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 21:04                                                                               ` Mike Snitzer
  2017-02-02 21:10                                                                                 ` Mike Snitzer
@ 2017-02-02 22:38                                                                                 ` Bart Van Assche
  1 sibling, 0 replies; 172+ messages in thread
From: Bart Van Assche @ 2017-02-02 22:38 UTC (permalink / raw)
  To: snitzer; +Cc: hch, linux-block, axboe

On Thu, 2017-02-02 at 16:04 -0500, Mike Snitzer wrote:
> Any progress on getting this to work without requiring infiniband HW?

Hello Mike,

Instructions for running these tests over SoftRoCE have been added to
the README.md file in https://github.com/bvanassche/srp-test. However,
I'm not sure the SoftRoCE driver is already stable enough to run these
tests on top of that driver.

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-02 21:10                                                                                 ` Mike Snitzer
@ 2017-02-03  0:20                                                                                   ` Bart Van Assche
  2017-02-03  0:42                                                                                     ` Mike Snitzer
  0 siblings, 1 reply; 172+ messages in thread
From: Bart Van Assche @ 2017-02-03  0:20 UTC (permalink / raw)
  To: snitzer; +Cc: hch, linux-block, axboe

On Thu, 2017-02-02 at 16:10 -0500, Mike Snitzer wrote:
> Care to try moving the dm_get(md) at the end of dm_start_request() to
> the beginning of dm_start_request() and report back on whether it helps
> at all?

Hello Mike,

Sorry but I don't see how that could make a difference. While we are at it:
since dm_start_request() calls dm_get() and since rq_completed() calls
dm_put(), calls to these two functions should always be paired. There is
only one dm_start_request() call in the dm-mq code, namely the one in
dm_mq_queue_rq(). However, if map_request() returns DM_MAPIO_REQUEUE then
rq_completed() is called twice: a first time by
dm_requeue_original_request() and a second time by the dm_mq_queue_rq().
Do you agree with this?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 172+ messages in thread

* Re: split scsi passthrough fields out of struct request V2
  2017-02-03  0:20                                                                                   ` Bart Van Assche
@ 2017-02-03  0:42                                                                                     ` Mike Snitzer
  0 siblings, 0 replies; 172+ messages in thread
From: Mike Snitzer @ 2017-02-03  0:42 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: hch, linux-block, axboe

On Thu, Feb 02 2017 at  7:20pm -0500,
Bart Van Assche <Bart.VanAssche@sandisk.com> wrote:

> On Thu, 2017-02-02 at 16:10 -0500, Mike Snitzer wrote:
> > Care to try moving the dm_get(md) at the end of dm_start_request() to
> > the beginning of dm_start_request() and report back on whether it helps
> > at all?
> 
> Hello Mike,
> 
> Sorry but I don't see how that could make a difference.

Yeah, I thought about it further after I suggested it and agree that it
shouldn't make a difference (request isn't actually issued during
dm_start_request).

> While we are at it:
> since dm_start_request() calls dm_get() and since rq_completed() calls
> dm_put(), calls to these two functions should always be paired. There is
> only one dm_start_request() call in the dm-mq code, namely the one in
> dm_mq_queue_rq(). However, if map_request() returns DM_MAPIO_REQUEUE then
> rq_completed() is called twice: a first time by
> dm_requeue_original_request() and a second time by the dm_mq_queue_rq().
> Do you agree with this?

Not seeing it.

DM_MAPIO_DELAY_REQUEUE will call dm_requeue_original_request(), but
dm_mq_queue_rq() won't call rq_completed() in that case.

And for DM_MAPIO_REQUEUE dm_mq_queue_rq() will call rq_completed().

^ permalink raw reply	[flat|nested] 172+ messages in thread

end of thread, other threads:[~2017-02-03  0:42 UTC | newest]

Thread overview: 172+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-25 17:25 split scsi passthrough fields out of struct request V2 Christoph Hellwig
2017-01-25 17:25 ` [PATCH 01/18] block: add a op_is_flush helper Christoph Hellwig
2017-01-26  2:58   ` Martin K. Petersen
2017-01-26 22:38   ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 02/18] md: cleanup bio op / flags handling in raid1_write_request Christoph Hellwig
2017-01-26  2:59   ` Martin K. Petersen
2017-01-26 23:18   ` Bart Van Assche
2017-01-26 23:18     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 03/18] block: fix elevator init check Christoph Hellwig
2017-01-26  3:01   ` Martin K. Petersen
2017-01-26 23:21   ` Bart Van Assche
2017-01-26 23:21     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 04/18] block: simplify blk_init_allocated_queue Christoph Hellwig
2017-01-26  3:02   ` Martin K. Petersen
2017-01-26 23:27   ` Bart Van Assche
2017-01-26 23:27     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 05/18] block: allow specifying size for extra command data Christoph Hellwig
2017-01-26  3:15   ` Martin K. Petersen
2017-01-27 16:12     ` Christoph Hellwig
2017-01-27 16:12       ` Christoph Hellwig
2017-01-27 17:21       ` Bart Van Assche
2017-01-27 17:21         ` Bart Van Assche
2017-01-27 17:26         ` Jens Axboe
2017-01-27 17:26           ` Jens Axboe
2017-01-27 17:30           ` Bart Van Assche
2017-01-27 17:30             ` Bart Van Assche
2017-01-27 17:33             ` Jens Axboe
2017-01-25 17:25 ` [PATCH 06/18] dm: remove incomple BLOCK_PC support Christoph Hellwig
2017-01-27 17:32   ` Bart Van Assche
2017-01-27 17:32     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 07/18] dm: always defer request allocation to the owner of the request_queue Christoph Hellwig
2017-01-27 16:34   ` Mike Snitzer
2017-01-27 16:34     ` Mike Snitzer
2017-01-27 16:36     ` Christoph Hellwig
2017-01-27 16:36       ` Christoph Hellwig
2017-01-27 16:44       ` Mike Snitzer
2017-01-27 16:44         ` Mike Snitzer
2017-01-25 17:25 ` [PATCH 08/18] scsi_dh_rdac: switch to scsi_execute_req_flags() Christoph Hellwig
2017-01-26  3:18   ` Martin K. Petersen
2017-01-25 17:25 ` [PATCH 09/18] scsi_dh_emc: " Christoph Hellwig
2017-01-26  3:19   ` Martin K. Petersen
2017-01-25 17:25 ` [PATCH 10/18] scsi_dh_hp_sw: " Christoph Hellwig
2017-01-26  3:20   ` Martin K. Petersen
2017-01-26  3:20     ` Martin K. Petersen
2017-01-25 17:25 ` [PATCH 11/18] scsi: remove gfp_flags member in scsi_host_cmd_pool Christoph Hellwig
2017-01-26  3:21   ` Martin K. Petersen
2017-01-27 17:38   ` Bart Van Assche
2017-01-27 17:38     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 12/18] scsi: respect unchecked_isa_dma for blk-mq Christoph Hellwig
2017-01-26  3:23   ` Martin K. Petersen
2017-01-27 17:45   ` Bart Van Assche
2017-01-27 17:45     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 13/18] scsi: remove scsi_cmd_dma_pool Christoph Hellwig
2017-01-26  3:24   ` Martin K. Petersen
2017-01-27 17:51   ` Bart Van Assche
2017-01-27 17:51     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 14/18] scsi: remove __scsi_alloc_queue Christoph Hellwig
2017-01-26  3:25   ` Martin K. Petersen
2017-01-27 17:58   ` Bart Van Assche
2017-01-27 17:58     ` Bart Van Assche
2017-01-28  8:23     ` hch
2017-01-25 17:25 ` [PATCH 15/18] scsi: allocate scsi_cmnd structures as part of struct request Christoph Hellwig
2017-01-26  3:30   ` Martin K. Petersen
2017-01-27 18:39   ` Bart Van Assche
2017-01-27 18:39     ` Bart Van Assche
2017-01-28  8:25     ` hch
2017-01-28  8:25       ` hch
2017-01-25 17:25 ` [PATCH 16/18] block/bsg: move queue creation into bsg_setup_queue Christoph Hellwig
2017-01-27 18:48   ` Bart Van Assche
2017-01-27 18:48     ` Bart Van Assche
2017-01-25 17:25 ` [PATCH 17/18] block: split scsi_request out of struct request Christoph Hellwig
2017-01-25 17:25 ` [PATCH 18/18] block: don't assign cmd_flags in __blk_rq_prep_clone Christoph Hellwig
2017-01-26  3:31   ` Martin K. Petersen
2017-01-26 18:29 ` split scsi passthrough fields out of struct request V2 Bart Van Assche
2017-01-26 18:29   ` Bart Van Assche
2017-01-26 18:44   ` Jens Axboe
2017-01-26 18:52     ` Bart Van Assche
2017-01-26 18:52       ` Bart Van Assche
2017-01-26 18:57       ` Jens Axboe
2017-01-26 18:59         ` hch
2017-01-26 19:01           ` Jens Axboe
2017-01-26 19:01             ` Jens Axboe
2017-01-26 20:47             ` [dm-devel] " Bart Van Assche
2017-01-26 20:54               ` Jens Axboe
2017-01-26 20:54                 ` [dm-devel] " Jens Axboe
2017-01-26 21:01                 ` Bart Van Assche
2017-01-26 21:01                   ` Bart Van Assche
2017-01-26 21:12                   ` Jens Axboe
2017-01-26 21:12                     ` [dm-devel] " Jens Axboe
2017-01-26 21:47                     ` Bart Van Assche
2017-01-26 21:47                       ` [dm-devel] " Bart Van Assche
2017-01-26 21:51                       ` Jens Axboe
2017-01-26 21:51                         ` [dm-devel] " Jens Axboe
2017-01-26 23:14                         ` Bart Van Assche
2017-01-26 23:14                           ` Bart Van Assche
2017-01-26 23:26                           ` Jens Axboe
2017-01-26 23:26                             ` Jens Axboe
2017-01-26 23:26                             ` [dm-devel] " Jens Axboe
2017-01-26 23:47                             ` Bart Van Assche
2017-01-26 23:47                               ` [dm-devel] " Bart Van Assche
2017-01-26 23:50                               ` Jens Axboe
2017-01-27  0:33                                 ` Jens Axboe
2017-01-27  0:33                                   ` [dm-devel] " Jens Axboe
2017-01-27  0:38                                 ` Bart Van Assche
2017-01-27  0:38                                   ` Bart Van Assche
2017-01-27  0:41                                   ` Jens Axboe
2017-01-27  1:15                                     ` Bart Van Assche
2017-01-27  1:15                                       ` [dm-devel] " Bart Van Assche
2017-01-27  1:22                                       ` Jens Axboe
2017-01-27  1:22                                         ` [dm-devel] " Jens Axboe
2017-01-27  6:40                                         ` Jens Axboe
2017-01-27  8:04                                           ` Jens Axboe
2017-01-27  8:04                                             ` [dm-devel] " Jens Axboe
2017-01-27 16:52                                             ` Bart Van Assche
2017-01-27 16:52                                               ` [dm-devel] " Bart Van Assche
2017-01-27 16:56                                               ` Jens Axboe
2017-01-27 16:56                                                 ` [dm-devel] " Jens Axboe
2017-01-27 17:03                                                 ` Bart Van Assche
2017-01-27 17:03                                                   ` [dm-devel] " Bart Van Assche
2017-01-31  1:12                                                 ` Bart Van Assche
2017-01-31  1:12                                                   ` Bart Van Assche
2017-01-31  1:38                                                   ` Jens Axboe
2017-01-31  1:38                                                     ` Jens Axboe
2017-01-31  4:13                                                     ` Jens Axboe
2017-01-31  4:13                                                       ` [dm-devel] " Jens Axboe
2017-01-31 21:35                                                     ` Bart Van Assche
2017-01-31 21:35                                                       ` Bart Van Assche
2017-01-31 21:55                                                       ` Bart Van Assche
2017-01-31 21:55                                                         ` Bart Van Assche
2017-01-31 21:58                                                         ` Jens Axboe
2017-02-01  1:01                                                           ` Bart Van Assche
2017-02-01  6:38                                                             ` Jens Axboe
2017-02-01 16:46                                                               ` Bart Van Assche
2017-02-01 17:13                                                                 ` Jens Axboe
2017-02-01 17:28                                                                   ` Bart Van Assche
2017-02-01 19:21                                                                   ` Bart Van Assche
2017-02-01 22:01                                                                   ` Bart Van Assche
2017-02-02 17:27                                                                     ` Bart Van Assche
2017-02-02 18:33                                                                       ` Mike Snitzer
2017-02-02 18:43                                                                         ` Bart Van Assche
2017-02-02 19:13                                                                           ` Mike Snitzer
2017-02-02 19:46                                                                             ` Bart Van Assche
2017-02-02 21:04                                                                               ` Mike Snitzer
2017-02-02 21:10                                                                                 ` Mike Snitzer
2017-02-03  0:20                                                                                   ` Bart Van Assche
2017-02-03  0:42                                                                                     ` Mike Snitzer
2017-02-02 22:38                                                                                 ` Bart Van Assche
2017-01-27 17:02                                         ` [dm-devel] " Bart Van Assche
2017-01-27 17:02                                           ` Bart Van Assche
2017-01-27 16:11 ` Jens Axboe
2017-01-27 16:11   ` Jens Axboe
2017-01-27 16:17   ` Christoph Hellwig
2017-01-27 16:17     ` Christoph Hellwig
2017-01-27 16:21     ` Jens Axboe
2017-01-27 16:21       ` Jens Axboe
2017-01-27 16:23       ` Christoph Hellwig
2017-01-27 16:23         ` Christoph Hellwig
2017-01-27 16:27         ` Jens Axboe
2017-01-27 16:27           ` Jens Axboe
2017-01-27 16:34           ` Christoph Hellwig
2017-01-27 16:34             ` Christoph Hellwig
2017-01-27 16:38             ` Jens Axboe
2017-01-27 16:38               ` Jens Axboe
2017-01-27 16:42               ` Christoph Hellwig
2017-01-27 16:42                 ` Christoph Hellwig
2017-01-27 16:58                 ` Jens Axboe
2017-01-27 16:58                   ` Jens Axboe
2017-01-27 21:27 ` Bart Van Assche
2017-01-27 21:27   ` Bart Van Assche
2017-01-28  8:29   ` hch
2017-01-30  6:58   ` Hannes Reinecke
2017-01-30  6:58     ` Hannes Reinecke

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.