Since commit 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with
a RCU and generation based scheme") I observe lockdep complaints (full
message attached):

[   21.763369]        CPU0                    CPU1
[   21.763370]        ----                    ----
[   21.763371]   lock(&rq->gstate_seq);
[   21.763374]                                local_irq_disable();
[   21.763375]                                lock(&(&dq->lock)->rlock);
[   21.763377]                                lock(&rq->gstate_seq);
[   21.763379]   <Interrupt>
[   21.763380]     lock(&(&dq->lock)->rlock);
[   21.763382]  *** DEADLOCK ***

This only happens in combination with DASD (s390 specific) and another
blk-mq driver (scsi, null_blk). The distinctive behavior of DASD is that
it calls blk_mq_start_request with irqs disabled.

This is a false positive since gstate_seq on CPU0 is from a different
request queue / block driver than gstate_seq on CPU1.

One possible fix is to use local_irq_save/restore in
blk_mq_start_request. Alternatively, since it's a false positive, teach
lockdep that these are different locks - like below:

------>8
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0dc9e341c2a7..d1a2c2fa015d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2040,7 +2040,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
+	__seqcount_init(&rq->gstate_seq, "&rq->gstate_seq", set->rq_seqcount_key);
 	u64_stats_init(&rq->aborted_gstate_sync);
 	return 0;
 }
@@ -2774,7 +2774,7 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
  * requested depth down, if if it too large. In that case, the set
  * value will be stored in set->queue_depth.
  */
-int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+int __blk_mq_alloc_tag_set(struct blk_mq_tag_set *set, struct lock_class_key *key)
 {
 	int ret;
 
@@ -2825,6 +2825,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->mq_map)
 		goto out_free_tags;
 
+	set->rq_seqcount_key = key;
 	ret = blk_mq_update_queue_map(set);
 	if (ret)
 		goto out_free_mq_map;
@@ -2846,7 +2847,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	set->tags = NULL;
 	return ret;
 }
-EXPORT_SYMBOL(blk_mq_alloc_tag_set);
+EXPORT_SYMBOL(__blk_mq_alloc_tag_set);
 
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3986f4b3461..1fb8bc1fdb07 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -85,6 +85,8 @@ struct blk_mq_tag_set {
 
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
+
+	struct lock_class_key *rq_seqcount_key;
 };
 
 struct blk_mq_queue_data {
@@ -201,7 +203,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 int blk_mq_register_dev(struct device *, struct request_queue *);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
-int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
+int __blk_mq_alloc_tag_set(struct blk_mq_tag_set *set, struct lock_class_key *key);
+
+static inline int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+{
+	static struct lock_class_key _seqcount_key;
+
+	return __blk_mq_alloc_tag_set(set, &_seqcount_key);
+}
+
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);