* [PATCH] null_blk: Fix scheduling in atomic with zoned mode
@ 2020-11-06 11:01 Damien Le Moal
2020-11-06 13:55 ` Christoph Hellwig
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Damien Le Moal @ 2020-11-06 11:01 UTC (permalink / raw)
To: linux-block, Jens Axboe
Commit aa1c09cb65e2 ("null_blk: Fix locking in zoned mode") changed
zone locking to using the potentially sleeping wait_on_bit_io()
function. This is acceptable when memory backing is enabled as the
device queue is in that case marked as blocking, but this triggers a
scheduling while in atomic context with memory backing disabled.
Fix this by relying solely on the device zone spinlock for zone
information protection without temporarily releasing this lock around
null_process_cmd() execution in null_zone_write(). This is OK to do
since when memory backing is disabled, command processing does not
block and the memory backing lock nullb->lock is unused. This solution
avoids the overhead of having to mark a zoned null_blk device queue as
blocking when memory backing is unused.
This patch also adds comments to the zone locking code to explain the
unusual locking scheme.
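Schematically, the zone lock helpers end up as follows (a consolidated view of the null_blk_zoned.c hunks below, shown here only to illustrate the two-level scheme):

static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
{
	/* Zone-wide serialization, needed only when command processing may sleep. */
	if (dev->memory_backed)
		wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
	/* Zone information protection (condition, write pointer, counters). */
	spin_lock_irq(&dev->zone_lock);
}

static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
{
	spin_unlock_irq(&dev->zone_lock);

	if (dev->memory_backed)
		clear_and_wake_up_bit(zno, dev->zone_locks);
}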
Reported-by: kernel test robot <lkp@intel.com>
Fixes: aa1c09cb65e2 ("null_blk: Fix locking in zoned mode")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
drivers/block/null_blk.h | 2 +-
drivers/block/null_blk_zoned.c | 47 ++++++++++++++++++++++------------
2 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index cfd00ad40355..c24d9b5ad81a 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -47,7 +47,7 @@ struct nullb_device {
unsigned int nr_zones_closed;
struct blk_zone *zones;
sector_t zone_size_sects;
- spinlock_t zone_dev_lock;
+ spinlock_t zone_lock;
unsigned long *zone_locks;
unsigned long size; /* device size in MB */
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 8775acbb4f8f..beb34b4f76b0 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -46,11 +46,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
if (!dev->zones)
return -ENOMEM;
- spin_lock_init(&dev->zone_dev_lock);
- dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
- if (!dev->zone_locks) {
- kvfree(dev->zones);
- return -ENOMEM;
+ /*
+ * With memory backing, the zone_lock spinlock needs to be temporarily
+ * released to avoid scheduling in atomic context. To guarantee zone
+ * information protection, use a bitmap to lock zones with
+ * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing
+ * implies that the queue is marked with BLK_MQ_F_BLOCKING.
+ */
+ spin_lock_init(&dev->zone_lock);
+ if (dev->memory_backed) {
+ dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
+ if (!dev->zone_locks) {
+ kvfree(dev->zones);
+ return -ENOMEM;
+ }
}
if (dev->zone_nr_conv >= dev->nr_zones) {
@@ -137,12 +146,17 @@ void null_free_zoned_dev(struct nullb_device *dev)
static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
{
- wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
+ if (dev->memory_backed)
+ wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
+ spin_lock_irq(&dev->zone_lock);
}
static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
{
- clear_and_wake_up_bit(zno, dev->zone_locks);
+ spin_unlock_irq(&dev->zone_lock);
+
+ if (dev->memory_backed)
+ clear_and_wake_up_bit(zno, dev->zone_locks);
}
int null_report_zones(struct gendisk *disk, sector_t sector,
@@ -322,7 +336,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
null_lock_zone(dev, zno);
- spin_lock(&dev->zone_dev_lock);
switch (zone->cond) {
case BLK_ZONE_COND_FULL:
@@ -375,9 +388,17 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
- spin_unlock(&dev->zone_dev_lock);
+ /*
+ * Memory backing allocation may sleep: release the zone_lock spinlock
+ * to avoid scheduling in atomic context. Zone operation atomicity is
+ * still guaranteed through the zone_locks bitmap.
+ */
+ if (dev->memory_backed)
+ spin_unlock_irq(&dev->zone_lock);
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
- spin_lock(&dev->zone_dev_lock);
+ if (dev->memory_backed)
+ spin_lock_irq(&dev->zone_lock);
+
if (ret != BLK_STS_OK)
goto unlock;
@@ -392,7 +413,6 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
ret = BLK_STS_OK;
unlock:
- spin_unlock(&dev->zone_dev_lock);
null_unlock_zone(dev, zno);
return ret;
@@ -516,9 +536,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
null_lock_zone(dev, i);
zone = &dev->zones[i];
if (zone->cond != BLK_ZONE_COND_EMPTY) {
- spin_lock(&dev->zone_dev_lock);
null_reset_zone(dev, zone);
- spin_unlock(&dev->zone_dev_lock);
trace_nullb_zone_op(cmd, i, zone->cond);
}
null_unlock_zone(dev, i);
@@ -530,7 +548,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
zone = &dev->zones[zone_no];
null_lock_zone(dev, zone_no);
- spin_lock(&dev->zone_dev_lock);
switch (op) {
case REQ_OP_ZONE_RESET:
@@ -550,8 +567,6 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
break;
}
- spin_unlock(&dev->zone_dev_lock);
-
if (ret == BLK_STS_OK)
trace_nullb_zone_op(cmd, zone_no, zone->cond);
--
2.26.2
* Re: [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-06 11:01 [PATCH] null_blk: Fix scheduling in atomic with zoned mode Damien Le Moal
@ 2020-11-06 13:55 ` Christoph Hellwig
2020-11-06 14:08 ` Damien Le Moal
2020-11-06 14:19 ` Christoph Hellwig
2020-11-06 16:37 ` Jens Axboe
2 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2020-11-06 13:55 UTC (permalink / raw)
To: Damien Le Moal; +Cc: linux-block, Jens Axboe
On Fri, Nov 06, 2020 at 08:01:41PM +0900, Damien Le Moal wrote:
> [...]
> @@ -375,9 +388,17 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
> if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
> zone->cond = BLK_ZONE_COND_IMP_OPEN;
>
> - spin_unlock(&dev->zone_dev_lock);
> + /*
> + * Memory backing allocation may sleep: release the zone_lock spinlock
> + * to avoid scheduling in atomic context. Zone operation atomicity is
> + * still guaranteed through the zone_locks bitmap.
> + */
> + if (dev->memory_backed)
> + spin_unlock_irq(&dev->zone_lock);
> ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
> - spin_lock(&dev->zone_dev_lock);
> + if (dev->memory_backed)
> + spin_lock_irq(&dev->zone_lock);
Do we actually need to take zone_lock at all for the memory_backed
case? And should the !memory_backed case just use a per-zone spinlock?
* Re: [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-06 13:55 ` Christoph Hellwig
@ 2020-11-06 14:08 ` Damien Le Moal
0 siblings, 0 replies; 8+ messages in thread
From: Damien Le Moal @ 2020-11-06 14:08 UTC (permalink / raw)
To: hch; +Cc: linux-block, Jens Axboe
On 2020/11/06 22:55, Christoph Hellwig wrote:
> On Fri, Nov 06, 2020 at 08:01:41PM +0900, Damien Le Moal wrote:
>> [...]
>
> Do we actually need to take zone_lock at all for the memory_backed
> case? Should the !memory_backed just use a per-zone spinlock?
For your first question, yes we do: the per-zone wait_on_bit_io() lock
serializes operations on each individual zone, but we still need a
device-level lock to protect the counters (implicitly open, explicitly
open and closed) used for zone resource management.
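To illustrate (a simplified sketch, not the actual driver code; the counter names follow the nullb_device fields), a state transition on one zone touches counters shared by all zones, which the per-zone bit lock alone cannot protect:

/*
 * Illustration only: moving one zone to implicitly open updates
 * device-wide counters, so the device-level zone_lock must be held
 * even though operations on this particular zone are already
 * serialized by the zone_locks bitmap.
 */
static void null_zone_to_imp_open(struct nullb_device *dev, struct blk_zone *zone)
{
	lockdep_assert_held(&dev->zone_lock);

	if (zone->cond == BLK_ZONE_COND_CLOSED)
		dev->nr_zones_closed--;		/* shared across zones */

	zone->cond = BLK_ZONE_COND_IMP_OPEN;
	dev->nr_zones_imp_open++;		/* shared across zones */
}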
For the !memory_backed case, I wondered the same thing but decided to just
reuse the device zone spinlock as the single lock for all zones. I am not
sure a per-zone spinlock would improve performance by much, given that the
locked section becomes really short without the memory copy. I will try it
and see how the performance changes.
That said, I would prefer to make such a change in a separate patch for the
next cycle and keep this patch as a pure bug fix.
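Roughly, what I have in mind for that follow-up is something like the below (just a sketch, untested; the nullb_zone structure and field names are invented here for illustration): keep the bitmap lock for the memory backed case, give each zone its own spinlock otherwise, and keep updating the shared open/closed counters under a separate device-level lock.

/* Hypothetical sketch, not existing null_blk code. */
struct nullb_zone {
	spinlock_t lock;	/* protects this zone's condition and write pointer */
	struct blk_zone blkz;
};

static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
{
	if (dev->memory_backed)
		wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
	else
		/* dev->zones would become an array of struct nullb_zone */
		spin_lock_irq(&dev->zones[zno].lock);
}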
--
Damien Le Moal
Western Digital Research
* Re: [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-06 11:01 [PATCH] null_blk: Fix scheduling in atomic with zoned mode Damien Le Moal
2020-11-06 13:55 ` Christoph Hellwig
@ 2020-11-06 14:19 ` Christoph Hellwig
2020-11-06 14:24 ` Damien Le Moal
2020-11-06 16:37 ` Jens Axboe
2 siblings, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2020-11-06 14:19 UTC (permalink / raw)
To: Damien Le Moal; +Cc: linux-block, Jens Axboe
Looks good for now:
Reviewed-by: Christoph Hellwig <hch@lst.de>
although I think we should address the global lock for the
!memory_backed case sooner rather than later.
* Re: [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-06 14:19 ` Christoph Hellwig
@ 2020-11-06 14:24 ` Damien Le Moal
0 siblings, 0 replies; 8+ messages in thread
From: Damien Le Moal @ 2020-11-06 14:24 UTC (permalink / raw)
To: hch; +Cc: linux-block, Jens Axboe
On 2020/11/06 23:19, Christoph Hellwig wrote:
> Looks good for now:
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
>
> although I think we should address the global lock for the
> !memory_backed case rather sooner than later.
OK. I will send something next week.
--
Damien Le Moal
Western Digital Research
* Re: [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-06 11:01 [PATCH] null_blk: Fix scheduling in atomic with zoned mode Damien Le Moal
2020-11-06 13:55 ` Christoph Hellwig
2020-11-06 14:19 ` Christoph Hellwig
@ 2020-11-06 16:37 ` Jens Axboe
2 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2020-11-06 16:37 UTC (permalink / raw)
To: Damien Le Moal, linux-block
On 11/6/20 4:01 AM, Damien Le Moal wrote:
> [...]
Applied for 5.10, though I do agree that the locking for the
non-memory-backed case should go. The whole point of null_blk is to be
able to benchmark the underlying layers, and if we end up having
null_blk be the limiting factor, then it's pointless.
--
Jens Axboe
* Re: [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-10 7:36 ` [PATCH] null_blk: Fix scheduling in atomic with zoned mode Damien Le Moal
@ 2020-11-17 10:29 ` Greg Kroah-Hartman
0 siblings, 0 replies; 8+ messages in thread
From: Greg Kroah-Hartman @ 2020-11-17 10:29 UTC (permalink / raw)
To: Damien Le Moal; +Cc: stable, Jens Axboe
On Tue, Nov 10, 2020 at 04:36:05PM +0900, Damien Le Moal wrote:
> commit e1777d099728a76a8f8090f89649aac961e7e530 upstream.
>
> [...]
Now queued up, thanks.
greg k-h
* [PATCH] null_blk: Fix scheduling in atomic with zoned mode
2020-11-09 10:25 FAILED: patch "[PATCH] null_blk: Fix scheduling in atomic with zoned mode" failed to apply to 5.9-stable tree gregkh
@ 2020-11-10 7:36 ` Damien Le Moal
2020-11-17 10:29 ` Greg Kroah-Hartman
0 siblings, 1 reply; 8+ messages in thread
From: Damien Le Moal @ 2020-11-10 7:36 UTC (permalink / raw)
To: Greg Kroah-Hartman, stable; +Cc: Jens Axboe
commit e1777d099728a76a8f8090f89649aac961e7e530 upstream.
Commit aa1c09cb65e2 ("null_blk: Fix locking in zoned mode") changed
zone locking to using the potentially sleeping wait_on_bit_io()
function. This is acceptable when memory backing is enabled as the
device queue is in that case marked as blocking, but this triggers a
scheduling while in atomic context with memory backing disabled.
Fix this by relying solely on the device zone spinlock for zone
information protection without temporarily releasing this lock around
null_process_cmd() execution in null_zone_write(). This is OK to do
since when memory backing is disabled, command processing does not
block and the memory backing lock nullb->lock is unused. This solution
avoids the overhead of having to mark a zoned null_blk device queue as
blocking when memory backing is unused.
This patch also adds comments to the zone locking code to explain the
unusual locking scheme.
Fixes: aa1c09cb65e2 ("null_blk: Fix locking in zoned mode")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
drivers/block/null_blk.h | 1 +
drivers/block/null_blk_zoned.c | 31 +++++++++++++++++++++++++------
2 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 206309ecc7e4..7562cd6cd681 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -44,6 +44,7 @@ struct nullb_device {
unsigned int nr_zones;
struct blk_zone *zones;
sector_t zone_size_sects;
+ spinlock_t zone_lock;
unsigned long *zone_locks;
unsigned long size; /* device size in MB */
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 495713d6c989..d9102327357c 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -46,10 +46,20 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
if (!dev->zones)
return -ENOMEM;
- dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
- if (!dev->zone_locks) {
- kvfree(dev->zones);
- return -ENOMEM;
+ /*
+ * With memory backing, the zone_lock spinlock needs to be temporarily
+ * released to avoid scheduling in atomic context. To guarantee zone
+ * information protection, use a bitmap to lock zones with
+ * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing
+ * implies that the queue is marked with BLK_MQ_F_BLOCKING.
+ */
+ spin_lock_init(&dev->zone_lock);
+ if (dev->memory_backed) {
+ dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
+ if (!dev->zone_locks) {
+ kvfree(dev->zones);
+ return -ENOMEM;
+ }
}
if (dev->zone_nr_conv >= dev->nr_zones) {
@@ -118,12 +128,16 @@ void null_free_zoned_dev(struct nullb_device *dev)
static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
{
- wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
+ if (dev->memory_backed)
+ wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
+ spin_lock_irq(&dev->zone_lock);
}
static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
{
- clear_and_wake_up_bit(zno, dev->zone_locks);
+ spin_unlock_irq(&dev->zone_lock);
+ if (dev->memory_backed)
+ clear_and_wake_up_bit(zno, dev->zone_locks);
}
int null_report_zones(struct gendisk *disk, sector_t sector,
@@ -233,7 +247,12 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
+ if (dev->memory_backed)
+ spin_unlock_irq(&dev->zone_lock);
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
+ if (dev->memory_backed)
+ spin_lock_irq(&dev->zone_lock);
+
if (ret != BLK_STS_OK)
break;
--
2.26.2