* [dm-devel] [PATCH] dm: fix deadlock when swapping to encrypted device
@ 2021-02-10 16:50 Mikulas Patocka
2021-02-10 19:23 ` [dm-devel] " Mike Snitzer
0 siblings, 1 reply; 3+ messages in thread
From: Mikulas Patocka @ 2021-02-10 16:50 UTC (permalink / raw)
To: Mike Snitzer, Milan Broz, Ondrej Kozina; +Cc: dm-devel
Hi
Here I'm sending the patch that fixes swapping to dm-crypt.
The logic that limits the number of in-progress I/Os was moved to generic
device mapper. A dm target can activate it by setting ti->limit_swap. The
actual limit can be set in /sys/module/dm_mod/parameters/swap_ios.
This patch only limits swap bios (those with REQ_SWAP set). I don't limit
other bios, because limiting them causes performance degradation due to
cache line bouncing when taking the semaphore - and there are no reports
that non-swap I/O on dm crypt causes deadlocks.
Mikulas
From: Mikulas Patocka <mpatocka@redhat.com>
The system would deadlock when swapping to a dm-crypt device. The reason
is that for each incoming write bio, dm-crypt allocates memory that holds
encrypted data. These excessive allocations exhaust all the memory and the
result is either deadlock or OOM trigger.
This patch limits the number of in-flight swap bios, so that the memory
consumed by dm-crypt is limited. The limit is enforced if the target set
the "limit_swap" variable and if the bio has REQ_SWAP set.
Non-swap bios are not affected becuase taking the semaphore would cause
performance degradation.
This is similar to request-based drivers - they will also block when the
number of requests is over the limit.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
---
drivers/md/dm-core.h | 4 ++
drivers/md/dm-crypt.c | 1
drivers/md/dm.c | 61 ++++++++++++++++++++++++++++++++++++++++++
include/linux/device-mapper.h | 5 +++
4 files changed, 71 insertions(+)
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c 2021-02-10 15:04:53.000000000 +0100
+++ linux-2.6/drivers/md/dm.c 2021-02-10 16:29:04.000000000 +0100
@@ -148,6 +148,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_
#define DM_NUMA_NODE NUMA_NO_NODE
static int dm_numa_node = DM_NUMA_NODE;
+#define DEFAULT_SWAP_IOS (8 * 1048576 / PAGE_SIZE)
+static int swap_ios = DEFAULT_SWAP_IOS;
+static int get_swap_ios(void)
+{
+ int latch = READ_ONCE(swap_ios);
+ if (unlikely(latch <= 0))
+ latch = DEFAULT_SWAP_IOS;
+ return latch;
+}
+
/*
* For mempools pre-allocation at the table loading time.
*/
@@ -969,6 +979,11 @@ void disable_write_zeroes(struct mapped_
limits->max_write_zeroes_sectors = 0;
}
+static bool swap_io_limit(struct dm_target *ti, struct bio *bio)
+{
+ return unlikely(ti->limit_swap) && unlikely((bio->bi_opf & REQ_SWAP) != 0);
+}
+
static void clone_endio(struct bio *bio)
{
blk_status_t error = bio->bi_status;
@@ -1019,6 +1034,11 @@ static void clone_endio(struct bio *bio)
}
}
+ if (unlikely(swap_io_limit(tio->ti, bio))) {
+ struct mapped_device *md = io->md;
+ up(&md->swap_ios_semaphore);
+ }
+
free_tio(tio);
dec_pending(io, error);
}
@@ -1252,6 +1272,22 @@ void dm_accept_partial_bio(struct bio *b
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+static noinline void __set_swap_io_limit(struct mapped_device *md, int latch)
+{
+ mutex_lock(&md->swap_ios_lock);
+ while (latch < md->swap_ios) {
+ cond_resched();
+ down(&md->swap_ios_semaphore);
+ md->swap_ios--;
+ }
+ while (latch > md->swap_ios) {
+ cond_resched();
+ up(&md->swap_ios_semaphore);
+ md->swap_ios++;
+ }
+ mutex_unlock(&md->swap_ios_lock);
+}
+
static blk_qc_t __map_bio(struct dm_target_io *tio)
{
int r;
@@ -1271,6 +1307,15 @@ static blk_qc_t __map_bio(struct dm_targ
atomic_inc(&io->io_count);
sector = clone->bi_iter.bi_sector;
+ if (unlikely(swap_io_limit(ti, clone))) {
+ struct mapped_device *md = io->md;
+ int latch = get_swap_ios();
+ if (unlikely(latch != md->swap_ios)) {
+ __set_swap_io_limit(md, latch);
+ }
+ down(&md->swap_ios_semaphore);
+ }
+
r = ti->type->map(ti, clone);
switch (r) {
case DM_MAPIO_SUBMITTED:
@@ -1281,10 +1326,18 @@ static blk_qc_t __map_bio(struct dm_targ
ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
+ if (unlikely(swap_io_limit(ti, clone))) {
+ struct mapped_device *md = io->md;
+ up(&md->swap_ios_semaphore);
+ }
free_tio(tio);
dec_pending(io, BLK_STS_IOERR);
break;
case DM_MAPIO_REQUEUE:
+ if (unlikely(swap_io_limit(ti, clone))) {
+ struct mapped_device *md = io->md;
+ up(&md->swap_ios_semaphore);
+ }
free_tio(tio);
dec_pending(io, BLK_STS_DM_REQUEUE);
break;
@@ -1747,6 +1800,7 @@ static void cleanup_mapped_device(struct
mutex_destroy(&md->suspend_lock);
mutex_destroy(&md->type_lock);
mutex_destroy(&md->table_devices_lock);
+ mutex_destroy(&md->swap_ios_lock);
dm_mq_cleanup_mapped_device(md);
}
@@ -1814,6 +1868,10 @@ static struct mapped_device *alloc_dev(i
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->swap_ios = get_swap_ios();
+ sema_init(&md->swap_ios_semaphore, md->swap_ios);
+ mutex_init(&md->swap_ios_lock);
+
md->disk->major = _major;
md->disk->first_minor = minor;
md->disk->fops = &dm_blk_dops;
@@ -3097,6 +3155,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios,
module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
+module_param(swap_ios, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(swap_ios, "The number of swap I/Os in flight");
+
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h 2021-02-10 15:04:53.000000000 +0100
+++ linux-2.6/drivers/md/dm-core.h 2021-02-10 15:04:53.000000000 +0100
@@ -102,6 +102,10 @@ struct mapped_device {
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
+ int swap_ios;
+ struct semaphore swap_ios_semaphore;
+ struct mutex swap_ios_lock;
+
struct dm_stats stats;
/* for blk-mq request-based DM support */
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c 2021-01-28 17:33:30.000000000 +0100
+++ linux-2.6/drivers/md/dm-crypt.c 2021-02-10 15:53:55.000000000 +0100
@@ -3324,6 +3324,7 @@ static int crypt_ctr(struct dm_target *t
wake_up_process(cc->write_thread);
ti->num_flush_bios = 1;
+ ti->limit_swap = true;
return 0;
Index: linux-2.6/include/linux/device-mapper.h
===================================================================
--- linux-2.6.orig/include/linux/device-mapper.h 2020-11-25 13:40:44.000000000 +0100
+++ linux-2.6/include/linux/device-mapper.h 2021-02-10 15:52:54.000000000 +0100
@@ -325,6 +325,11 @@ struct dm_target {
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
+
+ /*
+ * Set if we need to limit the number of in-flight bios when swapping.
+ */
+ bool limit_swap:1;
};
void *dm_per_bio_data(struct bio *bio, size_t data_size);
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [dm-devel] dm: fix deadlock when swapping to encrypted device
2021-02-10 16:50 [dm-devel] [PATCH] dm: fix deadlock when swapping to encrypted device Mikulas Patocka
@ 2021-02-10 19:23 ` Mike Snitzer
2021-02-10 20:26 ` [dm-devel] [PATCH v2] " Mikulas Patocka
0 siblings, 1 reply; 3+ messages in thread
From: Mike Snitzer @ 2021-02-10 19:23 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: Ondrej Kozina, dm-devel, Milan Broz
On Wed, Feb 10 2021 at 11:50am -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:
> Hi
>
> Here I'm sending the patch that fixes swapping to dm-crypt.
>
> The logic that limits the number of in-progress I/Os was moved to generic
> device mapper. A dm target can activate it by setting ti->limit_swap. The
> actual limit can be set in /sys/module/dm_mod/parameters/swap_ios.
>
> This patch only limits swap bios (those with REQ_SWAP set). I don't limit
> other bios, because limiting them causes performance degradation due to
> cache line bouncing when taking the semaphore - and there are no reports
> that non-swap I/O on dm crypt causes deadlocks.
>
> Mikulas
>
>
>
> From: Mikulas Patocka <mpatocka@redhat.com>
>
> The system would deadlock when swapping to a dm-crypt device. The reason
> is that for each incoming write bio, dm-crypt allocates memory that holds
> encrypted data. These excessive allocations exhaust all the memory and the
> result is either deadlock or OOM trigger.
>
> This patch limits the number of in-flight swap bios, so that the memory
> consumed by dm-crypt is limited. The limit is enforced if the target set
> the "limit_swap" variable and if the bio has REQ_SWAP set.
>
> Non-swap bios are not affected becuase taking the semaphore would cause
> performance degradation.
>
> This is similar to request-based drivers - they will also block when the
> number of requests is over the limit.
>
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Cc: stable@vger.kernel.org
>
> ---
> drivers/md/dm-core.h | 4 ++
> drivers/md/dm-crypt.c | 1
> drivers/md/dm.c | 61 ++++++++++++++++++++++++++++++++++++++++++
> include/linux/device-mapper.h | 5 +++
> 4 files changed, 71 insertions(+)
>
> Index: linux-2.6/drivers/md/dm.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm.c 2021-02-10 15:04:53.000000000 +0100
> +++ linux-2.6/drivers/md/dm.c 2021-02-10 16:29:04.000000000 +0100
> @@ -1271,6 +1307,15 @@ static blk_qc_t __map_bio(struct dm_targ
> atomic_inc(&io->io_count);
> sector = clone->bi_iter.bi_sector;
>
> + if (unlikely(swap_io_limit(ti, clone))) {
> + struct mapped_device *md = io->md;
> + int latch = get_swap_ios();
> + if (unlikely(latch != md->swap_ios)) {
> + __set_swap_io_limit(md, latch);
> + }
Don't need these curly braces...
> + down(&md->swap_ios_semaphore);
> + }
> +
> r = ti->type->map(ti, clone);
> switch (r) {
> case DM_MAPIO_SUBMITTED:
> @@ -1814,6 +1868,10 @@ static struct mapped_device *alloc_dev(i
> init_waitqueue_head(&md->eventq);
> init_completion(&md->kobj_holder.completion);
>
> + md->swap_ios = get_swap_ios();
> + sema_init(&md->swap_ios_semaphore, md->swap_ios);
> + mutex_init(&md->swap_ios_lock);
> +
> md->disk->major = _major;
> md->disk->first_minor = minor;
> md->disk->fops = &dm_blk_dops;
This is only applicable for bio-based DM. But probably not worth
avoiding the setup for request-based...
> @@ -3097,6 +3155,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios,
> module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
> MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
>
> +module_param(swap_ios, int, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(swap_ios, "The number of swap I/Os in flight");
> +
Can you please rename this to modparam to "swap_bios"? And rename other
variables/members, etc (e.g. "swap_bios_semaphore", "swap_bios_lock",
etc)?
> Index: linux-2.6/include/linux/device-mapper.h
> ===================================================================
> --- linux-2.6.orig/include/linux/device-mapper.h 2020-11-25 13:40:44.000000000 +0100
> +++ linux-2.6/include/linux/device-mapper.h 2021-02-10 15:52:54.000000000 +0100
> @@ -325,6 +325,11 @@ struct dm_target {
> * whether or not its underlying devices have support.
> */
> bool discards_supported:1;
> +
> + /*
> + * Set if we need to limit the number of in-flight bios when swapping.
> + */
> + bool limit_swap:1;
> };
>
> void *dm_per_bio_data(struct bio *bio, size_t data_size);
Please rename to "limit_swap_bios".
Other than these nits this looks good to me.
Once you send v2 I can get it staged for 5.12.
Thanks,
Mike
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
* [dm-devel] [PATCH v2] dm: fix deadlock when swapping to encrypted device
2021-02-10 19:23 ` [dm-devel] " Mike Snitzer
@ 2021-02-10 20:26 ` Mikulas Patocka
0 siblings, 0 replies; 3+ messages in thread
From: Mikulas Patocka @ 2021-02-10 20:26 UTC (permalink / raw)
To: Mike Snitzer; +Cc: Ondrej Kozina, dm-devel, Milan Broz
The system would deadlock when swapping to a dm-crypt device. The reason
is that for each incoming write bio, dm-crypt allocates memory that holds
encrypted data. These excessive allocations exhaust all the memory and the
result is either deadlock or OOM trigger.
This patch limits the number of in-flight swap bios, so that the memory
consumed by dm-crypt is limited. The limit is enforced if the target set
the "limit_swap_bios" variable and if the bio has REQ_SWAP set.
Non-swap bios are not affected becuase taking the semaphore would cause
performance degradation.
This is similar to request-based drivers - they will also block when the
number of requests is over the limit.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
---
drivers/md/dm-core.h | 4 ++
drivers/md/dm-crypt.c | 1
drivers/md/dm.c | 60 ++++++++++++++++++++++++++++++++++++++++++
include/linux/device-mapper.h | 5 +++
4 files changed, 70 insertions(+)
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c 2021-02-10 15:04:53.000000000 +0100
+++ linux-2.6/drivers/md/dm.c 2021-02-10 21:22:21.000000000 +0100
@@ -148,6 +148,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_
#define DM_NUMA_NODE NUMA_NO_NODE
static int dm_numa_node = DM_NUMA_NODE;
+#define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE)
+static int swap_bios = DEFAULT_SWAP_BIOS;
+static int get_swap_bios(void)
+{
+ int latch = READ_ONCE(swap_bios);
+ if (unlikely(latch <= 0))
+ latch = DEFAULT_SWAP_BIOS;
+ return latch;
+}
+
/*
* For mempools pre-allocation at the table loading time.
*/
@@ -969,6 +979,11 @@ void disable_write_zeroes(struct mapped_
limits->max_write_zeroes_sectors = 0;
}
+static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
+{
+ return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
+}
+
static void clone_endio(struct bio *bio)
{
blk_status_t error = bio->bi_status;
@@ -1019,6 +1034,11 @@ static void clone_endio(struct bio *bio)
}
}
+ if (unlikely(swap_bios_limit(tio->ti, bio))) {
+ struct mapped_device *md = io->md;
+ up(&md->swap_bios_semaphore);
+ }
+
free_tio(tio);
dec_pending(io, error);
}
@@ -1252,6 +1272,22 @@ void dm_accept_partial_bio(struct bio *b
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
+{
+ mutex_lock(&md->swap_bios_lock);
+ while (latch < md->swap_bios) {
+ cond_resched();
+ down(&md->swap_bios_semaphore);
+ md->swap_bios--;
+ }
+ while (latch > md->swap_bios) {
+ cond_resched();
+ up(&md->swap_bios_semaphore);
+ md->swap_bios++;
+ }
+ mutex_unlock(&md->swap_bios_lock);
+}
+
static blk_qc_t __map_bio(struct dm_target_io *tio)
{
int r;
@@ -1271,6 +1307,14 @@ static blk_qc_t __map_bio(struct dm_targ
atomic_inc(&io->io_count);
sector = clone->bi_iter.bi_sector;
+ if (unlikely(swap_bios_limit(ti, clone))) {
+ struct mapped_device *md = io->md;
+ int latch = get_swap_bios();
+ if (unlikely(latch != md->swap_bios))
+ __set_swap_bios_limit(md, latch);
+ down(&md->swap_bios_semaphore);
+ }
+
r = ti->type->map(ti, clone);
switch (r) {
case DM_MAPIO_SUBMITTED:
@@ -1281,10 +1325,18 @@ static blk_qc_t __map_bio(struct dm_targ
ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
+ if (unlikely(swap_bios_limit(ti, clone))) {
+ struct mapped_device *md = io->md;
+ up(&md->swap_bios_semaphore);
+ }
free_tio(tio);
dec_pending(io, BLK_STS_IOERR);
break;
case DM_MAPIO_REQUEUE:
+ if (unlikely(swap_bios_limit(ti, clone))) {
+ struct mapped_device *md = io->md;
+ up(&md->swap_bios_semaphore);
+ }
free_tio(tio);
dec_pending(io, BLK_STS_DM_REQUEUE);
break;
@@ -1747,6 +1799,7 @@ static void cleanup_mapped_device(struct
mutex_destroy(&md->suspend_lock);
mutex_destroy(&md->type_lock);
mutex_destroy(&md->table_devices_lock);
+ mutex_destroy(&md->swap_bios_lock);
dm_mq_cleanup_mapped_device(md);
}
@@ -1814,6 +1867,10 @@ static struct mapped_device *alloc_dev(i
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->swap_bios = get_swap_bios();
+ sema_init(&md->swap_bios_semaphore, md->swap_bios);
+ mutex_init(&md->swap_bios_lock);
+
md->disk->major = _major;
md->disk->first_minor = minor;
md->disk->fops = &dm_blk_dops;
@@ -3097,6 +3154,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios,
module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
+module_param(swap_bios, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(swap_bios, "The number of swap I/Os in flight");
+
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
Index: linux-2.6/drivers/md/dm-core.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-core.h 2021-02-10 15:04:53.000000000 +0100
+++ linux-2.6/drivers/md/dm-core.h 2021-02-10 21:07:08.000000000 +0100
@@ -102,6 +102,10 @@ struct mapped_device {
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
+ int swap_bios;
+ struct semaphore swap_bios_semaphore;
+ struct mutex swap_bios_lock;
+
struct dm_stats stats;
/* for blk-mq request-based DM support */
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c 2021-01-28 17:33:30.000000000 +0100
+++ linux-2.6/drivers/md/dm-crypt.c 2021-02-10 21:06:33.000000000 +0100
@@ -3324,6 +3324,7 @@ static int crypt_ctr(struct dm_target *t
wake_up_process(cc->write_thread);
ti->num_flush_bios = 1;
+ ti->limit_swap_bios = true;
return 0;
Index: linux-2.6/include/linux/device-mapper.h
===================================================================
--- linux-2.6.orig/include/linux/device-mapper.h 2020-11-25 13:40:44.000000000 +0100
+++ linux-2.6/include/linux/device-mapper.h 2021-02-10 21:06:24.000000000 +0100
@@ -325,6 +325,11 @@ struct dm_target {
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
+
+ /*
+ * Set if we need to limit the number of in-flight bios when swapping.
+ */
+ bool limit_swap_bios:1;
};
void *dm_per_bio_data(struct bio *bio, size_t data_size);
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-02-10 20:26 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-10 16:50 [dm-devel] [PATCH] dm: fix deadlock when swapping to encrypted device Mikulas Patocka
2021-02-10 19:23 ` [dm-devel] " Mike Snitzer
2021-02-10 20:26 ` [dm-devel] [PATCH v2] " Mikulas Patocka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).