* [dm-devel] [PATCH 1/2] dm era: Recover committed writeset after crash
2021-01-22 15:19 [dm-devel] [PATCH 0/2] dm era: Fix bugs that lead to lost writes after crash Nikos Tsironis
@ 2021-01-22 15:19 ` Nikos Tsironis
2021-01-22 15:19 ` [dm-devel] [PATCH 2/2] dm era: Update in-core bitset after committing the metadata Nikos Tsironis
2021-02-09 14:23 ` [dm-devel] [PATCH 0/2] dm era: Fix bugs that lead to lost writes after crash Nikos Tsironis
2 siblings, 0 replies; 4+ messages in thread
From: Nikos Tsironis @ 2021-01-22 15:19 UTC (permalink / raw)
To: snitzer, agk, dm-devel; +Cc: ejt, ntsironis
Following a system crash, dm-era fails to recover the committed writeset
for the current era, leading to lost writes. That is, we lose the
information about what blocks were written during the affected era.
dm-era assumes that the writeset of the current era is archived when the
device is suspended. So, when resuming the device, it just moves on to
the next era, ignoring the committed writeset.
This assumption holds when the device is properly shut down. But, when
the system crashes, the code that suspends the target never runs, so the
writeset for the current era is not archived.
There are three issues that cause the committed writeset to get lost:
1. dm-era doesn't load the committed writeset when opening the metadata
2. The code that resizes the metadata wipes the information about the
committed writeset (assuming it was loaded at step 1)
3. era_preresume() starts a new era, without taking into account that
the current era might not have been archived, due to a system crash.
To fix this:
1. Load the committed writeset when opening the metadata
2. Fix the code that resizes the metadata to make sure it doesn't wipe
the loaded writeset
3. Fix era_preresume() to check for a loaded writeset and archive it,
before starting a new era.
Fixes: eec40579d84873 ("dm: add era target")
Cc: stable@vger.kernel.org # v3.15+
Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
---
drivers/md/dm-era-target.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index b24e3839bb3a..854b1be8b452 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -71,8 +71,6 @@ static size_t bitset_size(unsigned nr_bits)
*/
static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
{
- ws->md.nr_bits = nr_blocks;
- ws->md.root = INVALID_WRITESET_ROOT;
ws->bits = vzalloc(bitset_size(nr_blocks));
if (!ws->bits) {
DMERR("%s: couldn't allocate in memory bitset", __func__);
@@ -85,12 +83,14 @@ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
/*
* Wipes the in-core bitset, and creates a new on disk bitset.
*/
-static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
+static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws,
+ dm_block_t nr_blocks)
{
int r;
- memset(ws->bits, 0, bitset_size(ws->md.nr_bits));
+ memset(ws->bits, 0, bitset_size(nr_blocks));
+ ws->md.nr_bits = nr_blocks;
r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
if (r) {
DMERR("%s: setup_on_disk_bitset failed", __func__);
@@ -579,6 +579,7 @@ static int open_metadata(struct era_metadata *md)
md->nr_blocks = le32_to_cpu(disk->nr_blocks);
md->current_era = le32_to_cpu(disk->current_era);
+ ws_unpack(&disk->current_writeset, &md->current_writeset->md);
md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
md->era_array_root = le64_to_cpu(disk->era_array_root);
md->metadata_snap = le64_to_cpu(disk->metadata_snap);
@@ -870,7 +871,6 @@ static int metadata_era_archive(struct era_metadata *md)
}
ws_pack(&md->current_writeset->md, &value);
- md->current_writeset->md.root = INVALID_WRITESET_ROOT;
keys[0] = md->current_era;
__dm_bless_for_disk(&value);
@@ -882,6 +882,7 @@ static int metadata_era_archive(struct era_metadata *md)
return r;
}
+ md->current_writeset->md.root = INVALID_WRITESET_ROOT;
md->archived_writesets = true;
return 0;
@@ -898,7 +899,7 @@ static int metadata_new_era(struct era_metadata *md)
int r;
struct writeset *new_writeset = next_writeset(md);
- r = writeset_init(&md->bitset_info, new_writeset);
+ r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks);
if (r) {
DMERR("%s: writeset_init failed", __func__);
return r;
@@ -951,7 +952,7 @@ static int metadata_commit(struct era_metadata *md)
int r;
struct dm_block *sblock;
- if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
+ if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
&md->current_writeset->md.root);
if (r) {
@@ -1565,7 +1566,7 @@ static int era_preresume(struct dm_target *ti)
start_worker(era);
- r = in_worker0(era, metadata_new_era);
+ r = in_worker0(era, metadata_era_rollover);
if (r) {
DMERR("%s: metadata_era_rollover failed", __func__);
return r;
--
2.11.0
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [dm-devel] [PATCH 2/2] dm era: Update in-core bitset after committing the metadata
2021-01-22 15:19 [dm-devel] [PATCH 0/2] dm era: Fix bugs that lead to lost writes after crash Nikos Tsironis
2021-01-22 15:19 ` [dm-devel] [PATCH 1/2] dm era: Recover committed writeset " Nikos Tsironis
@ 2021-01-22 15:19 ` Nikos Tsironis
2021-02-09 14:23 ` [dm-devel] [PATCH 0/2] dm era: Fix bugs that lead to lost writes after crash Nikos Tsironis
2 siblings, 0 replies; 4+ messages in thread
From: Nikos Tsironis @ 2021-01-22 15:19 UTC (permalink / raw)
To: snitzer, agk, dm-devel; +Cc: ejt, ntsironis
In case of a system crash, dm-era might fail to mark blocks as written
in its metadata, although the corresponding writes to these blocks were
passed down to the origin device and completed successfully.
Consider the following sequence of events:
1. We write to a block that has not been yet written in the current era
2. era_map() checks the in-core bitmap for the current era and sees
that the block is not marked as written.
3. The write is deferred for submission after the metadata have been
updated and committed.
4. The worker thread processes the deferred write
(process_deferred_bios()) and marks the block as written in the
in-core bitmap, **before** committing the metadata.
5. The worker thread starts committing the metadata.
6. We do more writes that map to the same block as the write of step (1)
7. era_map() checks the in-core bitmap and sees that the block is marked
as written, **although the metadata have not been committed yet**.
8. These writes are passed down to the origin device immediately and the
device reports them as completed.
9. The system crashes, e.g., power failure, before the commit from step
(5) finishes.
When the system recovers and we query the dm-era target for the list of
written blocks it doesn't report the aforementioned block as written,
although the writes of step (6) completed successfully.
The issue is that era_map() decides whether to defer or not a write
based on non committed information. The root cause of the bug is that we
update the in-core bitmap, **before** committing the metadata.
Fix this by updating the in-core bitmap **after** successfully
committing the metadata.
Fixes: eec40579d84873 ("dm: add era target")
Cc: stable@vger.kernel.org # v3.15+
Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
---
drivers/md/dm-era-target.c | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 854b1be8b452..62f679faf9e7 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -134,7 +134,7 @@ static int writeset_test_and_set(struct dm_disk_bitset *info,
{
int r;
- if (!test_and_set_bit(block, ws->bits)) {
+ if (!test_bit(block, ws->bits)) {
r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
if (r) {
/* FIXME: fail mode */
@@ -1226,8 +1226,10 @@ static void process_deferred_bios(struct era *era)
int r;
struct bio_list deferred_bios, marked_bios;
struct bio *bio;
+ struct blk_plug plug;
bool commit_needed = false;
bool failed = false;
+ struct writeset *ws = era->md->current_writeset;
bio_list_init(&deferred_bios);
bio_list_init(&marked_bios);
@@ -1237,9 +1239,11 @@ static void process_deferred_bios(struct era *era)
bio_list_init(&era->deferred_bios);
spin_unlock(&era->deferred_lock);
+ if (bio_list_empty(&deferred_bios))
+ return;
+
while ((bio = bio_list_pop(&deferred_bios))) {
- r = writeset_test_and_set(&era->md->bitset_info,
- era->md->current_writeset,
+ r = writeset_test_and_set(&era->md->bitset_info, ws,
get_block(era, bio));
if (r < 0) {
/*
@@ -1247,7 +1251,6 @@ static void process_deferred_bios(struct era *era)
* FIXME: finish.
*/
failed = true;
-
} else if (r == 0)
commit_needed = true;
@@ -1263,9 +1266,19 @@ static void process_deferred_bios(struct era *era)
if (failed)
while ((bio = bio_list_pop(&marked_bios)))
bio_io_error(bio);
- else
- while ((bio = bio_list_pop(&marked_bios)))
+ else {
+ blk_start_plug(&plug);
+ while ((bio = bio_list_pop(&marked_bios))) {
+ /*
+ * Only update the in-core writeset if the on-disk one
+ * was updated too.
+ */
+ if (commit_needed)
+ set_bit(get_block(era, bio), ws->bits);
submit_bio_noacct(bio);
+ }
+ blk_finish_plug(&plug);
+ }
}
static void process_rpc_calls(struct era *era)
--
2.11.0
--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply related [flat|nested] 4+ messages in thread