* FAILED: patch "[PATCH] dm writecache: fix incorrect flush sequence when doing SSD" failed to apply to 4.19-stable tree
From: gregkh @ 2020-02-07 10:34 UTC
  To: mpatocka, snitzer; +Cc: stable


The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

From aa9509209c5ac2f0b35d01a922bf9ae072d0c2fc Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 8 Jan 2020 10:46:05 -0500
Subject: [PATCH] dm writecache: fix incorrect flush sequence when doing SSD
 mode commit

When committing state, the function writecache_flush does the following:
1. write metadata (writecache_commit_flushed)
2. flush disk cache (writecache_commit_flushed)
3. wait for data writes to complete (writecache_wait_for_ios)
4. increase superblock seq_count
5. write the superblock
6. flush disk cache

It may happen that at step 3, when we wait for some write to finish, the
disk reports the write as finished even though the write only hit the disk
cache and is not yet stored in persistent storage. At step 5 we write the
superblock, and the disk may persist it before the write that we waited
for in step 3. If the machine crashes at that point, incorrect data may be
returned after reboot.

In order to fix the bug, we must swap steps 2 and 3 in the above sequence,
so that we first wait for writes to complete and then flush the disk
cache.

Fixes: 48debafe4f2f ("dm: add writecache target")
Cc: stable@vger.kernel.org # 4.18+
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
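
To make the reordering easier to follow, here is a simplified before/after
sketch of the SSD-mode commit path (dm-io setup, error handling and the
PMEM path are omitted; the real code is in the diff below):

	/* before the fix: flush first, then wait */
	writecache_commit_flushed(wc);		/* write metadata, flush disk cache */
	writecache_wait_for_ios(wc, WRITE);	/* a write completing here may only
						   have reached the volatile cache */

	/* after the fix: wait first, then flush (inside ssd_commit_flushed) */
	writecache_wait_for_ios(wc, WRITE);	/* all data writes have completed */
	writecache_disk_flush(wc, wc->ssd_dev);	/* ...and are now made durable */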

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 7d727a72aa13..9b0a3bf6a4a1 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -442,7 +442,13 @@ static void writecache_notify_io(unsigned long error, void *context)
 		complete(&endio->c);
 }
 
-static void ssd_commit_flushed(struct dm_writecache *wc)
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+	wait_event(wc->bio_in_progress_wait[direction],
+		   !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
 {
 	struct dm_io_region region;
 	struct dm_io_request req;
@@ -488,17 +494,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc)
 	writecache_notify_io(0, &endio);
 	wait_for_completion_io(&endio.c);
 
+	if (wait_for_ios)
+		writecache_wait_for_ios(wc, WRITE);
+
 	writecache_disk_flush(wc, wc->ssd_dev);
 
 	memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
 }
 
-static void writecache_commit_flushed(struct dm_writecache *wc)
+static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
 {
 	if (WC_MODE_PMEM(wc))
 		wmb();
 	else
-		ssd_commit_flushed(wc);
+		ssd_commit_flushed(wc, wait_for_ios);
 }
 
 static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
@@ -522,12 +531,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
 		writecache_error(wc, r, "error flushing metadata: %d", r);
 }
 
-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
-{
-	wait_event(wc->bio_in_progress_wait[direction],
-		   !atomic_read(&wc->bio_in_progress[direction]));
-}
-
 #define WFE_RETURN_FOLLOWING	1
 #define WFE_LOWEST_SEQ		2
 
@@ -724,15 +727,12 @@ static void writecache_flush(struct dm_writecache *wc)
 		e = e2;
 		cond_resched();
 	}
-	writecache_commit_flushed(wc);
-
-	if (!WC_MODE_PMEM(wc))
-		writecache_wait_for_ios(wc, WRITE);
+	writecache_commit_flushed(wc, true);
 
 	wc->seq_count++;
 	pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
 	writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
-	writecache_commit_flushed(wc);
+	writecache_commit_flushed(wc, false);
 
 	wc->overwrote_committed = false;
 
@@ -756,7 +756,7 @@ static void writecache_flush(struct dm_writecache *wc)
 	}
 
 	if (need_flush_after_free)
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 }
 
 static void writecache_flush_work(struct work_struct *work)
@@ -809,7 +809,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_
 	}
 
 	if (discarded_something)
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 }
 
 static bool writecache_wait_for_writeback(struct dm_writecache *wc)
@@ -958,7 +958,7 @@ static void writecache_resume(struct dm_target *ti)
 
 	if (need_flush) {
 		writecache_flush_all_metadata(wc);
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 	}
 
 	wc_unlock(wc);
@@ -1342,7 +1342,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *
 			wc->writeback_size--;
 			n_walked++;
 			if (unlikely(n_walked >= ENDIO_LATENCY)) {
-				writecache_commit_flushed(wc);
+				writecache_commit_flushed(wc, false);
 				wc_unlock(wc);
 				wc_lock(wc);
 				n_walked = 0;
@@ -1423,7 +1423,7 @@ static int writecache_endio_thread(void *data)
 			writecache_wait_for_ios(wc, READ);
 		}
 
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 
 		wc_unlock(wc);
 	}
@@ -1766,10 +1766,10 @@ static int init_memory(struct dm_writecache *wc)
 		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
 
 	writecache_flush_all_metadata(wc);
-	writecache_commit_flushed(wc);
+	writecache_commit_flushed(wc, false);
 	pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
 	writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
-	writecache_commit_flushed(wc);
+	writecache_commit_flushed(wc, false);
 
 	return 0;
 }
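
The writecache_wait_for_ios() helper that this patch moves above
ssd_commit_flushed() blocks until every in-flight bio in the given
direction has completed. The submission and completion sites are not part
of this patch, so the following is only a sketch of the usual pattern the
helper pairs with (simplified, not the literal driver code):

	/* on submission of a bio to the cache device */
	atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);

	/* on completion of that bio */
	if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
		wake_up(&wc->bio_in_progress_wait[dir]);

	/* the committer waits for the counter to drain to zero */
	wait_event(wc->bio_in_progress_wait[dir],
		   !atomic_read(&wc->bio_in_progress[dir]));

With wait_for_ios set, ssd_commit_flushed() performs this wait after the
metadata writes complete and before writecache_disk_flush(), which is
exactly the ordering the commit message calls for.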



* Re: FAILED: patch "[PATCH] dm writecache: fix incorrect flush sequence when doing SSD" failed to apply to 4.19-stable tree
From: Mikulas Patocka @ 2020-02-07 10:54 UTC
  To: gregkh; +Cc: snitzer, stable



On Fri, 7 Feb 2020, gregkh@linuxfoundation.org wrote:

> 
> The patch below does not apply to the 4.19-stable tree.
> If someone wants it applied there, or to any other stable or longterm
> tree, then please email the backport, including the original git commit
> id to <stable@vger.kernel.org>.
> 
> thanks,
> 
> greg k-h

Hi

Here I'm sending the updated patch for 4.19.

Mikulas
 

commit aa9509209c5ac2f0b35d01a922bf9ae072d0c2fc
Author: Mikulas Patocka <mpatocka@redhat.com>
Date:   Wed Jan 8 10:46:05 2020 -0500

    dm writecache: fix incorrect flush sequence when doing SSD mode commit
    
    When committing state, the function writecache_flush does the following:
    1. write metadata (writecache_commit_flushed)
    2. flush disk cache (writecache_commit_flushed)
    3. wait for data writes to complete (writecache_wait_for_ios)
    4. increase superblock seq_count
    5. write the superblock
    6. flush disk cache
    
    It may happen that at step 3, when we wait for some write to finish, the
    disk reports the write as finished even though the write only hit the disk
    cache and is not yet stored in persistent storage. At step 5 we write the
    superblock, and the disk may persist it before the write that we waited
    for in step 3. If the machine crashes at that point, incorrect data may
    be returned after reboot.
    
    In order to fix the bug, we must swap steps 2 and 3 in the above sequence,
    so that we first wait for writes to complete and then flush the disk
    cache.
    
    Fixes: 48debafe4f2f ("dm: add writecache target")
    Cc: stable@vger.kernel.org # 4.18+
    Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
    Signed-off-by: Mike Snitzer <snitzer@redhat.com>

---
 drivers/md/dm-writecache.c |   41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

Index: linux-stable/drivers/md/dm-writecache.c
===================================================================
--- linux-stable.orig/drivers/md/dm-writecache.c	2020-02-07 11:49:46.000000000 +0100
+++ linux-stable/drivers/md/dm-writecache.c	2020-02-07 11:50:11.000000000 +0100
@@ -447,7 +447,13 @@ static void writecache_notify_io(unsigne
 		complete(&endio->c);
 }
 
-static void ssd_commit_flushed(struct dm_writecache *wc)
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+	wait_event(wc->bio_in_progress_wait[direction],
+		   !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
 {
 	struct dm_io_region region;
 	struct dm_io_request req;
@@ -493,17 +499,20 @@ static void ssd_commit_flushed(struct dm
 	writecache_notify_io(0, &endio);
 	wait_for_completion_io(&endio.c);
 
+	if (wait_for_ios)
+		writecache_wait_for_ios(wc, WRITE);
+
 	writecache_disk_flush(wc, wc->ssd_dev);
 
 	memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
 }
 
-static void writecache_commit_flushed(struct dm_writecache *wc)
+static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
 {
 	if (WC_MODE_PMEM(wc))
 		wmb();
 	else
-		ssd_commit_flushed(wc);
+		ssd_commit_flushed(wc, wait_for_ios);
 }
 
 static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
@@ -527,12 +536,6 @@ static void writecache_disk_flush(struct
 		writecache_error(wc, r, "error flushing metadata: %d", r);
 }
 
-static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
-{
-	wait_event(wc->bio_in_progress_wait[direction],
-		   !atomic_read(&wc->bio_in_progress[direction]));
-}
-
 #define WFE_RETURN_FOLLOWING	1
 #define WFE_LOWEST_SEQ		2
 
@@ -730,14 +733,12 @@ static void writecache_flush(struct dm_w
 		e = e2;
 		cond_resched();
 	}
-	writecache_commit_flushed(wc);
-
-	writecache_wait_for_ios(wc, WRITE);
+	writecache_commit_flushed(wc, true);
 
 	wc->seq_count++;
 	pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
 	writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
-	writecache_commit_flushed(wc);
+	writecache_commit_flushed(wc, false);
 
 	wc->overwrote_committed = false;
 
@@ -761,7 +762,7 @@ static void writecache_flush(struct dm_w
 	}
 
 	if (need_flush_after_free)
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 }
 
 static void writecache_flush_work(struct work_struct *work)
@@ -814,7 +815,7 @@ static void writecache_discard(struct dm
 	}
 
 	if (discarded_something)
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 }
 
 static bool writecache_wait_for_writeback(struct dm_writecache *wc)
@@ -963,7 +964,7 @@ erase_this:
 
 	if (need_flush) {
 		writecache_flush_all_metadata(wc);
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 	}
 
 	wc_unlock(wc);
@@ -1347,7 +1348,7 @@ static void __writecache_endio_pmem(stru
 			wc->writeback_size--;
 			n_walked++;
 			if (unlikely(n_walked >= ENDIO_LATENCY)) {
-				writecache_commit_flushed(wc);
+				writecache_commit_flushed(wc, false);
 				wc_unlock(wc);
 				wc_lock(wc);
 				n_walked = 0;
@@ -1428,7 +1429,7 @@ pop_from_list:
 			writecache_wait_for_ios(wc, READ);
 		}
 
-		writecache_commit_flushed(wc);
+		writecache_commit_flushed(wc, false);
 
 		wc_unlock(wc);
 	}
@@ -1759,10 +1760,10 @@ static int init_memory(struct dm_writeca
 		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
 
 	writecache_flush_all_metadata(wc);
-	writecache_commit_flushed(wc);
+	writecache_commit_flushed(wc, false);
 	pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
 	writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
-	writecache_commit_flushed(wc);
+	writecache_commit_flushed(wc, false);
 
 	return 0;
 }
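
For context, the likely reason the mainline commit did not apply to 4.19
is visible in the writecache_flush hunks: in Linus's tree the pre-fix
caller guarded the wait with WC_MODE_PMEM, while 4.19 called it
unconditionally, so the removed context differs between the two diffs:

	/* mainline, before the fix */
	writecache_commit_flushed(wc);
	if (!WC_MODE_PMEM(wc))
		writecache_wait_for_ios(wc, WRITE);

	/* 4.19, before the fix */
	writecache_commit_flushed(wc);
	writecache_wait_for_ios(wc, WRITE);

The replacement code, writecache_commit_flushed(wc, true), is the same in
both trees.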



* Re: FAILED: patch "[PATCH] dm writecache: fix incorrect flush sequence when doing SSD" failed to apply to 4.19-stable tree
From: Greg KH @ 2020-02-07 12:12 UTC
  To: Mikulas Patocka; +Cc: snitzer, stable

On Fri, Feb 07, 2020 at 05:54:29AM -0500, Mikulas Patocka wrote:
> 
> 
> On Fri, 7 Feb 2020, gregkh@linuxfoundation.org wrote:
> 
> > 
> > The patch below does not apply to the 4.19-stable tree.
> > If someone wants it applied there, or to any other stable or longterm
> > tree, then please email the backport, including the original git commit
> > id to <stable@vger.kernel.org>.
> > 
> > thanks,
> > 
> > greg k-h
> 
> Hi
> 
> Here I'm sending the updated patch for 4.19.

Thanks, now queued up.

greg k-h
