* [dm-devel] [PATCH] dm-integrity: Fix flush with external metadata device
@ 2020-12-20 13:02 Lukas Straub
  2020-12-29  9:47 ` Lukas Straub
  2021-01-04 20:30 ` [dm-devel] " Mike Snitzer
  0 siblings, 2 replies; 8+ messages in thread
From: Lukas Straub @ 2020-12-20 13:02 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka, Mike Snitzer, Alasdair Kergon


With an external metadata device, flush requests aren't passed down
to the data device.

Fix this by issuing a flush in the right places: in integrity_commit
when not in journal mode, in do_journal_write after writing the
contents of the journal to the disk, and in dm_integrity_postsuspend.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 drivers/md/dm-integrity.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 5a7a1b90e671..a26ed65869f6 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
 	if (unlikely(ic->mode != 'J')) {
 		spin_unlock_irq(&ic->endio_wait.lock);
 		dm_integrity_flush_buffers(ic);
+		if (ic->meta_dev)
+			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
 		goto release_flush_bios;
 	}
 
@@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
 	wait_for_completion_io(&comp.comp);
 
 	dm_integrity_flush_buffers(ic);
+	if (ic->meta_dev)
+		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+
 }
 
 static void integrity_writer(struct work_struct *w)
@@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
 #endif
 	}
 
+	if (ic->meta_dev)
+		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+
 	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
 
 	ic->journal_uptodate = true;
-- 
2.20.1


* Re: [dm-devel] [PATCH] dm-integrity: Fix flush with external metadata device
  2020-12-20 13:02 [dm-devel] [PATCH] dm-integrity: Fix flush with external metadata device Lukas Straub
@ 2020-12-29  9:47 ` Lukas Straub
  2021-01-04 20:30 ` [dm-devel] " Mike Snitzer
  1 sibling, 0 replies; 8+ messages in thread
From: Lukas Straub @ 2020-12-29  9:47 UTC (permalink / raw)
  To: dm-devel; +Cc: Mikulas Patocka, Mike Snitzer, Alasdair Kergon


On Sun, 20 Dec 2020 14:02:22 +0100
Lukas Straub <lukasstraub2@web.de> wrote:

> With an external metadata device, flush requests aren't passed down
> to the data device.
> 
> Fix this by issuing a flush in the right places: in integrity_commit
> when not in journal mode, in do_journal_write after writing the
> contents of the journal to the disk, and in dm_integrity_postsuspend.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  drivers/md/dm-integrity.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index 5a7a1b90e671..a26ed65869f6 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
>  	if (unlikely(ic->mode != 'J')) {
>  		spin_unlock_irq(&ic->endio_wait.lock);
>  		dm_integrity_flush_buffers(ic);
> +		if (ic->meta_dev)
> +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
>  		goto release_flush_bios;
>  	}
>  
> @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
>  	wait_for_completion_io(&comp.comp);
>  
>  	dm_integrity_flush_buffers(ic);
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  }
>  
>  static void integrity_writer(struct work_struct *w)
> @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
>  #endif
>  	}
>  
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
>  
>  	ic->journal_uptodate = true;

Ping...


* Re: [dm-devel] dm-integrity: Fix flush with external metadata device
  2020-12-20 13:02 [dm-devel] [PATCH] dm-integrity: Fix flush with external metadata device Lukas Straub
  2020-12-29  9:47 ` Lukas Straub
@ 2021-01-04 20:30 ` Mike Snitzer
  2021-01-08 16:12   ` Mikulas Patocka
  1 sibling, 1 reply; 8+ messages in thread
From: Mike Snitzer @ 2021-01-04 20:30 UTC (permalink / raw)
  To: Lukas Straub; +Cc: dm-devel, Mikulas Patocka

On Sun, Dec 20 2020 at  8:02am -0500,
Lukas Straub <lukasstraub2@web.de> wrote:

> With an external metadata device, flush requests aren't passed down
> to the data device.
> 
> Fix this by issuing a flush in the right places: in integrity_commit
> when not in journal mode, in do_journal_write after writing the
> contents of the journal to the disk, and in dm_integrity_postsuspend.
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
>  drivers/md/dm-integrity.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index 5a7a1b90e671..a26ed65869f6 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
>  	if (unlikely(ic->mode != 'J')) {
>  		spin_unlock_irq(&ic->endio_wait.lock);
>  		dm_integrity_flush_buffers(ic);
> +		if (ic->meta_dev)
> +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
>  		goto release_flush_bios;
>  	}
>  
> @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
>  	wait_for_completion_io(&comp.comp);
>  
>  	dm_integrity_flush_buffers(ic);
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  }
>  
>  static void integrity_writer(struct work_struct *w)
> @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
>  #endif
>  	}
>  
> +	if (ic->meta_dev)
> +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +
>  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
>  
>  	ic->journal_uptodate = true;
> -- 
> 2.20.1


Seems like a pretty bad oversight... but shouldn't you also make sure to
flush the data device _before_ the metadata is flushed?

Mike


* Re: [dm-devel] dm-integrity: Fix flush with external metadata device
  2021-01-04 20:30 ` [dm-devel] " Mike Snitzer
@ 2021-01-08 16:12   ` Mikulas Patocka
  2021-01-08 16:15     ` [dm-devel] [PATCH] " Mikulas Patocka
  2021-01-08 18:38     ` [dm-devel] " Mike Snitzer
  0 siblings, 2 replies; 8+ messages in thread
From: Mikulas Patocka @ 2021-01-08 16:12 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Lukas Straub



On Mon, 4 Jan 2021, Mike Snitzer wrote:

> On Sun, Dec 20 2020 at  8:02am -0500,
> Lukas Straub <lukasstraub2@web.de> wrote:
> 
> > With an external metadata device, flush requests aren't passed down
> > to the data device.
> > 
> > Fix this by issuing a flush in the right places: in integrity_commit
> > when not in journal mode, in do_journal_write after writing the
> > contents of the journal to the disk, and in dm_integrity_postsuspend.
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> >  drivers/md/dm-integrity.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index 5a7a1b90e671..a26ed65869f6 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
> >  	if (unlikely(ic->mode != 'J')) {
> >  		spin_unlock_irq(&ic->endio_wait.lock);
> >  		dm_integrity_flush_buffers(ic);
> > +		if (ic->meta_dev)
> > +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> >  		goto release_flush_bios;
> >  	}
> >  
> > @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
> >  	wait_for_completion_io(&comp.comp);
> >  
> >  	dm_integrity_flush_buffers(ic);
> > +	if (ic->meta_dev)
> > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > +
> >  }
> >  
> >  static void integrity_writer(struct work_struct *w)
> > @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
> >  #endif
> >  	}
> >  
> > +	if (ic->meta_dev)
> > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > +
> >  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
> >  
> >  	ic->journal_uptodate = true;
> > -- 
> > 2.20.1
> 
> 
> Seems like a pretty bad oversight... but shouldn't you also make sure to
> flush the data device _before_ the metadata is flushed?
> 
> Mike

I think ordering is not a problem.

A disk may flush its cache spontaneously at any time, so it doesn't
matter in which order we flush them. Similarly, a dm-bufio buffer may be
flushed at any time - if the machine is running out of memory and the
dm-bufio shrinker is called.

I'll send another patch for this - I've created a patch that flushes the
metadata device cache and the data device cache in parallel, so that the
performance degradation is reduced.

My patch also doesn't use a GFP_NOIO allocation, which can in theory
deadlock if we are swapping on a dm-integrity device.
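
The idea, as a rough sketch (submit_data_flush() here is only a
hypothetical stand-in for the async dm_io-based flush in the actual
patch):

	struct completion comp;
	int r;

	init_completion(&comp);
	/* kick off the data device cache flush asynchronously */
	submit_data_flush(ic->dev->bdev, &comp);
	/* overlapped with it, write out the dirty metadata buffers */
	r = dm_bufio_write_dirty_buffers(ic->bufio);
	if (unlikely(r))
		dm_integrity_io_error(ic, "writing tags", r);
	/* wait until the data device cache flush has finished, too */
	wait_for_completion(&comp);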

Mikulas


* [dm-devel] [PATCH] dm-integrity: Fix flush with external metadata device
  2021-01-08 16:12   ` Mikulas Patocka
@ 2021-01-08 16:15     ` Mikulas Patocka
  2021-01-10 10:04       ` Lukas Straub
  2021-01-08 18:38     ` [dm-devel] " Mike Snitzer
  1 sibling, 1 reply; 8+ messages in thread
From: Mikulas Patocka @ 2021-01-08 16:15 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Lukas Straub

With an external metadata device, flush requests are not passed down to
the data device.

Fix this by submitting the flush request in dm_integrity_flush_buffers. In
order to not degrade performance, we overlap the data device flush with
the metadata device flush.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-by: Lukas Straub <lukasstraub2@web.de>
Cc: stable@vger.kernel.org

---
 drivers/md/dm-bufio.c     |    6 ++++
 drivers/md/dm-integrity.c |   60 +++++++++++++++++++++++++++++++++++++---------
 include/linux/dm-bufio.h  |    1 
 3 files changed, 56 insertions(+), 11 deletions(-)

Index: linux-2.6/drivers/md/dm-integrity.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-integrity.c	2021-01-07 17:22:39.000000000 +0100
+++ linux-2.6/drivers/md/dm-integrity.c	2021-01-08 15:51:19.000000000 +0100
@@ -1379,12 +1379,52 @@ thorough_test:
 #undef MAY_BE_HASH
 }
 
-static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
+struct flush_request {
+	struct dm_io_request io_req;
+	struct dm_io_region io_reg;
+	struct dm_integrity_c *ic;
+	struct completion comp;
+};
+
+static void flush_notify(unsigned long error, void *fr_)
+{
+	struct flush_request *fr = fr_;
+	if (unlikely(error != 0))
+		dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
+	complete(&fr->comp);
+}
+
+static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
 {
 	int r;
+
+	struct flush_request fr;
+
+	if (!ic->meta_dev)
+		flush_data = false;
+	if (flush_data) {
+		fr.io_req.bi_op = REQ_OP_WRITE,
+		fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+		fr.io_req.mem.type = DM_IO_KMEM,
+		fr.io_req.mem.ptr.addr = NULL,
+		fr.io_req.notify.fn = flush_notify,
+		fr.io_req.notify.context = &fr;
+		fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
+		fr.io_reg.bdev = ic->dev->bdev,
+		fr.io_reg.sector = 0,
+		fr.io_reg.count = 0,
+		fr.ic = ic;
+		init_completion(&fr.comp);
+		r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
+		BUG_ON(r);
+	}
+
 	r = dm_bufio_write_dirty_buffers(ic->bufio);
 	if (unlikely(r))
 		dm_integrity_io_error(ic, "writing tags", r);
+
+	if (flush_data)
+		wait_for_completion(&fr.comp);
 }
 
 static void sleep_on_endio_wait(struct dm_integrity_c *ic)
@@ -2110,7 +2150,7 @@ offload_to_thread:
 
 	if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
 		integrity_metadata(&dio->work);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, false);
 
 		dio->in_flight = (atomic_t)ATOMIC_INIT(1);
 		dio->completion = NULL;
@@ -2195,7 +2235,7 @@ static void integrity_commit(struct work
 	flushes = bio_list_get(&ic->flush_bio_list);
 	if (unlikely(ic->mode != 'J')) {
 		spin_unlock_irq(&ic->endio_wait.lock);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
 		goto release_flush_bios;
 	}
 
@@ -2409,7 +2449,7 @@ skip_io:
 	complete_journal_op(&comp);
 	wait_for_completion_io(&comp.comp);
 
-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, true);
 }
 
 static void integrity_writer(struct work_struct *w)
@@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm
 {
 	int r;
 
-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, false);
 	if (dm_integrity_failed(ic))
 		return;
 
@@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct wor
 	unsigned long limit;
 	struct bio *bio;
 
-	dm_integrity_flush_buffers(ic);
+	dm_integrity_flush_buffers(ic, false);
 
 	range.logical_sector = 0;
 	range.n_sectors = ic->provided_data_sectors;
@@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct wor
 	add_new_range_and_wait(ic, &range);
 	spin_unlock_irq(&ic->endio_wait.lock);
 
-	dm_integrity_flush_buffers(ic);
-	if (ic->meta_dev)
-		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+	dm_integrity_flush_buffers(ic, true);
 
 	limit = ic->provided_data_sectors;
 	if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
@@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(str
 		if (ic->meta_dev)
 			queue_work(ic->writer_wq, &ic->writer_work);
 		drain_workqueue(ic->writer_wq);
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
 	}
 
 	if (ic->mode == 'B') {
-		dm_integrity_flush_buffers(ic);
+		dm_integrity_flush_buffers(ic, true);
 #if 1
 		/* set to 0 to test bitmap replay code */
 		init_journal(ic, 0, ic->journal_sections, 0);
Index: linux-2.6/include/linux/dm-bufio.h
===================================================================
--- linux-2.6.orig/include/linux/dm-bufio.h	2020-09-05 10:01:42.000000000 +0200
+++ linux-2.6/include/linux/dm-bufio.h	2021-01-08 15:12:31.000000000 +0100
@@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct
 
 unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c);
 sector_t dm_bufio_get_block_number(struct dm_buffer *b);
 void *dm_bufio_get_block_data(struct dm_buffer *b);
 void *dm_bufio_get_aux_data(struct dm_buffer *b);
Index: linux-2.6/drivers/md/dm-bufio.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-bufio.c	2021-01-08 15:11:20.000000000 +0100
+++ linux-2.6/drivers/md/dm-bufio.c	2021-01-08 15:12:25.000000000 +0100
@@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct
 }
 EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
 
+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
+{
+	return c->dm_io;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
+
 sector_t dm_bufio_get_block_number(struct dm_buffer *b)
 {
 	return b->block;


* Re: [dm-devel] dm-integrity: Fix flush with external metadata device
  2021-01-08 16:12   ` Mikulas Patocka
  2021-01-08 16:15     ` [dm-devel] [PATCH] " Mikulas Patocka
@ 2021-01-08 18:38     ` Mike Snitzer
  2021-01-08 19:00       ` Mikulas Patocka
  1 sibling, 1 reply; 8+ messages in thread
From: Mike Snitzer @ 2021-01-08 18:38 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Lukas Straub

On Fri, Jan 08 2021 at 11:12am -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> 
> 
> On Mon, 4 Jan 2021, Mike Snitzer wrote:
> 
> > On Sun, Dec 20 2020 at  8:02am -0500,
> > Lukas Straub <lukasstraub2@web.de> wrote:
> > 
> > > With an external metadata device, flush requests aren't passed down
> > > to the data device.
> > > 
> > > Fix this by issuing a flush in the right places: in integrity_commit
> > > when not in journal mode, in do_journal_write after writing the
> > > contents of the journal to the disk, and in dm_integrity_postsuspend.
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > ---
> > >  drivers/md/dm-integrity.c | 8 ++++++++
> > >  1 file changed, 8 insertions(+)
> > > 
> > > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > > index 5a7a1b90e671..a26ed65869f6 100644
> > > --- a/drivers/md/dm-integrity.c
> > > +++ b/drivers/md/dm-integrity.c
> > > @@ -2196,6 +2196,8 @@ static void integrity_commit(struct work_struct *w)
> > >  	if (unlikely(ic->mode != 'J')) {
> > >  		spin_unlock_irq(&ic->endio_wait.lock);
> > >  		dm_integrity_flush_buffers(ic);
> > > +		if (ic->meta_dev)
> > > +			blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > >  		goto release_flush_bios;
> > >  	}
> > >  
> > > @@ -2410,6 +2412,9 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
> > >  	wait_for_completion_io(&comp.comp);
> > >  
> > >  	dm_integrity_flush_buffers(ic);
> > > +	if (ic->meta_dev)
> > > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > > +
> > >  }
> > >  
> > >  static void integrity_writer(struct work_struct *w)
> > > @@ -2949,6 +2954,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
> > >  #endif
> > >  	}
> > >  
> > > +	if (ic->meta_dev)
> > > +		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> > > +
> > >  	BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
> > >  
> > >  	ic->journal_uptodate = true;
> > > -- 
> > > 2.20.1
> > 
> > 
> > Seems like a pretty bad oversight... but shouldn't you also make sure to
> > flush the data device _before_ the metadata is flushed?
> > 
> > Mike
> 
> I think ordering is not a problem.
> 
> A disk may flush its cache spontaneously at any time, so it doesn't
> matter in which order we flush them. Similarly, a dm-bufio buffer may be
> flushed at any time - if the machine is running out of memory and the
> dm-bufio shrinker is called.
> 
> I'll send another patch for this - I've created a patch that flushes the
> metadata device cache and the data device cache in parallel, so that the
> performance degradation is reduced.
> 
> My patch also doesn't use a GFP_NOIO allocation, which can in theory
> deadlock if we are swapping on a dm-integrity device.

OK, I see your patch, but my concern about ordering was more to do with
crash consistency.  What if metadata is updated but data isn't?

On the surface, your approach of issuing the flushes in parallel seems
to expose us to inconsistency upon recovery from a crash.
If that isn't the case, please explain why not.

Thanks,
Mike


* Re: [dm-devel] dm-integrity: Fix flush with external metadata device
  2021-01-08 18:38     ` [dm-devel] " Mike Snitzer
@ 2021-01-08 19:00       ` Mikulas Patocka
  0 siblings, 0 replies; 8+ messages in thread
From: Mikulas Patocka @ 2021-01-08 19:00 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Lukas Straub



On Fri, 8 Jan 2021, Mike Snitzer wrote:

> > > Seems like a pretty bad oversight... but shouldn't you also make sure to
> > > flush the data device _before_ the metadata is flushed?
> > > 
> > > Mike
> > 
> > I think ordering is not a problem.
> > 
> > A disk may flush its cache spontaneously at any time, so it doesn't
> > matter in which order we flush them. Similarly, a dm-bufio buffer may
> > be flushed at any time - if the machine is running out of memory and
> > the dm-bufio shrinker is called.
> > 
> > I'll send another patch for this - I've created a patch that flushes
> > the metadata device cache and the data device cache in parallel, so
> > that the performance degradation is reduced.
> > 
> > My patch also doesn't use a GFP_NOIO allocation, which can in theory
> > deadlock if we are swapping on a dm-integrity device.
> 
> OK, I see your patch, but my concern about ordering was more to do with
> crash consistency.  What if metadata is updated but data isn't?

In journal mode: do_journal_write will copy data from the journal to the
appropriate places and then flush both the data and the metadata device.
If a crash happens during the flush, the journal will be replayed on the
next reboot - the replay operation will overwrite both data and metadata
from the journal.

In bitmap mode: bitmap_flush_work will first issue a flush on both the
data and the metadata device, and then clear the dirty bits in the
bitmap. If a crash happens during the flush, the bits in the bitmap are
still set and the checksums will be recalculated on the next reboot.
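
As a sketch of that ordering (clear_dirty_bits() is just an illustrative
name for the step bitmap_flush_work performs):

	dm_integrity_flush_buffers(ic, true);	/* data + metadata durable */
	clear_dirty_bits(ic);			/* only then mark regions clean */
	/* a crash before the clear leaves the bits set, so the affected
	 * sectors are simply recalculated on the next boot */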

> On the surface, your approach of issuing the flushes in parallel seems
> to expose us to inconsistency upon recovery from a crash.
> If that isn't the case, please explain why not.

The disk may flush its internal cache at any time. So, if you do
"blkdev_issue_flush(disk1); blkdev_issue_flush(disk2);", there is no
ordering guarantee at all. The firmware in disk2 may flush its cache
spontaneously, even before you call blkdev_issue_flush(disk1).
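
In other words (data_bdev and meta_bdev are just illustrative
block_device pointers):

	/* neither order creates a cross-device write barrier */
	blkdev_issue_flush(data_bdev, GFP_NOIO);
	blkdev_issue_flush(meta_bdev, GFP_NOIO);
	/* meta_bdev's firmware may have drained its cache spontaneously
	 * before the data_bdev flush was even issued, so issuing the two
	 * flushes in parallel gives up no ordering that ever existed */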

> Thanks,
> Mike

Mikulas


* Re: [dm-devel] [PATCH] dm-integrity: Fix flush with external metadata device
  2021-01-08 16:15     ` [dm-devel] [PATCH] " Mikulas Patocka
@ 2021-01-10 10:04       ` Lukas Straub
  0 siblings, 0 replies; 8+ messages in thread
From: Lukas Straub @ 2021-01-10 10:04 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: dm-devel, Mike Snitzer


On Fri, 8 Jan 2021 11:15:56 -0500 (EST)
Mikulas Patocka <mpatocka@redhat.com> wrote:

> With an external metadata device, flush requests are not passed down to
> the data device.
> 
> Fix this by submitting the flush request in dm_integrity_flush_buffers. In
> order to not degrade performance, we overlap the data device flush with
> the metadata device flush.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Reported-by: Lukas Straub <lukasstraub2@web.de>
> Cc: stable@vger.kernel.org

Looks good to me.
Reviewed-by: Lukas Straub <lukasstraub2@web.de>

Regards,
Lukas Straub

> ---
>  drivers/md/dm-bufio.c     |    6 ++++
>  drivers/md/dm-integrity.c |   60 +++++++++++++++++++++++++++++++++++++---------
>  include/linux/dm-bufio.h  |    1 
>  3 files changed, 56 insertions(+), 11 deletions(-)
> 
> Index: linux-2.6/drivers/md/dm-integrity.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-integrity.c	2021-01-07 17:22:39.000000000 +0100
> +++ linux-2.6/drivers/md/dm-integrity.c	2021-01-08 15:51:19.000000000 +0100
> @@ -1379,12 +1379,52 @@ thorough_test:
>  #undef MAY_BE_HASH
>  }
>  
> -static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
> +struct flush_request {
> +	struct dm_io_request io_req;
> +	struct dm_io_region io_reg;
> +	struct dm_integrity_c *ic;
> +	struct completion comp;
> +};
> +
> +static void flush_notify(unsigned long error, void *fr_)
> +{
> +	struct flush_request *fr = fr_;
> +	if (unlikely(error != 0))
> +		dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
> +	complete(&fr->comp);
> +}
> +
> +static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
>  {
>  	int r;
> +
> +	struct flush_request fr;
> +
> +	if (!ic->meta_dev)
> +		flush_data = false;
> +	if (flush_data) {
> +		fr.io_req.bi_op = REQ_OP_WRITE,
> +		fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
> +		fr.io_req.mem.type = DM_IO_KMEM,
> +		fr.io_req.mem.ptr.addr = NULL,
> +		fr.io_req.notify.fn = flush_notify,
> +		fr.io_req.notify.context = &fr;
> +		fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
> +		fr.io_reg.bdev = ic->dev->bdev,
> +		fr.io_reg.sector = 0,
> +		fr.io_reg.count = 0,
> +		fr.ic = ic;
> +		init_completion(&fr.comp);
> +		r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
> +		BUG_ON(r);
> +	}
> +
>  	r = dm_bufio_write_dirty_buffers(ic->bufio);
>  	if (unlikely(r))
>  		dm_integrity_io_error(ic, "writing tags", r);
> +
> +	if (flush_data)
> +		wait_for_completion(&fr.comp);
>  }
>  
>  static void sleep_on_endio_wait(struct dm_integrity_c *ic)
> @@ -2110,7 +2150,7 @@ offload_to_thread:
>  
>  	if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
>  		integrity_metadata(&dio->work);
> -		dm_integrity_flush_buffers(ic);
> +		dm_integrity_flush_buffers(ic, false);
>  
>  		dio->in_flight = (atomic_t)ATOMIC_INIT(1);
>  		dio->completion = NULL;
> @@ -2195,7 +2235,7 @@ static void integrity_commit(struct work
>  	flushes = bio_list_get(&ic->flush_bio_list);
>  	if (unlikely(ic->mode != 'J')) {
>  		spin_unlock_irq(&ic->endio_wait.lock);
> -		dm_integrity_flush_buffers(ic);
> +		dm_integrity_flush_buffers(ic, true);
>  		goto release_flush_bios;
>  	}
>  
> @@ -2409,7 +2449,7 @@ skip_io:
>  	complete_journal_op(&comp);
>  	wait_for_completion_io(&comp.comp);
>  
> -	dm_integrity_flush_buffers(ic);
> +	dm_integrity_flush_buffers(ic, true);
>  }
>  
>  static void integrity_writer(struct work_struct *w)
> @@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm
>  {
>  	int r;
>  
> -	dm_integrity_flush_buffers(ic);
> +	dm_integrity_flush_buffers(ic, false);
>  	if (dm_integrity_failed(ic))
>  		return;
>  
> @@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct wor
>  	unsigned long limit;
>  	struct bio *bio;
>  
> -	dm_integrity_flush_buffers(ic);
> +	dm_integrity_flush_buffers(ic, false);
>  
>  	range.logical_sector = 0;
>  	range.n_sectors = ic->provided_data_sectors;
> @@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct wor
>  	add_new_range_and_wait(ic, &range);
>  	spin_unlock_irq(&ic->endio_wait.lock);
>  
> -	dm_integrity_flush_buffers(ic);
> -	if (ic->meta_dev)
> -		blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
> +	dm_integrity_flush_buffers(ic, true);
>  
>  	limit = ic->provided_data_sectors;
>  	if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
> @@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(str
>  		if (ic->meta_dev)
>  			queue_work(ic->writer_wq, &ic->writer_work);
>  		drain_workqueue(ic->writer_wq);
> -		dm_integrity_flush_buffers(ic);
> +		dm_integrity_flush_buffers(ic, true);
>  	}
>  
>  	if (ic->mode == 'B') {
> -		dm_integrity_flush_buffers(ic);
> +		dm_integrity_flush_buffers(ic, true);
>  #if 1
>  		/* set to 0 to test bitmap replay code */
>  		init_journal(ic, 0, ic->journal_sections, 0);
> Index: linux-2.6/include/linux/dm-bufio.h
> ===================================================================
> --- linux-2.6.orig/include/linux/dm-bufio.h	2020-09-05 10:01:42.000000000 +0200
> +++ linux-2.6/include/linux/dm-bufio.h	2021-01-08 15:12:31.000000000 +0100
> @@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct
>  
>  unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
>  sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c);
>  sector_t dm_bufio_get_block_number(struct dm_buffer *b);
>  void *dm_bufio_get_block_data(struct dm_buffer *b);
>  void *dm_bufio_get_aux_data(struct dm_buffer *b);
> Index: linux-2.6/drivers/md/dm-bufio.c
> ===================================================================
> --- linux-2.6.orig/drivers/md/dm-bufio.c	2021-01-08 15:11:20.000000000 +0100
> +++ linux-2.6/drivers/md/dm-bufio.c	2021-01-08 15:12:25.000000000 +0100
> @@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct
>  }
>  EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
>  
> +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
> +{
> +	return c->dm_io;
> +}
> +EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
> +
>  sector_t dm_bufio_get_block_number(struct dm_buffer *b)
>  {
>  	return b->block;
> 



