linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
@ 2020-02-27 13:26 Lukas Straub
  2020-03-24 17:18 ` Mike Snitzer
  2020-03-30 16:34 ` [dm-devel] " Mikulas Patocka
  0 siblings, 2 replies; 8+ messages in thread
From: Lukas Straub @ 2020-02-27 13:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: Alasdair Kergon, Mike Snitzer, dm-devel

If a full metadata buffer is being written, don't read it first. This
prevents a read-modify-write cycle and increases performance on HDDs
considerably.

To do this we now calculate the checksums for all sectors in the bio in one
go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
which now checks if we overwrite the whole buffer.

Benchmarking with a 5400RPM HDD with bitmap mode:
integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress

Without patch:
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s

With patch:
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
Hello Everyone,
So here is v2, now checking if we overwrite a whole metadata buffer instead
of the (buggy) check if we overwrite a whole tag area before.
Performance stayed the same (with --buffer-sectors=1).

The only quirk now is that it advertises a very big optimal io size in the
standard configuration (where buffer_sectors=128). Is this a Problem?

v2:
 -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
 -fix optimal io size to check if we overwrite a whole metadata buffer

Regards,
Lukas Straub

 drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 34 deletions(-)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index b225b3e445fa..a6d3cf1406df 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
 		if (unlikely(r))
 			return r;

-		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
-		if (IS_ERR(data))
-			return PTR_ERR(data);
+		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
+		if (op == TAG_WRITE && *metadata_offset == 0 \
+			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
+			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		} else {
+			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}

 		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
 		dp = data + *metadata_offset;
@@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
 {
 	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
 	struct dm_integrity_c *ic = dio->ic;
+	unsigned sectors_to_process = dio->range.n_sectors;
+	sector_t sector = dio->range.logical_sector;

 	int r;

@@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
 		struct bio_vec bv;
 		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
 		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
-		char *checksums;
+		char *checksums, *checksums_ptr;
 		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
 		char checksums_onstack[HASH_MAX_DIGESTSIZE];
-		unsigned sectors_to_process = dio->range.n_sectors;
-		sector_t sector = dio->range.logical_sector;

 		if (unlikely(ic->mode == 'R'))
 			goto skip_io;

-		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
+		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
 				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
 		if (!checksums) {
 			checksums = checksums_onstack;
@@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
 			}
 		}

+		checksums_ptr = checksums;
 		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
 			unsigned pos;
-			char *mem, *checksums_ptr;
-
-again:
+			char *mem;
 			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
 			pos = 0;
-			checksums_ptr = checksums;
 			do {
 				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
-				checksums_ptr += ic->tag_size;
-				sectors_to_process -= ic->sectors_per_block;
+
+				if (likely(checksums != checksums_onstack)) {
+					checksums_ptr += ic->tag_size;
+				} else {
+					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
+					if (unlikely(r))
+						goto internal_hash_error;
+				}
+
 				pos += ic->sectors_per_block << SECTOR_SHIFT;
 				sector += ic->sectors_per_block;
-			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
+				sectors_to_process -= ic->sectors_per_block;
+			} while (pos < bv.bv_len && sectors_to_process);
 			kunmap_atomic(mem);

-			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
-						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
-			if (unlikely(r)) {
-				if (r > 0) {
-					DMERR_LIMIT("Checksum failed at sector 0x%llx",
-						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
-					r = -EILSEQ;
-					atomic64_inc(&ic->number_of_mismatches);
-				}
-				if (likely(checksums != checksums_onstack))
-					kfree(checksums);
-				goto error;
-			}
-
 			if (!sectors_to_process)
 				break;
+		}

-			if (unlikely(pos < bv.bv_len)) {
-				bv.bv_offset += pos;
-				bv.bv_len -= pos;
-				goto again;
+		if (likely(checksums != checksums_onstack)) {
+			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
+						!dio->write ? TAG_CMP : TAG_WRITE);
+			if (unlikely(r)) {
+				kfree(checksums);
+				goto internal_hash_error;
 			}
+			kfree(checksums);
 		}

-		if (likely(checksums != checksums_onstack))
-			kfree(checksums);
 	} else {
 		struct bio_integrity_payload *bip = dio->orig_bi_integrity;

@@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
 skip_io:
 	dec_in_flight(dio);
 	return;
+internal_hash_error:
+	if (r > 0) {
+		DMERR_LIMIT("Checksum failed at sector 0x%llx",
+				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
+		r = -EILSEQ;
+		atomic64_inc(&ic->number_of_mismatches);
+	}
 error:
 	dio->bi_status = errno_to_blk_status(r);
 	dec_in_flight(dio);
@@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
 		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
 		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
 	}
+
+	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
 }

 static void calculate_journal_section_size(struct dm_integrity_c *ic)
--
2.20.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-02-27 13:26 [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes Lukas Straub
@ 2020-03-24 17:18 ` Mike Snitzer
  2020-03-24 18:59   ` Lukas Straub
  2020-03-30 16:34 ` [dm-devel] " Mikulas Patocka
  1 sibling, 1 reply; 8+ messages in thread
From: Mike Snitzer @ 2020-03-24 17:18 UTC (permalink / raw)
  To: Lukas Straub; +Cc: linux-kernel, Alasdair Kergon, dm-devel, Mikulas Patocka

On Thu, Feb 27 2020 at  8:26am -0500,
Lukas Straub <lukasstraub2@web.de> wrote:

> If a full metadata buffer is being written, don't read it first. This
> prevents a read-modify-write cycle and increases performance on HDDs
> considerably.
> 
> To do this we now calculate the checksums for all sectors in the bio in one
> go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> which now checks if we overwrite the whole buffer.
> 
> Benchmarking with a 5400RPM HDD with bitmap mode:
> integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> 
> Without patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> 
> With patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> ---
> Hello Everyone,
> So here is v2, now checking if we overwrite a whole metadata buffer instead
> of the (buggy) check if we overwrite a whole tag area before.
> Performance stayed the same (with --buffer-sectors=1).
> 
> The only quirk now is that it advertises a very big optimal io size in the
> standard configuration (where buffer_sectors=128). Is this a Problem?
> 
> v2:
>  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
>  -fix optimal io size to check if we overwrite a whole metadata buffer
> 
> Regards,
> Lukas Straub


Not sure why you didn't cc Mikulas but I just checked with him and he
thinks:

You're only seeing a boost because you're using 512-byte sector and
512-byte buffer. Patch helps that case but hurts in most other cases
(due to small buffers).

Mike


>  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
>  1 file changed, 47 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index b225b3e445fa..a6d3cf1406df 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
>  		if (unlikely(r))
>  			return r;
> 
> -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> -		if (IS_ERR(data))
> -			return PTR_ERR(data);
> +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> +		if (op == TAG_WRITE && *metadata_offset == 0 \
> +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		} else {
> +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		}
> 
>  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
>  		dp = data + *metadata_offset;
> @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
>  {
>  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
>  	struct dm_integrity_c *ic = dio->ic;
> +	unsigned sectors_to_process = dio->range.n_sectors;
> +	sector_t sector = dio->range.logical_sector;
> 
>  	int r;
> 
> @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
>  		struct bio_vec bv;
>  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
>  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> -		char *checksums;
> +		char *checksums, *checksums_ptr;
>  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
>  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> -		unsigned sectors_to_process = dio->range.n_sectors;
> -		sector_t sector = dio->range.logical_sector;
> 
>  		if (unlikely(ic->mode == 'R'))
>  			goto skip_io;
> 
> -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
>  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
>  		if (!checksums) {
>  			checksums = checksums_onstack;
> @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
>  			}
>  		}
> 
> +		checksums_ptr = checksums;
>  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
>  			unsigned pos;
> -			char *mem, *checksums_ptr;
> -
> -again:
> +			char *mem;
>  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
>  			pos = 0;
> -			checksums_ptr = checksums;
>  			do {
>  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> -				checksums_ptr += ic->tag_size;
> -				sectors_to_process -= ic->sectors_per_block;
> +
> +				if (likely(checksums != checksums_onstack)) {
> +					checksums_ptr += ic->tag_size;
> +				} else {
> +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> +					if (unlikely(r))
> +						goto internal_hash_error;
> +				}
> +
>  				pos += ic->sectors_per_block << SECTOR_SHIFT;
>  				sector += ic->sectors_per_block;
> -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> +				sectors_to_process -= ic->sectors_per_block;
> +			} while (pos < bv.bv_len && sectors_to_process);
>  			kunmap_atomic(mem);
> 
> -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> -			if (unlikely(r)) {
> -				if (r > 0) {
> -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> -					r = -EILSEQ;
> -					atomic64_inc(&ic->number_of_mismatches);
> -				}
> -				if (likely(checksums != checksums_onstack))
> -					kfree(checksums);
> -				goto error;
> -			}
> -
>  			if (!sectors_to_process)
>  				break;
> +		}
> 
> -			if (unlikely(pos < bv.bv_len)) {
> -				bv.bv_offset += pos;
> -				bv.bv_len -= pos;
> -				goto again;
> +		if (likely(checksums != checksums_onstack)) {
> +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> +						!dio->write ? TAG_CMP : TAG_WRITE);
> +			if (unlikely(r)) {
> +				kfree(checksums);
> +				goto internal_hash_error;
>  			}
> +			kfree(checksums);
>  		}
> 
> -		if (likely(checksums != checksums_onstack))
> -			kfree(checksums);
>  	} else {
>  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> 
> @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
>  skip_io:
>  	dec_in_flight(dio);
>  	return;
> +internal_hash_error:
> +	if (r > 0) {
> +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> +		r = -EILSEQ;
> +		atomic64_inc(&ic->number_of_mismatches);
> +	}
>  error:
>  	dio->bi_status = errno_to_blk_status(r);
>  	dec_in_flight(dio);
> @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
>  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
>  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
>  	}
> +
> +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
>  }
> 
>  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> --
> 2.20.1
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-03-24 17:18 ` Mike Snitzer
@ 2020-03-24 18:59   ` Lukas Straub
  2020-03-24 19:11     ` Mike Snitzer
  0 siblings, 1 reply; 8+ messages in thread
From: Lukas Straub @ 2020-03-24 18:59 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: linux-kernel, Alasdair Kergon, dm-devel, Mikulas Patocka

[-- Attachment #1: Type: text/plain, Size: 8674 bytes --]

On Tue, 24 Mar 2020 13:18:22 -0400
Mike Snitzer <snitzer@redhat.com> wrote:

> On Thu, Feb 27 2020 at  8:26am -0500,
> Lukas Straub <lukasstraub2@web.de> wrote:
> 
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> > 
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> > 
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > 
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > 
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> > 
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> > 
> > v2:
> >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > 
> > Regards,
> > Lukas Straub  
> 
> 
> Not sure why you didn't cc Mikulas but I just checked with him and he
> thinks:
> 
> You're only seeing a boost because you're using 512-byte sector and
> 512-byte buffer. Patch helps that case but hurts in most other cases
> (due to small buffers).

Hmm, buffer-sectors is still user configurable. If the user wants fast
write performance on slow HDDs he can set a small buffer-sector and
benefit from this patch. With the default buffer-sectors=128 it
shouldn't have any impact.

Regards,
Lukas Straub


> Mike
> 
> 
> >  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> >  		if (unlikely(r))
> >  			return r;
> > 
> > -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -		if (IS_ERR(data))
> > -			return PTR_ERR(data);
> > +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > +		if (op == TAG_WRITE && *metadata_offset == 0 \
> > +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		} else {
> > +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		}
> > 
> >  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> >  		dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> >  	struct dm_integrity_c *ic = dio->ic;
> > +	unsigned sectors_to_process = dio->range.n_sectors;
> > +	sector_t sector = dio->range.logical_sector;
> > 
> >  	int r;
> > 
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> >  		struct bio_vec bv;
> >  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> >  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > -		char *checksums;
> > +		char *checksums, *checksums_ptr;
> >  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> >  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -		unsigned sectors_to_process = dio->range.n_sectors;
> > -		sector_t sector = dio->range.logical_sector;
> > 
> >  		if (unlikely(ic->mode == 'R'))
> >  			goto skip_io;
> > 
> > -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >  		if (!checksums) {
> >  			checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> >  			}
> >  		}
> > 
> > +		checksums_ptr = checksums;
> >  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >  			unsigned pos;
> > -			char *mem, *checksums_ptr;
> > -
> > -again:
> > +			char *mem;
> >  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >  			pos = 0;
> > -			checksums_ptr = checksums;
> >  			do {
> >  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > -				checksums_ptr += ic->tag_size;
> > -				sectors_to_process -= ic->sectors_per_block;
> > +
> > +				if (likely(checksums != checksums_onstack)) {
> > +					checksums_ptr += ic->tag_size;
> > +				} else {
> > +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > +					if (unlikely(r))
> > +						goto internal_hash_error;
> > +				}
> > +
> >  				pos += ic->sectors_per_block << SECTOR_SHIFT;
> >  				sector += ic->sectors_per_block;
> > -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > +				sectors_to_process -= ic->sectors_per_block;
> > +			} while (pos < bv.bv_len && sectors_to_process);
> >  			kunmap_atomic(mem);
> > 
> > -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > -			if (unlikely(r)) {
> > -				if (r > 0) {
> > -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -					r = -EILSEQ;
> > -					atomic64_inc(&ic->number_of_mismatches);
> > -				}
> > -				if (likely(checksums != checksums_onstack))
> > -					kfree(checksums);
> > -				goto error;
> > -			}
> > -
> >  			if (!sectors_to_process)
> >  				break;
> > +		}
> > 
> > -			if (unlikely(pos < bv.bv_len)) {
> > -				bv.bv_offset += pos;
> > -				bv.bv_len -= pos;
> > -				goto again;
> > +		if (likely(checksums != checksums_onstack)) {
> > +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +						!dio->write ? TAG_CMP : TAG_WRITE);
> > +			if (unlikely(r)) {
> > +				kfree(checksums);
> > +				goto internal_hash_error;
> >  			}
> > +			kfree(checksums);
> >  		}
> > 
> > -		if (likely(checksums != checksums_onstack))
> > -			kfree(checksums);
> >  	} else {
> >  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> > 
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >  	dec_in_flight(dio);
> >  	return;
> > +internal_hash_error:
> > +	if (r > 0) {
> > +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > +		r = -EILSEQ;
> > +		atomic64_inc(&ic->number_of_mismatches);
> > +	}
> >  error:
> >  	dio->bi_status = errno_to_blk_status(r);
> >  	dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> >  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> >  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> >  	}
> > +
> > +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> >  }
> > 
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> >   
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-03-24 18:59   ` Lukas Straub
@ 2020-03-24 19:11     ` Mike Snitzer
  2020-03-26 11:24       ` Lukas Straub
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Snitzer @ 2020-03-24 19:11 UTC (permalink / raw)
  To: Lukas Straub; +Cc: linux-kernel, Alasdair Kergon, dm-devel, Mikulas Patocka

On Tue, Mar 24 2020 at  2:59pm -0400,
Lukas Straub <lukasstraub2@web.de> wrote:

> On Tue, 24 Mar 2020 13:18:22 -0400
> Mike Snitzer <snitzer@redhat.com> wrote:
> 
> > On Thu, Feb 27 2020 at  8:26am -0500,
> > Lukas Straub <lukasstraub2@web.de> wrote:
> > 
> > > If a full metadata buffer is being written, don't read it first. This
> > > prevents a read-modify-write cycle and increases performance on HDDs
> > > considerably.
> > > 
> > > To do this we now calculate the checksums for all sectors in the bio in one
> > > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > > which now checks if we overwrite the whole buffer.
> > > 
> > > Benchmarking with a 5400RPM HDD with bitmap mode:
> > > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > > 
> > > Without patch:
> > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > > 
> > > With patch:
> > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > > 
> > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > ---
> > > Hello Everyone,
> > > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > > of the (buggy) check if we overwrite a whole tag area before.
> > > Performance stayed the same (with --buffer-sectors=1).
> > > 
> > > The only quirk now is that it advertises a very big optimal io size in the
> > > standard configuration (where buffer_sectors=128). Is this a Problem?
> > > 
> > > v2:
> > >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > > 
> > > Regards,
> > > Lukas Straub  
> > 
> > 
> > Not sure why you didn't cc Mikulas but I just checked with him and he
> > thinks:
> > 
> > You're only seeing a boost because you're using 512-byte sector and
> > 512-byte buffer. Patch helps that case but hurts in most other cases
> > (due to small buffers).
> 
> Hmm, buffer-sectors is still user configurable. If the user wants fast
> write performance on slow HDDs he can set a small buffer-sector and
> benefit from this patch. With the default buffer-sectors=128 it
> shouldn't have any impact.

OK, if you'd be willing to conduct tests to prove there is no impact
with larger buffers that'd certainly help reinforce your position and
make it easier for me to take your patch.

But if you're just testing against slow HDD testbeds then the test
coverage isn't wide enough.

Thanks,
Mike


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-03-24 19:11     ` Mike Snitzer
@ 2020-03-26 11:24       ` Lukas Straub
  0 siblings, 0 replies; 8+ messages in thread
From: Lukas Straub @ 2020-03-26 11:24 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: linux-kernel, Alasdair Kergon, dm-devel, Mikulas Patocka

[-- Attachment #1: Type: text/plain, Size: 6489 bytes --]

On Tue, 24 Mar 2020 15:11:49 -0400
Mike Snitzer <snitzer@redhat.com> wrote:

> On Tue, Mar 24 2020 at  2:59pm -0400,
> Lukas Straub <lukasstraub2@web.de> wrote:
> 
> > On Tue, 24 Mar 2020 13:18:22 -0400
> > Mike Snitzer <snitzer@redhat.com> wrote:
> >   
> > > On Thu, Feb 27 2020 at  8:26am -0500,
> > > Lukas Straub <lukasstraub2@web.de> wrote:
> > >   
> > > > If a full metadata buffer is being written, don't read it first. This
> > > > prevents a read-modify-write cycle and increases performance on HDDs
> > > > considerably.
> > > > 
> > > > To do this we now calculate the checksums for all sectors in the bio in one
> > > > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > > > which now checks if we overwrite the whole buffer.
> > > > 
> > > > Benchmarking with a 5400RPM HDD with bitmap mode:
> > > > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > > > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > > > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > > > 
> > > > Without patch:
> > > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > > > 
> > > > With patch:
> > > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > > > 
> > > > Signed-off-by: Lukas Straub <lukasstraub2@web.de>
> > > > ---
> > > > Hello Everyone,
> > > > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > > > of the (buggy) check if we overwrite a whole tag area before.
> > > > Performance stayed the same (with --buffer-sectors=1).
> > > > 
> > > > The only quirk now is that it advertises a very big optimal io size in the
> > > > standard configuration (where buffer_sectors=128). Is this a Problem?
> > > > 
> > > > v2:
> > > >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > > >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > > > 
> > > > Regards,
> > > > Lukas Straub    
> > > 
> > > 
> > > Not sure why you didn't cc Mikulas but I just checked with him and he
> > > thinks:
> > > 
> > > You're only seeing a boost because you're using 512-byte sector and
> > > 512-byte buffer. Patch helps that case but hurts in most other cases
> > > (due to small buffers).  
> > 
> > Hmm, buffer-sectors is still user configurable. If the user wants fast
> > write performance on slow HDDs he can set a small buffer-sector and
> > benefit from this patch. With the default buffer-sectors=128 it
> > shouldn't have any impact.  
> 
> OK, if you'd be willing to conduct tests to prove there is no impact
> with larger buffers that'd certainly help reinforce your position and
> make it easier for me to take your patch.
> 
> But if you're just testing against slow HDD testbeds then the test
> coverage isn't wide enough.
> 
> Thanks,
> Mike
> 

Sure,
This time testing with an Samsung 850 EVO SSD:

without patch:

root@teststore:/home/lukas# integritysetup format --no-wipe --batch-mode -I crc32c -B /dev/sdc1
root@teststore:/home/lukas# integritysetup open -I crc32c -B /dev/sdc1 ssda_integ
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
4177264640 bytes (4.2 GB, 3.9 GiB) copied, 28 s, 149 MB/s
65536+0 records in
65536+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 28.8946 s, 149 MB/s
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=4M count=$((256*4)) conv=fsync oflag=direct status=progress
4211081216 bytes (4.2 GB, 3.9 GiB) copied, 22 s, 191 MB/s
1024+0 records in
1024+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 22.6355 s, 190 MB/s
root@teststore:/home/lukas# time mkfs.ext4 /dev/mapper/ssda_integ 4g
mke2fs 1.45.5 (07-Jan-2020)
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: 679c4808-d549-4576-a8ef-4c46c78c6070
Superblock backups stored on blocks:
        32768, 98304, 163840, 229376, 294912, 819200, 884736

Allocating group tables: done
Writing inode tables: done
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done


real    0m0.318s
user    0m0.016s
sys     0m0.001s
root@teststore:/home/lukas# mount /dev/mapper/ssda_integ /mnt/
root@teststore:/home/lukas# cd /mnt/
root@teststore:/mnt# time tar -xJf /home/lukas/linux-5.5.4.tar.xz

real    0m18.708s
user    0m14.351s
sys     0m8.585s
root@teststore:/mnt# time rm -rf linux-5.5.4/

real    0m3.226s
user    0m0.117s
sys     0m2.958s


with patch:

root@teststore:/home/lukas# integritysetup format --no-wipe --batch-mode -I crc32c -B /dev/sdc1
root@teststore:/home/lukas# integritysetup open -I crc32c -B /dev/sdc1 ssda_integ
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
4169007104 bytes (4.2 GB, 3.9 GiB) copied, 27 s, 154 MB/s
65536+0 records in
65536+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 27.9165 s, 154 MB/s
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=4M count=$((256*4)) conv=fsync oflag=direct status=progress
4169138176 bytes (4.2 GB, 3.9 GiB) copied, 22 s, 189 MB/s
1024+0 records in
1024+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 22.9181 s, 187 MB/s
root@teststore:/home/lukas# time mkfs.ext4 /dev/mapper/ssda_integ 4g
mke2fs 1.45.5 (07-Jan-2020)
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: 9a9decad-19e2-46a4-8cc5-ce27238829f2
Superblock backups stored on blocks: 
        32768, 98304, 163840, 229376, 294912, 819200, 884736

Allocating group tables: done                            
Writing inode tables: done                            
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done 


real    0m0.341s
user    0m0.000s
sys     0m0.016s
root@teststore:/home/lukas# mount /dev/mapper/ssda_integ /mnt/
root@teststore:/home/lukas# cd /mnt/
root@teststore:/mnt# time tar -xJf /home/lukas/linux-5.5.4.tar.xz

real    0m18.409s
user    0m14.191s
sys     0m8.585s
root@teststore:/mnt# time rm -rf linux-5.5.4/

real    0m3.200s
user    0m0.133s
sys     0m2.979s


Regards,
Lukas Straub

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dm-devel] [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-02-27 13:26 [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes Lukas Straub
  2020-03-24 17:18 ` Mike Snitzer
@ 2020-03-30 16:34 ` Mikulas Patocka
  2020-03-31 12:06   ` Lukas Straub
  2020-04-03 11:58   ` Lukas Straub
  1 sibling, 2 replies; 8+ messages in thread
From: Mikulas Patocka @ 2020-03-30 16:34 UTC (permalink / raw)
  To: Lukas Straub; +Cc: linux-kernel, dm-devel, Mike Snitzer, Alasdair Kergon

Hi

I tested it on my rotational disk:


On Thu, 27 Feb 2020, Lukas Straub wrote:

> If a full metadata buffer is being written, don't read it first. This
> prevents a read-modify-write cycle and increases performance on HDDs
> considerably.
> 
> To do this we now calculate the checksums for all sectors in the bio in one
> go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> which now checks if we overwrite the whole buffer.
> 
> Benchmarking with a 5400RPM HDD with bitmap mode:
> integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> 
> Without patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s

I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).

> With patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s

I get 110 MB/s.

> Signed-off-by: Lukas Straub <lukasstraub2@web.de>

BUT: when I removed "--buffer-sectors=1" from the command line, I've got 
these numbers:
without the patch: 101 MB/s
with the patch: 101 MB/s

So, you crippled performance with "--buffer-sectors=1" and then made a 
patch to restore it.

The patch only helps 10%.

Mikulas


> ---
> Hello Everyone,
> So here is v2, now checking if we overwrite a whole metadata buffer instead
> of the (buggy) check if we overwrite a whole tag area before.
> Performance stayed the same (with --buffer-sectors=1).
> 
> The only quirk now is that it advertises a very big optimal io size in the
> standard configuration (where buffer_sectors=128). Is this a Problem?
> 
> v2:
>  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
>  -fix optimal io size to check if we overwrite a whole metadata buffer
> 
> Regards,
> Lukas Straub
> 
>  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
>  1 file changed, 47 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index b225b3e445fa..a6d3cf1406df 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
>  		if (unlikely(r))
>  			return r;
> 
> -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> -		if (IS_ERR(data))
> -			return PTR_ERR(data);
> +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> +		if (op == TAG_WRITE && *metadata_offset == 0 \
> +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		} else {
> +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> +			if (IS_ERR(data))
> +				return PTR_ERR(data);
> +		}
> 
>  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
>  		dp = data + *metadata_offset;
> @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
>  {
>  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
>  	struct dm_integrity_c *ic = dio->ic;
> +	unsigned sectors_to_process = dio->range.n_sectors;
> +	sector_t sector = dio->range.logical_sector;
> 
>  	int r;
> 
> @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
>  		struct bio_vec bv;
>  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
>  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> -		char *checksums;
> +		char *checksums, *checksums_ptr;
>  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
>  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> -		unsigned sectors_to_process = dio->range.n_sectors;
> -		sector_t sector = dio->range.logical_sector;
> 
>  		if (unlikely(ic->mode == 'R'))
>  			goto skip_io;
> 
> -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
>  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
>  		if (!checksums) {
>  			checksums = checksums_onstack;
> @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
>  			}
>  		}
> 
> +		checksums_ptr = checksums;
>  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
>  			unsigned pos;
> -			char *mem, *checksums_ptr;
> -
> -again:
> +			char *mem;
>  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
>  			pos = 0;
> -			checksums_ptr = checksums;
>  			do {
>  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> -				checksums_ptr += ic->tag_size;
> -				sectors_to_process -= ic->sectors_per_block;
> +
> +				if (likely(checksums != checksums_onstack)) {
> +					checksums_ptr += ic->tag_size;
> +				} else {
> +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> +					if (unlikely(r))
> +						goto internal_hash_error;
> +				}
> +
>  				pos += ic->sectors_per_block << SECTOR_SHIFT;
>  				sector += ic->sectors_per_block;
> -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> +				sectors_to_process -= ic->sectors_per_block;
> +			} while (pos < bv.bv_len && sectors_to_process);
>  			kunmap_atomic(mem);
> 
> -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> -			if (unlikely(r)) {
> -				if (r > 0) {
> -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> -					r = -EILSEQ;
> -					atomic64_inc(&ic->number_of_mismatches);
> -				}
> -				if (likely(checksums != checksums_onstack))
> -					kfree(checksums);
> -				goto error;
> -			}
> -
>  			if (!sectors_to_process)
>  				break;
> +		}
> 
> -			if (unlikely(pos < bv.bv_len)) {
> -				bv.bv_offset += pos;
> -				bv.bv_len -= pos;
> -				goto again;
> +		if (likely(checksums != checksums_onstack)) {
> +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> +						!dio->write ? TAG_CMP : TAG_WRITE);
> +			if (unlikely(r)) {
> +				kfree(checksums);
> +				goto internal_hash_error;
>  			}
> +			kfree(checksums);
>  		}
> 
> -		if (likely(checksums != checksums_onstack))
> -			kfree(checksums);
>  	} else {
>  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> 
> @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
>  skip_io:
>  	dec_in_flight(dio);
>  	return;
> +internal_hash_error:
> +	if (r > 0) {
> +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> +		r = -EILSEQ;
> +		atomic64_inc(&ic->number_of_mismatches);
> +	}
>  error:
>  	dio->bi_status = errno_to_blk_status(r);
>  	dec_in_flight(dio);
> @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
>  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
>  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
>  	}
> +
> +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
>  }
> 
>  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> --
> 2.20.1
> 
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dm-devel] [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-03-30 16:34 ` [dm-devel] " Mikulas Patocka
@ 2020-03-31 12:06   ` Lukas Straub
  2020-04-03 11:58   ` Lukas Straub
  1 sibling, 0 replies; 8+ messages in thread
From: Lukas Straub @ 2020-03-31 12:06 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: linux-kernel, dm-devel, Mike Snitzer, Alasdair Kergon

[-- Attachment #1: Type: text/plain, Size: 8839 bytes --]

On Mon, 30 Mar 2020 12:34:04 -0400 (EDT)
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> I tested it on my rotational disk:
> 
> 
> On Thu, 27 Feb 2020, Lukas Straub wrote:
> 
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> > 
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> > 
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > 
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s  
> 
> I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).
> 
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s  
> 
> I get 110 MB/s.
> 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> 
> BUT: when I removed "--buffer-sectors=1" from the command line, I've got 
> these numbers:
> without the patch: 101 MB/s

I get 86.5 MB/s.

So in my case it's worth it and as you saw it doesn't (negatively) affect other cases.

Regards,
Lukas Straub

> with the patch: 101 MB/s
> 
> So, you crippled performance with "--buffer-sectors=1" and then made a 
> patch to restore it.
> 
> The patch only helps 10%.
>
> Mikulas
> 
> 
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> > 
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> > 
> > v2:
> >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > 
> > Regards,
> > Lukas Straub
> > 
> >  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> >  		if (unlikely(r))
> >  			return r;
> > 
> > -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -		if (IS_ERR(data))
> > -			return PTR_ERR(data);
> > +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > +		if (op == TAG_WRITE && *metadata_offset == 0 \
> > +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		} else {
> > +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		}
> > 
> >  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> >  		dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> >  	struct dm_integrity_c *ic = dio->ic;
> > +	unsigned sectors_to_process = dio->range.n_sectors;
> > +	sector_t sector = dio->range.logical_sector;
> > 
> >  	int r;
> > 
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> >  		struct bio_vec bv;
> >  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> >  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > -		char *checksums;
> > +		char *checksums, *checksums_ptr;
> >  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> >  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -		unsigned sectors_to_process = dio->range.n_sectors;
> > -		sector_t sector = dio->range.logical_sector;
> > 
> >  		if (unlikely(ic->mode == 'R'))
> >  			goto skip_io;
> > 
> > -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >  		if (!checksums) {
> >  			checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> >  			}
> >  		}
> > 
> > +		checksums_ptr = checksums;
> >  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >  			unsigned pos;
> > -			char *mem, *checksums_ptr;
> > -
> > -again:
> > +			char *mem;
> >  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >  			pos = 0;
> > -			checksums_ptr = checksums;
> >  			do {
> >  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > -				checksums_ptr += ic->tag_size;
> > -				sectors_to_process -= ic->sectors_per_block;
> > +
> > +				if (likely(checksums != checksums_onstack)) {
> > +					checksums_ptr += ic->tag_size;
> > +				} else {
> > +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > +					if (unlikely(r))
> > +						goto internal_hash_error;
> > +				}
> > +
> >  				pos += ic->sectors_per_block << SECTOR_SHIFT;
> >  				sector += ic->sectors_per_block;
> > -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > +				sectors_to_process -= ic->sectors_per_block;
> > +			} while (pos < bv.bv_len && sectors_to_process);
> >  			kunmap_atomic(mem);
> > 
> > -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > -			if (unlikely(r)) {
> > -				if (r > 0) {
> > -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -					r = -EILSEQ;
> > -					atomic64_inc(&ic->number_of_mismatches);
> > -				}
> > -				if (likely(checksums != checksums_onstack))
> > -					kfree(checksums);
> > -				goto error;
> > -			}
> > -
> >  			if (!sectors_to_process)
> >  				break;
> > +		}
> > 
> > -			if (unlikely(pos < bv.bv_len)) {
> > -				bv.bv_offset += pos;
> > -				bv.bv_len -= pos;
> > -				goto again;
> > +		if (likely(checksums != checksums_onstack)) {
> > +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +						!dio->write ? TAG_CMP : TAG_WRITE);
> > +			if (unlikely(r)) {
> > +				kfree(checksums);
> > +				goto internal_hash_error;
> >  			}
> > +			kfree(checksums);
> >  		}
> > 
> > -		if (likely(checksums != checksums_onstack))
> > -			kfree(checksums);
> >  	} else {
> >  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> > 
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >  	dec_in_flight(dio);
> >  	return;
> > +internal_hash_error:
> > +	if (r > 0) {
> > +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > +		r = -EILSEQ;
> > +		atomic64_inc(&ic->number_of_mismatches);
> > +	}
> >  error:
> >  	dio->bi_status = errno_to_blk_status(r);
> >  	dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> >  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> >  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> >  	}
> > +
> > +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> >  }
> > 
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> > 
> > 
> > --
> > dm-devel mailing list
> > dm-devel@redhat.com
> > https://www.redhat.com/mailman/listinfo/dm-devel
> >   
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [dm-devel] [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes
  2020-03-30 16:34 ` [dm-devel] " Mikulas Patocka
  2020-03-31 12:06   ` Lukas Straub
@ 2020-04-03 11:58   ` Lukas Straub
  1 sibling, 0 replies; 8+ messages in thread
From: Lukas Straub @ 2020-04-03 11:58 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: linux-kernel, dm-devel, Mike Snitzer, Alasdair Kergon

[-- Attachment #1: Type: text/plain, Size: 8839 bytes --]

On Mon, 30 Mar 2020 12:34:04 -0400 (EDT)
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi
> 
> I tested it on my rotational disk:
> 
> 
> On Thu, 27 Feb 2020, Lukas Straub wrote:
> 
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> > 
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> > 
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > 
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s  
> 
> I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).
> 
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s  
> 
> I get 110 MB/s.
> 
> > Signed-off-by: Lukas Straub <lukasstraub2@web.de>  
> 
> BUT: when I removed "--buffer-sectors=1" from the command line, I've got 
> these numbers:
> without the patch: 101 MB/s

I get 86.5 MB/s.

So in my case it's worth it and as you saw it doesn't (negatively) affect other cases.

Regards,
Lukas Straub

> with the patch: 101 MB/s
> 
> So, you crippled performance with "--buffer-sectors=1" and then made a 
> patch to restore it.
> 
> The patch only helps 10%.
>
> Mikulas
> 
> 
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> > 
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> > 
> > v2:
> >  -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> >  -fix optimal io size to check if we overwrite a whole metadata buffer
> > 
> > Regards,
> > Lukas Straub
> > 
> >  drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> >  		if (unlikely(r))
> >  			return r;
> > 
> > -		data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > -		if (IS_ERR(data))
> > -			return PTR_ERR(data);
> > +		/* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > +		if (op == TAG_WRITE && *metadata_offset == 0 \
> > +			&& total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > +			data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		} else {
> > +			data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > +			if (IS_ERR(data))
> > +				return PTR_ERR(data);
> > +		}
> > 
> >  		to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> >  		dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> >  {
> >  	struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> >  	struct dm_integrity_c *ic = dio->ic;
> > +	unsigned sectors_to_process = dio->range.n_sectors;
> > +	sector_t sector = dio->range.logical_sector;
> > 
> >  	int r;
> > 
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> >  		struct bio_vec bv;
> >  		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> >  		struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > -		char *checksums;
> > +		char *checksums, *checksums_ptr;
> >  		unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> >  		char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > -		unsigned sectors_to_process = dio->range.n_sectors;
> > -		sector_t sector = dio->range.logical_sector;
> > 
> >  		if (unlikely(ic->mode == 'R'))
> >  			goto skip_io;
> > 
> > -		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > +		checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> >  				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> >  		if (!checksums) {
> >  			checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> >  			}
> >  		}
> > 
> > +		checksums_ptr = checksums;
> >  		__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> >  			unsigned pos;
> > -			char *mem, *checksums_ptr;
> > -
> > -again:
> > +			char *mem;
> >  			mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> >  			pos = 0;
> > -			checksums_ptr = checksums;
> >  			do {
> >  				integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > -				checksums_ptr += ic->tag_size;
> > -				sectors_to_process -= ic->sectors_per_block;
> > +
> > +				if (likely(checksums != checksums_onstack)) {
> > +					checksums_ptr += ic->tag_size;
> > +				} else {
> > +					r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +								ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > +					if (unlikely(r))
> > +						goto internal_hash_error;
> > +				}
> > +
> >  				pos += ic->sectors_per_block << SECTOR_SHIFT;
> >  				sector += ic->sectors_per_block;
> > -			} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > +				sectors_to_process -= ic->sectors_per_block;
> > +			} while (pos < bv.bv_len && sectors_to_process);
> >  			kunmap_atomic(mem);
> > 
> > -			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > -						checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > -			if (unlikely(r)) {
> > -				if (r > 0) {
> > -					DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > -						    (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > -					r = -EILSEQ;
> > -					atomic64_inc(&ic->number_of_mismatches);
> > -				}
> > -				if (likely(checksums != checksums_onstack))
> > -					kfree(checksums);
> > -				goto error;
> > -			}
> > -
> >  			if (!sectors_to_process)
> >  				break;
> > +		}
> > 
> > -			if (unlikely(pos < bv.bv_len)) {
> > -				bv.bv_offset += pos;
> > -				bv.bv_len -= pos;
> > -				goto again;
> > +		if (likely(checksums != checksums_onstack)) {
> > +			r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > +						(dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > +						!dio->write ? TAG_CMP : TAG_WRITE);
> > +			if (unlikely(r)) {
> > +				kfree(checksums);
> > +				goto internal_hash_error;
> >  			}
> > +			kfree(checksums);
> >  		}
> > 
> > -		if (likely(checksums != checksums_onstack))
> > -			kfree(checksums);
> >  	} else {
> >  		struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> > 
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> >  skip_io:
> >  	dec_in_flight(dio);
> >  	return;
> > +internal_hash_error:
> > +	if (r > 0) {
> > +		DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > +				(unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > +		r = -EILSEQ;
> > +		atomic64_inc(&ic->number_of_mismatches);
> > +	}
> >  error:
> >  	dio->bi_status = errno_to_blk_status(r);
> >  	dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> >  		limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> >  		blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> >  	}
> > +
> > +	blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> >  }
> > 
> >  static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> > 
> > 
> > --
> > dm-devel mailing list
> > dm-devel@redhat.com
> > https://www.redhat.com/mailman/listinfo/dm-devel
> >   
> 


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-04-03 11:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-27 13:26 [PATCH v2] dm-integrity: Prevent RMW for full metadata buffer writes Lukas Straub
2020-03-24 17:18 ` Mike Snitzer
2020-03-24 18:59   ` Lukas Straub
2020-03-24 19:11     ` Mike Snitzer
2020-03-26 11:24       ` Lukas Straub
2020-03-30 16:34 ` [dm-devel] " Mikulas Patocka
2020-03-31 12:06   ` Lukas Straub
2020-04-03 11:58   ` Lukas Straub

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).