* [PATCH] an optimization for dm-bufio and dm-integrity
From: Mikulas Patocka @ 2017-07-19 15:30 UTC
  To: Mike Snitzer, Milan Broz; +Cc: dm-devel

Hi Mike

Some time ago, I sent this patch as a dm-bufio optimization (it doesn't 
have to write full buffers; it only writes the part of the buffer that 
changed). You delayed the patch until the next kernel.

Will you submit the patch to the current kernel?

Mikulas


---------- Forwarded message ----------
Date: Sun, 30 Apr 2017 17:31:22 -0400 (EDT)
From: Mikulas Patocka <mpatocka@redhat.com>
To: Mike Snitzer <msnitzer@redhat.com>
Subject: [PATCH] an optimization for dm-bufio and dm-integrity

Hi

This is an optimization for dm-bufio and dm-integrity, so that dm-bufio 
can write out only the part of a buffer that was modified, rather than the 
whole buffer.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-bufio.c     |   95 ++++++++++++++++++++++++++++++++--------------
 drivers/md/dm-bufio.h     |    9 ++++
 drivers/md/dm-integrity.c |    2 +-
 3 files changed, 77 insertions(+), 29 deletions(-)
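
As an illustration of the new interface, here is a minimal hypothetical
caller (the helper, client, and offsets below are invented for this sketch;
the real user is the dm-integrity.c hunk at the end of the patch):

#include <linux/err.h>
#include "dm-bufio.h"

/*
 * Hypothetical example: update a small tag inside a large metadata
 * buffer and mark only the touched byte range dirty, so that dm-bufio
 * can write back just the surrounding DM_BUFIO_WRITE_ALIGN-ed sectors
 * instead of the whole buffer.
 */
static int update_tag(struct dm_bufio_client *c, sector_t block,
		      unsigned offset, const void *tag, unsigned len)
{
	struct dm_buffer *b;
	void *data;

	data = dm_bufio_read(c, block, &b);
	if (IS_ERR(data))
		return PTR_ERR(data);

	memcpy((char *)data + offset, tag, len);

	/* Only [offset, offset + len) is scheduled for writeback. */
	dm_bufio_mark_partial_buffer_dirty(b, offset, offset + len);

	dm_bufio_release(b);
	return 0;
}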

Index: linux-2.6/drivers/md/dm-bufio.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-bufio.c
+++ linux-2.6/drivers/md/dm-bufio.c
@@ -64,6 +64,12 @@
 #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT	(PAGE_SIZE << (MAX_ORDER - 1))
 
 /*
+ * Align buffer writes to this boundary.
+ * Tests show that SSDs have the highest IOPS when using 4k writes.
+ */
+#define DM_BUFIO_WRITE_ALIGN		4096
+
+/*
  * dm_buffer->list_mode
  */
 #define LIST_CLEAN	0
@@ -149,6 +155,10 @@ struct dm_buffer {
 	int write_error;
 	unsigned long state;
 	unsigned long last_accessed;
+	unsigned dirty_start;
+	unsigned dirty_end;
+	unsigned write_start;
+	unsigned write_end;
 	struct dm_bufio_client *c;
 	struct list_head write_list;
 	struct bio bio;
@@ -560,7 +570,7 @@ static void dmio_complete(unsigned long
 }
 
 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
-		     unsigned n_sectors, bio_end_io_t *end_io)
+		     unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
 {
 	int r;
 	struct dm_io_request io_req = {
@@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b
 
 	if (b->data_mode != DATA_MODE_VMALLOC) {
 		io_req.mem.type = DM_IO_KMEM;
-		io_req.mem.ptr.addr = b->data;
+		io_req.mem.ptr.addr = (char *)b->data + offset;
 	} else {
 		io_req.mem.type = DM_IO_VMA;
-		io_req.mem.ptr.vma = b->data;
+		io_req.mem.ptr.vma = (char *)b->data + offset;
 	}
 
 	b->bio.bi_end_io = end_io;
@@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio
 }
 
 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
-			   unsigned n_sectors, bio_end_io_t *end_io)
+			   unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
 {
 	char *ptr;
-	int len;
+	unsigned len;
 
 	bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
 	b->bio.bi_iter.bi_sector = sector;
@@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buf
 	b->bio.bi_private = end_io;
 	bio_set_op_attrs(&b->bio, rw, 0);
 
-	/*
-	 * We assume that if len >= PAGE_SIZE ptr is page-aligned.
-	 * If len < PAGE_SIZE the buffer doesn't cross page boundary.
-	 */
-	ptr = b->data;
+	ptr = (char *)b->data + offset;
 	len = n_sectors << SECTOR_SHIFT;
 
-	if (len >= PAGE_SIZE)
-		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
-	else
-		BUG_ON((unsigned long)ptr & (len - 1));
-
 	do {
-		if (!bio_add_page(&b->bio, virt_to_page(ptr),
-				  len < PAGE_SIZE ? len : PAGE_SIZE,
+		unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
+		if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
 				  offset_in_page(ptr))) {
 			BUG_ON(b->c->block_size <= PAGE_SIZE);
-			use_dmio(b, rw, sector, n_sectors, end_io);
+			use_dmio(b, rw, sector, n_sectors, offset, end_io);
 			return;
 		}
 
-		len -= PAGE_SIZE;
-		ptr += PAGE_SIZE;
+		len -= this_step;
+		ptr += this_step;
 	} while (len > 0);
 
 	submit_bio(&b->bio);
@@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *
 {
 	unsigned n_sectors;
 	sector_t sector;
-
-	if (rw == WRITE && b->c->write_callback)
-		b->c->write_callback(b);
+	unsigned offset, end;
 
 	sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
-	n_sectors = 1 << b->c->sectors_per_block_bits;
+
+	if (rw != WRITE) {
+		n_sectors = 1 << b->c->sectors_per_block_bits;
+		offset = 0;
+	} else {
+		if (b->c->write_callback)
+			b->c->write_callback(b);
+		offset = b->write_start;
+		end = b->write_end;
+		offset &= -DM_BUFIO_WRITE_ALIGN;
+		end += DM_BUFIO_WRITE_ALIGN - 1;
+		end &= -DM_BUFIO_WRITE_ALIGN;
+		if (unlikely(end > b->c->block_size))
+			end = b->c->block_size;
+
+		sector += offset >> SECTOR_SHIFT;
+		n_sectors = (end - offset) >> SECTOR_SHIFT;
+	}
 
 	if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
 	    b->data_mode != DATA_MODE_VMALLOC)
-		use_inline_bio(b, rw, sector, n_sectors, end_io);
+		use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
 	else
-		use_dmio(b, rw, sector, n_sectors, end_io);
+		use_dmio(b, rw, sector, n_sectors, offset, end_io);
 }
 
 /*----------------------------------------------------------------
@@ -719,6 +735,9 @@ static void __write_dirty_buffer(struct
 	clear_bit(B_DIRTY, &b->state);
 	wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 
+	b->write_start = b->dirty_start;
+	b->write_end = b->dirty_end;
+
 	if (!write_list)
 		submit_io(b, WRITE, write_endio);
 	else
@@ -1219,19 +1238,37 @@ void dm_bufio_release(struct dm_buffer *
 }
 EXPORT_SYMBOL_GPL(dm_bufio_release);
 
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+					unsigned start, unsigned end)
 {
 	struct dm_bufio_client *c = b->c;
 
+	BUG_ON(start >= end);
+	BUG_ON(end > b->c->block_size);
+
 	dm_bufio_lock(c);
 
 	BUG_ON(test_bit(B_READING, &b->state));
 
-	if (!test_and_set_bit(B_DIRTY, &b->state))
+	if (!test_and_set_bit(B_DIRTY, &b->state)) {
+		b->dirty_start = start;
+		b->dirty_end = end;
 		__relink_lru(b, LIST_DIRTY);
+	} else {
+		if (start < b->dirty_start)
+			b->dirty_start = start;
+		if (end > b->dirty_end)
+			b->dirty_end = end;
+	}
 
 	dm_bufio_unlock(c);
 }
+EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+	dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
+}
 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
@@ -1396,6 +1433,8 @@ retry:
 		wait_on_bit_io(&b->state, B_WRITING,
 			       TASK_UNINTERRUPTIBLE);
 		set_bit(B_DIRTY, &b->state);
+		b->dirty_start = 0;
+		b->dirty_end = c->block_size;
 		__unlink_buffer(b);
 		__link_buffer(b, new_block, LIST_DIRTY);
 	} else {
Index: linux-2.6/drivers/md/dm-bufio.h
===================================================================
--- linux-2.6.orig/drivers/md/dm-bufio.h
+++ linux-2.6/drivers/md/dm-bufio.h
@@ -94,6 +94,15 @@ void dm_bufio_release(struct dm_buffer *
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
 
 /*
+ * Mark a part of the buffer dirty.
+ *
+ * The specified part of the buffer is scheduled to be written. dm-bufio may
+ * write the specified part of the buffer or it may write a larger superset.
+ */
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+					unsigned start, unsigned end);
+
+/*
  * Initiate writing of dirty buffers, without waiting for completion.
  */
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
Index: linux-2.6/drivers/md/dm-integrity.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-integrity.c
+++ linux-2.6/drivers/md/dm-integrity.c
@@ -1039,7 +1039,7 @@ static int dm_integrity_rw_tag(struct dm
 			memcpy(tag, dp, to_copy);
 		} else if (op == TAG_WRITE) {
 			memcpy(dp, tag, to_copy);
-			dm_bufio_mark_buffer_dirty(b);
+			dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
 		} else  {
 			/* e.g.: op == TAG_CMP */
 			if (unlikely(memcmp(dp, tag, to_copy))) {
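
To illustrate the rounding that submit_io() above performs with
DM_BUFIO_WRITE_ALIGN: the dirty byte range is widened to the enclosing 4k
boundaries and then converted to sectors. A standalone sketch of the same
arithmetic (the example offsets are made up):

#include <stdio.h>

#define DM_BUFIO_WRITE_ALIGN 4096U

int main(void)
{
	unsigned offset = 5000, end = 5064;	/* hypothetical dirty range */
	unsigned block_size = 65536;

	offset &= -DM_BUFIO_WRITE_ALIGN;	/* round down -> 4096 */
	end += DM_BUFIO_WRITE_ALIGN - 1;
	end &= -DM_BUFIO_WRITE_ALIGN;		/* round up -> 8192 */
	if (end > block_size)
		end = block_size;

	/* prints "8 sectors at sector offset 8", i.e. a single 4k write */
	printf("%u sectors at sector offset %u\n",
	       (end - offset) >> 9, offset >> 9);
	return 0;
}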


* Re: an optimization for dm-bufio and dm-integrity
From: Mike Snitzer @ 2017-07-19 15:39 UTC
  To: Mikulas Patocka; +Cc: Mike Snitzer, dm-devel, Milan Broz

On Wed, Jul 19 2017 at 11:30P -0400,
Mikulas Patocka <mpatocka@redhat.com> wrote:

> Hi Mike
> 
> Some time ago, I sent this patch as a dm-bufio optimization (it doesn't 
> have to write full buffers; it only writes the part of the buffer that 
> changed). You delayed the patch until the next kernel.
> 
> Will you submit the patch to the current kernel?

Sorry about that.  It obviously fell off my radar.

I'll review it and see if I can justify it going into 4.13-rc2.  But
unfortunately the 4.13 merge window closed this past Sunday when Linus
released v4.13-rc1.

I'll let you know though, thanks.
Mike


* Re: an optimization for dm-bufio and dm-integrity
From: Milan Broz @ 2017-07-19 16:22 UTC
  To: Mike Snitzer, Mikulas Patocka; +Cc: dm-devel

On 07/19/2017 05:39 PM, Mike Snitzer wrote:
> On Wed, Jul 19 2017 at 11:30P -0400,
> Mikulas Patocka <mpatocka@redhat.com> wrote:
> 
>> Hi Mike
>>
>> Some time ago, I sent this patch as a dm-bufio optimization (it doesn't 
>> have to write full buffers; it only writes the part of the buffer that 
>> changed). You delayed the patch until the next kernel.
>>
>> Will you submit the patch to the current kernel?
> 
> Sorry about that.  It obviously fell off my radar.
> 
> I'll review it and see if I can justify it going into 4.13-rc2.  But
> unfortunately the 4.13 merge window closed this past Sunday when Linus
> released v4.13-rc1.
> 
> I'll let you know though, thanks.

Just FYI: I am currently running some tests with the stack space fix and
this patch, and the performance improvement for 4k sectors is really big
(compared to 4.12). It would be nice if both patches hit 4.13...

Milan


* Re: an optimization for dm-bufio and dm-integrity
From: Mikulas Patocka @ 2017-07-19 18:00 UTC
  To: Milan Broz; +Cc: dm-devel, Mike Snitzer



On Wed, 19 Jul 2017, Milan Broz wrote:

> On 07/19/2017 05:39 PM, Mike Snitzer wrote:
> > On Wed, Jul 19 2017 at 11:30P -0400,
> > Mikulas Patocka <mpatocka@redhat.com> wrote:
> > 
> >> Hi Mike
> >>
> >> Some time ago, I sent this patch as a dm-bufio optimization (it doesn't 
> >> have to write full buffers; it only writes the part of the buffer that 
> >> changed). You delayed the patch until the next kernel.
> >>
> >> Will you submit the patch to the current kernel?
> > 
> > Sorry about that.  It obviously fell off my radar.
> > 
> > I'll review it and see if I can justify it going into 4.13-rc2.  But
> > unfortunately the 4.13 merge window closed this past Sunday when Linus
> > released v4.13-rc1.
> > 
> > I'll let you know though, thanks.
> 
> Just FYI: I am currently running some tests with the stack space fix and
> this patch, and the performance improvement for 4k sectors is really big
> (compared to 4.12). It would be nice if both patches hit 4.13...
> 
> Milan

That journal allocation bugfix should definitely go to the stable 4.12 
branch.

The plug fix may or may not be backported; it provides about a 60% 
performance improvement on specific write patterns (if you write the byte 
ranges 4096-8191, 0-4095, 12288-16383, 8192-12287, 20480-24575, 
16384-20479, ... with O_DIRECT, in that order).
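
For reference, that pattern is just pairs of adjacent 4k blocks written
second-half first; a user-space sketch that generates it (the device path
is only a placeholder, and error handling of pwrite is omitted for brevity):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BLK 4096

int main(void)
{
	void *buf;
	off_t i;
	int fd = open("/dev/mapper/test-dev", O_WRONLY | O_DIRECT);

	if (fd < 0)
		return 1;
	if (posix_memalign(&buf, BLK, BLK))	/* O_DIRECT needs alignment */
		return 1;
	memset(buf, 0xaa, BLK);

	/* 4096-8191, 0-4095, 12288-16383, 8192-12287, ... */
	for (i = 0; i < 1024; i += 2) {
		pwrite(fd, buf, BLK, (off_t)(i + 1) * BLK);
		pwrite(fd, buf, BLK, (off_t)i * BLK);
	}

	free(buf);
	close(fd);
	return 0;
}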

Mikulas


* Re: an optimization for dm-bufio and dm-integrity
From: Mike Snitzer @ 2017-07-19 18:56 UTC
  To: Milan Broz; +Cc: dm-devel, Mikulas Patocka

On Wed, Jul 19 2017 at 12:22pm -0400,
Milan Broz <mbroz@redhat.com> wrote:

> On 07/19/2017 05:39 PM, Mike Snitzer wrote:
> > On Wed, Jul 19 2017 at 11:30P -0400,
> > Mikulas Patocka <mpatocka@redhat.com> wrote:
> > 
> >> Hi Mike
> >>
> >> Some time ago, I sent this patch as a dm-bufio optimization (it doesn't 
> >> have to write full buffers; it only writes the part of the buffer that 
> >> changed). You delayed the patch until the next kernel.
> >>
> >> Will you submit the patch to the current kernel?
> > 
> > Sorry about that.  It obviously fell off my radar.
> > 
> > I'll review it and see if I can justify it going into 4.13-rc2.  But
> > unfortunately the 4.13 merge window closed this past Sunday when Linus
> > released v4.13-rc1.
> > 
> > I'll let you know though, thanks.
> 
> Just FYI: I am currently running some tests with the stack space fix and
> this patch, and the performance improvement for 4k sectors is really big
> (compared to 4.12).

If you could quantify the performance improvement that would be helpful.

Are you also using the plugging patch?


* Re: an optimization for dm-bufio and dm-integrity
From: Milan Broz @ 2017-07-20  6:12 UTC
  To: Mike Snitzer; +Cc: dm-devel, Mikulas Patocka

On 07/19/2017 08:56 PM, Mike Snitzer wrote:
> On Wed, Jul 19 2017 at 12:22pm -0400,
> Milan Broz <mbroz@redhat.com> wrote:
> 
>> On 07/19/2017 05:39 PM, Mike Snitzer wrote:
>>> On Wed, Jul 19 2017 at 11:30P -0400,
>>> Mikulas Patocka <mpatocka@redhat.com> wrote:
>>>
>>>> Hi Mike
>>>>
>>>> Some time ago, I sent this patch as a dm-bufio optimization (it doesn't 
>>>> have to write full buffers; it only writes the part of the buffer that 
>>>> changed). You delayed the patch until the next kernel.
>>>>
>>>> Will you submit the patch to the current kernel?
>>>
>>> Sorry about that.  It obviously fell off my radar.
>>>
>>> I'll review it and see if I can justify it going into 4.13-rc2.  But
>>> unfortunately the 4.13 merge window closed this past Sunday when Linus
>>> released v4.13-rc1.
>>>
>>> I'll let you know though, thanks.
>>
>> Just FYI: I am currently running some tests with the stack space fix and
>> this patch, and the performance improvement for 4k sectors is really big
>> (compared to 4.12).
> 
> If you could quantify the performance improvement that would be helpful.

The linear write performance (4k blocks) with data journal and 4k sectors
(well, everyone likes "dd" tests ;-) went from 40 MB/s to 130 MB/s on an SSD
(both for AEAD and standalone mode with CRC32).

I think it is mainly thanks to the first patch (which is already marked for stable).
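
(A linear 4k O_DIRECT test of that kind boils down to something like
"dd if=/dev/zero of=/dev/mapper/integrity-test bs=4k oflag=direct";
the device name here is only a placeholder for the test mapping.)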
 
> Are you also using the plugging patch?

Not yet; Mikulas sent it later :) Once the whole run is finished, I'll add it to the mix.

Thanks,
Milan
