* [dm-devel] [PATCH] dm-writecache: avoid kcopyd and use open-coded variant instead
@ 2021-06-08 19:05 Mikulas Patocka
0 siblings, 0 replies; only message in thread
From: Mikulas Patocka @ 2021-06-08 19:05 UTC (permalink / raw)
To: Mike Snitzer, Joe Thornber; +Cc: dm-devel
Hi
This is a patch that removes kcopyd usage from dm-writecache and replaces
it with an open-coded implementation.
On my system (SATA300 disk + SSD), it is slower than the explicit kcopyd
support. Test it to see if it helps on your system with an NVMe SSD.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-writecache.c
+++ linux-2.6/drivers/md/dm-writecache.c
@@ -11,7 +11,6 @@
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/dm-io.h>
-#include <linux/dm-kcopyd.h>
#include <linux/dax.h>
#include <linux/pfn_t.h>
#include <linux/libnvdimm.h>
@@ -181,6 +180,7 @@ struct dm_writecache {
struct workqueue_struct *writeback_wq;
struct work_struct writeback_work;
struct work_struct flush_work;
+ struct workqueue_struct *copy_wq;
struct dm_io_client *dm_io;
@@ -191,12 +191,11 @@ struct dm_writecache {
struct task_struct *flush_thread;
struct bio_list flush_list;
- struct dm_kcopyd_client *dm_kcopyd;
unsigned long *dirty_bitmap;
unsigned dirty_bitmap_size;
struct bio_set bio_set;
- mempool_t copy_pool;
+ mempool_t page_pool;
};
#define WB_LIST_INLINE 16
@@ -214,13 +213,11 @@ struct copy_struct {
struct list_head endio_entry;
struct dm_writecache *wc;
struct wc_entry *e;
- unsigned n_entries;
- int error;
+ unsigned bytes;
+ struct work_struct copy_work;
+ struct bio bio;
};
-DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle,
- "A percentage of time allocated for data copying");
-
static void wc_lock(struct dm_writecache *wc)
{
mutex_lock(&wc->lock);
@@ -1530,20 +1527,6 @@ static void writecache_writeback_endio(s
raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
}
-static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
-{
- struct copy_struct *c = ptr;
- struct dm_writecache *wc = c->wc;
-
- c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
-
- raw_spin_lock_irq(&wc->endio_list_lock);
- if (unlikely(list_empty(&wc->endio_list)))
- wake_up_process(wc->endio_thread);
- list_add_tail(&c->endio_entry, &wc->endio_list);
- raw_spin_unlock_irq(&wc->endio_list_lock);
-}
-
static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
{
unsigned i;
@@ -1585,17 +1568,16 @@ static void __writecache_endio_pmem(stru
static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list)
{
- struct copy_struct *c;
- struct wc_entry *e;
-
do {
- c = list_entry(list->next, struct copy_struct, endio_entry);
- list_del(&c->endio_entry);
+ struct copy_struct *cs;
+ struct wc_entry *e;
+ struct bio *bio;
+ unsigned idx;
- if (unlikely(c->error))
- writecache_error(wc, c->error, "copy error");
+ cs = list_entry(list->next, struct copy_struct, endio_entry);
+ list_del(&cs->endio_entry);
- e = c->e;
+ e = cs->e;
do {
BUG_ON(!e->write_in_progress);
e->write_in_progress = false;
@@ -1606,8 +1588,14 @@ static void __writecache_endio_ssd(struc
BUG_ON(!wc->writeback_size);
wc->writeback_size--;
e++;
- } while (--c->n_entries);
- mempool_free(c, &wc->copy_pool);
+ } while (cs->bytes -= wc->block_size);
+ bio = &cs->bio;
+ for (idx = 0; idx < bio->bi_vcnt; idx++) {
+ unsigned p;
+ for (p = 0; p < bio->bi_io_vec[idx].bv_len >> PAGE_SHIFT; p++)
+ mempool_free(bio->bi_io_vec[idx].bv_page + p, &wc->page_pool);
+ }
+ bio_put(bio);
} while (!list_empty(list));
}
@@ -1752,34 +1740,89 @@ static void __writecache_writeback_pmem(
}
}
+static void writecache_write_endio(struct bio *bio)
+{
+ struct copy_struct *cs = container_of(bio, struct copy_struct, bio);
+ struct dm_writecache *wc = cs->wc;
+ unsigned long flags;
+
+ if (unlikely(bio->bi_status != BLK_STS_OK))
+ writecache_error(wc, blk_status_to_errno(bio->bi_status),
+ "write error %d", bio->bi_status);
+
+ raw_spin_lock_irqsave(&wc->endio_list_lock, flags);
+ if (unlikely(list_empty(&wc->endio_list)))
+ wake_up_process(wc->endio_thread);
+ list_add_tail(&cs->endio_entry, &wc->endio_list);
+ raw_spin_unlock_irqrestore(&wc->endio_list_lock, flags);
+}
+
+static void writecache_copy_write(struct work_struct *copy)
+{
+ struct copy_struct *cs = container_of(copy, struct copy_struct, copy_work);
+ struct bio *bio = &cs->bio;
+ struct dm_writecache *wc = cs->wc;
+ unsigned vcnt;
+ if (unlikely(bio->bi_status != BLK_STS_OK)) {
+ writecache_error(wc, blk_status_to_errno(bio->bi_status),
+ "read error %d", bio->bi_status);
+ bio->bi_status = BLK_STS_OK;
+ writecache_write_endio(bio);
+ return;
+ }
+ vcnt = bio->bi_vcnt;
+ bio_reset(bio);
+ bio->bi_vcnt = vcnt;
+ bio->bi_end_io = writecache_write_endio;
+ bio_set_dev(bio, wc->dev->bdev);
+ bio->bi_iter.bi_sector = read_original_sector(wc, cs->e);
+ bio->bi_iter.bi_size = cs->bytes;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
+ submit_bio(bio);
+}
+
+static void writecache_read_endio(struct bio *bio)
+{
+ struct copy_struct *cs = container_of(bio, struct copy_struct, bio);
+ struct dm_writecache *wc = cs->wc;
+ queue_work(wc->copy_wq, &cs->copy_work);
+}
+
static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl)
{
struct wc_entry *e, *f;
- struct dm_io_region from, to;
- struct copy_struct *c;
+ struct bio *bio;
+ struct copy_struct *cs;
while (wbl->size) {
- unsigned n_sectors;
+ unsigned n_bytes, n_pages, i;
wbl->size--;
e = container_of(wbl->list.prev, struct wc_entry, lru);
list_del(&e->lru);
- n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT);
+ n_bytes = e->wc_list_contiguous << wc->block_size_bits;
+ n_pages = (n_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
- from.bdev = wc->ssd_dev->bdev;
- from.sector = cache_sector(wc, e);
- from.count = n_sectors;
- to.bdev = wc->dev->bdev;
- to.sector = read_original_sector(wc, e);
- to.count = n_sectors;
-
- c = mempool_alloc(&wc->copy_pool, GFP_NOIO);
- c->wc = wc;
- c->e = e;
- c->n_entries = e->wc_list_contiguous;
+ bio = bio_alloc_bioset(GFP_NOIO, n_pages, &wc->bio_set);
+ cs = container_of(bio, struct copy_struct, bio);
+ cs->wc = wc;
+ cs->e = e;
+ cs->bytes = n_bytes;
+ INIT_WORK(&cs->copy_work, writecache_copy_write);
+ bio->bi_end_io = writecache_read_endio;
+ bio_set_dev(bio, wc->ssd_dev->bdev);
+ bio->bi_iter.bi_sector = cache_sector(wc, e);
+
+ for (i = 0; i < n_bytes; i += PAGE_SIZE) {
+ struct page *p;
+ unsigned len;
+ p = mempool_alloc(&wc->page_pool, GFP_NOIO);
+ len = min((unsigned)PAGE_SIZE, n_bytes - i);
+ bio_add_page(bio, p, len, 0);
+ }
- while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) {
+ while ((n_bytes -= wc->block_size)) {
wbl->size--;
f = container_of(wbl->list.prev, struct wc_entry, lru);
BUG_ON(f != e + 1);
@@ -1787,15 +1830,8 @@ static void __writecache_writeback_ssd(s
e = f;
}
- if (unlikely(to.sector + to.count > wc->data_device_sectors)) {
- if (to.sector >= wc->data_device_sectors) {
- writecache_copy_endio(0, 0, c);
- continue;
- }
- from.count = to.count = wc->data_device_sectors - to.sector;
- }
-
- dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ submit_bio(bio);
__writeback_throttle(wc, wbl);
}
@@ -1811,8 +1847,7 @@ static void writecache_writeback(struct
struct writeback_list wbl;
unsigned long n_walked;
- if (!WC_MODE_PMEM(wc))
- dm_kcopyd_wait_if_busy(wc->dm_kcopyd);
+ //flush_workqueue(wc->copy_wq);
wc_lock(wc);
restart:
@@ -2040,11 +2075,14 @@ static void writecache_dtr(struct dm_tar
bioset_exit(&wc->bio_set);
- mempool_exit(&wc->copy_pool);
+ mempool_exit(&wc->page_pool);
if (wc->writeback_wq)
destroy_workqueue(wc->writeback_wq);
+ if (wc->copy_wq)
+ destroy_workqueue(wc->copy_wq);
+
if (wc->dev)
dm_put_device(ti, wc->dev);
@@ -2060,9 +2098,6 @@ static void writecache_dtr(struct dm_tar
vfree(wc->memory_map);
}
- if (wc->dm_kcopyd)
- dm_kcopyd_client_destroy(wc->dm_kcopyd);
-
if (wc->dm_io)
dm_io_client_destroy(wc->dm_io);
@@ -2168,20 +2203,12 @@ static int writecache_ctr(struct dm_targ
goto bad_arguments;
}
- if (WC_MODE_PMEM(wc)) {
- r = bioset_init(&wc->bio_set, BIO_POOL_SIZE,
- offsetof(struct writeback_struct, bio),
- BIOSET_NEED_BVECS);
- if (r) {
- ti->error = "Could not allocate bio set";
- goto bad;
- }
- } else {
- r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
- if (r) {
- ti->error = "Could not allocate mempool";
- goto bad;
- }
+ r = bioset_init(&wc->bio_set, BIO_POOL_SIZE,
+ offsetof(struct writeback_struct, bio),
+ BIOSET_NEED_BVECS);
+ if (r) {
+ ti->error = "Could not allocate bio set";
+ goto bad;
}
/*
@@ -2343,6 +2370,19 @@ invalid_optional:
size_t n_blocks, n_metadata_blocks;
uint64_t n_bitmap_bits;
+ r = mempool_init_page_pool(&wc->page_pool, (wc->block_size * BIO_MAX_VECS + PAGE_SIZE - 1) >> PAGE_SHIFT, 0);
+ if (r) {
+ ti->error = "Unable to allocate mempool";
+ goto bad;
+ }
+
+ wc->copy_wq = alloc_workqueue("writecache-copy", WQ_MEM_RECLAIM, 1);
+ if (!wc->copy_wq) {
+ r = -ENOMEM;
+ ti->error = "Could not allocate copy workqueue";
+ goto bad;
+ }
+
wc->memory_map_size -= (uint64_t)wc->start_sector << SECTOR_SHIFT;
bio_list_init(&wc->flush_list);
@@ -2378,14 +2418,6 @@ invalid_optional:
goto bad;
}
- wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle);
- if (IS_ERR(wc->dm_kcopyd)) {
- r = PTR_ERR(wc->dm_kcopyd);
- ti->error = "Unable to allocate dm-kcopyd client";
- wc->dm_kcopyd = NULL;
- goto bad;
- }
-
wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT);
wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) /
BITS_PER_LONG * sizeof(unsigned long);
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-06-08 19:05 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-08 19:05 [dm-devel] [PATCH] dm-writecache: avoid kcopyd and use open-coded variant instead Mikulas Patocka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).