* [PATCH] dm-writecache
@ 2018-03-08 13:25 Mikulas Patocka
From: Mikulas Patocka @ 2018-03-08 13:25 UTC
  To: Alasdair G. Kergon, Mike Snitzer; +Cc: dm-devel

Hi

Here I'm submitting the dm-writecache target. You can add it to your git tree.
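
An illustrative way to set it up in SSD mode (device names and the block size
are placeholders; the table line follows the arguments parsed by
writecache_ctr, see Documentation/device-mapper/writecache.txt in this patch
for the full syntax):

  # cache writes for /dev/origin on /dev/ssd, 4096-byte blocks,
  # no optional arguments
  dmsetup create wc --table "0 $(blockdev --getsz /dev/origin) \
      writecache s /dev/origin /dev/ssd 4096 0"

  # force writeback of all cached data to the origin device
  dmsetup message wc 0 flush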

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 Documentation/device-mapper/writecache.txt |   68 
 drivers/md/Kconfig                         |   11 
 drivers/md/Makefile                        |    1 
 drivers/md/dm-writecache.c                 | 2417 +++++++++++++++++++++++++++++
 4 files changed, 2497 insertions(+)

Index: linux-2.6/drivers/md/Kconfig
===================================================================
--- linux-2.6.orig/drivers/md/Kconfig	2018-03-08 14:23:31.069999000 +0100
+++ linux-2.6/drivers/md/Kconfig	2018-03-08 14:23:31.059999000 +0100
@@ -334,6 +334,17 @@ config DM_CACHE_SMQ
          of less memory utilization, improved performance and increased
          adaptability in the face of changing workloads.
 
+config DM_WRITECACHE
+	tristate "Writecache target"
+	depends on BLK_DEV_DM
+	---help---
+	   The writecache target caches writes on persistent memory or SSD.
+	   It is intended for databases or other programs that need extremely
+	   low commit latency.
+
+	   The writecache target doesn't cache reads because reads are supposed
+	   to be cached in standard RAM.
+
 config DM_ERA
        tristate "Era target (EXPERIMENTAL)"
        depends on BLK_DEV_DM
Index: linux-2.6/drivers/md/Makefile
===================================================================
--- linux-2.6.orig/drivers/md/Makefile	2018-03-08 14:23:31.069999000 +0100
+++ linux-2.6/drivers/md/Makefile	2018-03-08 14:23:31.059999000 +0100
@@ -67,6 +67,7 @@ obj-$(CONFIG_DM_ERA)		+= dm-era.o
 obj-$(CONFIG_DM_LOG_WRITES)	+= dm-log-writes.o
 obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
+obj-$(CONFIG_DM_WRITECACHE)	+= dm-writecache.o
 
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/drivers/md/dm-writecache.c	2018-03-08 14:23:31.059999000 +0100
@@ -0,0 +1,2417 @@
+#include <linux/device-mapper.h>
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/kthread.h>
+#include <linux/swait.h>
+#include <linux/dm-io.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/dax.h>
+#include <linux/pfn_t.h>
+
+#define DM_MSG_PREFIX	"writecache"
+
+#define WRITEBACK_FUA			true
+#define HIGH_WATERMARK			50
+#define LOW_WATERMARK			45
+#define MAX_WRITEBACK_JOBS		0
+#define ENDIO_LATENCY			16
+#define WRITEBACK_LATENCY		64
+#define AUTOCOMMIT_BLOCKS_SSD		65536
+#define AUTOCOMMIT_BLOCKS_PMEM		64
+#define AUTOCOMMIT_MSEC			1000
+
+/*
+ * If the architecture doesn't support persistent memory, we can use this driver
+ * in SSD-only mode.
+ */
+#ifndef CONFIG_ARCH_HAS_PMEM_API
+#define DM_WRITECACHE_ONLY_SSD
+#endif
+
+//#define WC_MEASURE_LATENCY
+
+#define BITMAP_GRANULARITY	65536
+#if BITMAP_GRANULARITY < PAGE_SIZE
+#undef BITMAP_GRANULARITY
+#define BITMAP_GRANULARITY	PAGE_SIZE
+#endif
+
+#ifndef bio_set_dev
+#define	bio_set_dev(bio, dev)	((bio)->bi_bdev = (dev))
+#endif
+#ifndef timer_setup
+#define timer_setup(t, c, f)	setup_timer(t, c, (unsigned long)(t))
+#endif
+
+/*
+ * On X86, non-temporal stores are more efficient than cache flushing.
+ * On ARM64, cache flushing is more efficient.
+ */
+#if defined(CONFIG_X86_64)
+#define NT_STORE(dest, src)				\
+do {							\
+	typeof(src) val = (src);			\
+	memcpy_flushcache(&(dest), &val, sizeof(src));	\
+} while (0)
+#define COMMIT_FLUSHED()	wmb()
+#else
+#define NT_STORE(dest, src)	WRITE_ONCE(dest, src)
+#define FLUSH_RANGE		dax_flush
+#define COMMIT_FLUSHED()	do { } while (0)
+#endif
+
+#ifndef FLUSH_RANGE
+#define EAGER_DATA_FLUSH
+#endif
+
+#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && !defined(DM_WRITECACHE_ONLY_SSD)
+#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+#endif
+
+#define MEMORY_SUPERBLOCK_MAGIC		0x23489321
+#define MEMORY_SUPERBLOCK_VERSION	1
+
+struct wc_memory_entry {
+	uint64_t original_sector;
+	uint64_t seq_count;
+};
+
+struct wc_memory_superblock {
+	union {
+		struct {
+			uint32_t magic;
+			uint32_t version;
+			uint32_t block_size;
+			uint32_t pad;
+			uint64_t n_blocks;
+			uint64_t seq_count;
+		};
+		uint64_t padding[8];
+	};
+	struct wc_memory_entry entries[0];
+};
+
+struct wc_entry {
+	struct rb_node rb_node;
+	struct list_head lru;
+	unsigned short wc_list_contiguous;
+	bool write_in_progress
+#if BITS_PER_LONG == 64
+		:1
+#endif
+	;
+	unsigned long index
+#if BITS_PER_LONG == 64
+		:47
+#endif
+	;
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	uint64_t original_sector;
+	uint64_t seq_count;
+#endif
+};
+
+#ifndef DM_WRITECACHE_ONLY_SSD
+#define WC_MODE_PMEM(wc)			((wc)->pmem_mode)
+#define WC_MODE_FUA(wc)				((wc)->writeback_fua)
+#else
+#define WC_MODE_PMEM(wc)			false
+#define WC_MODE_FUA(wc)				false
+#endif
+#define WC_MODE_SORT_FREELIST(wc)		(!WC_MODE_PMEM(wc))
+
+struct dm_writecache {
+#ifndef DM_WRITECACHE_ONLY_SSD
+	bool pmem_mode;
+	bool writeback_fua;
+#endif
+	struct mutex lock;
+	struct rb_root tree;
+	struct list_head lru;
+	union {
+		struct list_head freelist;
+		struct {
+			struct rb_root freetree;
+			struct wc_entry *current_free;
+		};
+	};
+	size_t freelist_size;
+	size_t writeback_size;
+	unsigned uncommitted_blocks;
+	unsigned autocommit_blocks;
+	unsigned max_writeback_jobs;
+	size_t freelist_high_watermark;
+	size_t freelist_low_watermark;
+	struct timer_list autocommit_timer;
+	unsigned long autocommit_jiffies;
+	struct swait_queue_head freelist_wait;
+
+	struct dm_target *ti;
+	struct dm_dev *dev;
+	struct dm_dev *ssd_dev;
+	void *memory_map;
+	uint64_t memory_map_size;
+	size_t metadata_sectors;
+	void *block_start;
+	struct wc_entry *entries;
+	unsigned block_size;
+	unsigned char block_size_bits;
+	size_t n_blocks;
+	uint64_t seq_count;
+	int error;
+
+	bool overwrote_committed;
+	bool memory_vmapped;
+
+	atomic_t bio_in_progress[2];
+	struct swait_queue_head bio_in_progress_wait[2];
+
+	struct dm_io_client *dm_io;
+
+	unsigned writeback_all;
+	struct workqueue_struct *writeback_wq;
+	struct work_struct writeback_work;
+	struct work_struct flush_work;
+
+	struct swait_queue_head endio_thread_wait;
+	struct list_head endio_list;
+	struct task_struct *endio_thread;
+
+	struct task_struct *flush_thread;
+	struct bio *flush_bio;
+	struct completion flush_completion;
+
+	struct bio_set *bio_set;
+	mempool_t *copy_pool;
+
+	struct dm_kcopyd_client *dm_kcopyd;
+	unsigned long *dirty_bitmap;
+	unsigned dirty_bitmap_size;
+
+	bool high_wm_percent_set;
+	bool low_wm_percent_set;
+	bool max_writeback_jobs_set;
+	bool autocommit_blocks_set;
+	bool autocommit_time_set;
+	bool writeback_fua_set;
+	bool flush_on_suspend;
+
+#ifdef WC_MEASURE_LATENCY
+	ktime_t lock_acquired_time;
+	ktime_t max_lock_held;
+	ktime_t max_lock_wait;
+	ktime_t max_freelist_wait;
+	ktime_t measure_latency_time;
+	ktime_t max_measure_latency;
+#endif
+};
+
+#define WB_LIST_INLINE		16
+
+struct writeback_struct {
+	struct list_head endio_entry;
+	struct dm_writecache *wc;
+	struct wc_entry **wc_list;
+	unsigned wc_list_n;
+	unsigned page_offset;
+	struct page *page;
+	struct wc_entry *wc_list_inline[WB_LIST_INLINE];
+	struct bio bio;
+};
+
+struct copy_struct {
+	struct list_head endio_entry;
+	struct dm_writecache *wc;
+	struct wc_entry *e;
+	unsigned n_entries;
+	int error;
+};
+
+DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(dm_writecache_throttle,
+					    "A percentage of time allocated for data copying");
+
+static inline void measure_latency_start(struct dm_writecache *wc)
+{
+#ifdef WC_MEASURE_LATENCY
+	wc->measure_latency_time = ktime_get();
+#endif
+}
+
+static inline void measure_latency_end(struct dm_writecache *wc, unsigned long n)
+{
+#ifdef WC_MEASURE_LATENCY
+	ktime_t now = ktime_get();
+	if (now - wc->measure_latency_time > wc->max_measure_latency) {
+		wc->max_measure_latency = now - wc->measure_latency_time;
+		printk(KERN_DEBUG "dm-writecache: measured latency %lld.%03lldus, %lu steps\n", wc->max_measure_latency / 1000, wc->max_measure_latency % 1000, n);
+	}
+#endif
+}
+
+static void __wc_lock(struct dm_writecache *wc, int line)
+{
+#ifdef WC_MEASURE_LATENCY
+	ktime_t before, after;
+	before = ktime_get();
+#endif
+	mutex_lock(&wc->lock);
+#ifdef WC_MEASURE_LATENCY
+	after = ktime_get();
+	if (unlikely(after - before > wc->max_lock_wait)) {
+		wc->max_lock_wait = after - before;
+		printk(KERN_DEBUG "dm-writecache: waiting for lock for %lld.%03lldus at %d\n", wc->max_lock_wait / 1000, wc->max_lock_wait % 1000, line);
+		after = ktime_get();
+	}
+	wc->lock_acquired_time = after;
+#endif
+}
+#define wc_lock(wc)	__wc_lock(wc, __LINE__)
+
+static void __wc_unlock(struct dm_writecache *wc, int line)
+{
+#ifdef WC_MEASURE_LATENCY
+	ktime_t now = ktime_get();
+	if (now - wc->lock_acquired_time > wc->max_lock_held) {
+		wc->max_lock_held = now - wc->lock_acquired_time;
+		printk(KERN_DEBUG "dm-writecache: lock held for %lld.%03lldus at %d\n", wc->max_lock_held / 1000, wc->max_lock_held % 1000, line);
+	}
+#endif
+	mutex_unlock(&wc->lock);
+}
+#define wc_unlock(wc)	__wc_unlock(wc, __LINE__)
+
+#define wc_unlock_long(wc)	mutex_unlock(&wc->lock)
+
+static int persistent_memory_claim(struct dm_writecache *wc)
+{
+	int r;
+	loff_t s;
+	long p, da;
+	pfn_t pfn;
+	int id;
+	struct page **pages;
+
+	wc->memory_vmapped = false;
+
+	if (!wc->ssd_dev->dax_dev) {
+		r = -EOPNOTSUPP;
+		goto err1;
+	}
+	s = wc->memory_map_size;
+	p = s >> PAGE_SHIFT;
+	if (!p) {
+		r = -EINVAL;
+		goto err1;
+	}
+	if (p != s >> PAGE_SHIFT) {
+		r = -EOVERFLOW;
+		goto err1;
+	}
+
+	id = dax_read_lock();
+
+	da = dax_direct_access(wc->ssd_dev->dax_dev, 0, p, &wc->memory_map, &pfn);
+	if (da < 0) {
+		wc->memory_map = NULL;
+		r = da;
+		goto err2;
+	}
+	if (!pfn_t_has_page(pfn)) {
+		wc->memory_map = NULL;
+		r = -EOPNOTSUPP;
+		goto err2;
+	}
+#ifdef WC_MEASURE_LATENCY
+	printk(KERN_DEBUG "dm-writecache: device %s, pfn %016llx\n", wc->ssd_dev->name, pfn.val);
+#endif
+	if (da != p) {
+		long i;
+		wc->memory_map = NULL;
+		pages = kvmalloc(p * sizeof(struct page *), GFP_KERNEL);
+		if (!pages) {
+			r = -ENOMEM;
+			goto err2;
+		}
+		i = 0;
+		do {
+			long daa;
+			void *dummy_addr;
+			daa = dax_direct_access(wc->ssd_dev->dax_dev, i, p - i, &dummy_addr, &pfn);
+			if (daa <= 0) {
+				r = daa ? daa : -EINVAL;
+				goto err3;
+			}
+			if (!pfn_t_has_page(pfn)) {
+				r = -EOPNOTSUPP;
+				goto err3;
+			}
+			while (daa-- && i < p) {
+				pages[i++] = pfn_t_to_page(pfn);
+				pfn.val++;
+			}
+		} while (i < p);
+		wc->memory_map = vmap(pages, p, VM_MAP, PAGE_KERNEL);
+		if (!wc->memory_map) {
+			r = -ENOMEM;
+			goto err3;
+		}
+		kvfree(pages);
+		wc->memory_vmapped = true;
+	}
+
+	dax_read_unlock(id);
+
+	return 0;
+
+err3:
+	kvfree(pages);
+err2:
+	dax_read_unlock(id);
+err1:
+	return r;
+}
+
+static void persistent_memory_release(struct dm_writecache *wc)
+{
+	if (wc->memory_vmapped)
+		vunmap(wc->memory_map);
+}
+
+static struct page *persistent_memory_page(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+	else
+		return virt_to_page(addr);
+}
+
+static unsigned persistent_memory_page_offset(void *addr)
+{
+	return (unsigned long)addr & (PAGE_SIZE - 1);
+}
+
+static void persistent_memory_flush_cache(void *ptr, size_t size)
+{
+	if (is_vmalloc_addr(ptr))
+		flush_kernel_vmap_range(ptr, size);
+}
+
+static void persistent_memory_invalidate_cache(void *ptr, size_t size)
+{
+	if (is_vmalloc_addr(ptr))
+		invalidate_kernel_vmap_range(ptr, size);
+}
+
+static void persistent_memory_flush(struct dm_writecache *wc, void *ptr, size_t size)
+{
+#ifdef FLUSH_RANGE
+	FLUSH_RANGE(wc->ssd_dev->dax_dev, ptr, size);
+#endif
+}
+
+static void persistent_memory_commit_flushed(void)
+{
+	COMMIT_FLUSHED();
+}
+
+static struct wc_memory_superblock *sb(struct dm_writecache *wc)
+{
+	return wc->memory_map;
+}
+
+static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e)
+{
+	if (is_power_of_2(sizeof(struct wc_entry)) && 0)
+		return &sb(wc)->entries[e - wc->entries];
+	else
+		return &sb(wc)->entries[e->index];
+}
+
+static void *memory_data(struct dm_writecache *wc, struct wc_entry *e)
+{
+	return (char *)wc->block_start + (e->index << wc->block_size_bits);
+}
+
+static sector_t cache_sector(struct dm_writecache *wc, struct wc_entry *e)
+{
+	return wc->metadata_sectors +
+		((sector_t)e->index << (wc->block_size_bits - SECTOR_SHIFT));
+}
+
+static uint64_t read_original_sector(struct dm_writecache *wc, struct wc_entry *e)
+{
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	return e->original_sector;
+#else
+	return le64_to_cpu(memory_entry(wc, e)->original_sector);
+#endif
+}
+
+static uint64_t read_seq_count(struct dm_writecache *wc, struct wc_entry *e)
+{
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	return e->seq_count;
+#else
+	return le64_to_cpu(memory_entry(wc, e)->seq_count);
+#endif
+}
+
+static void clear_seq_count(struct dm_writecache *wc, struct wc_entry *e)
+{
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	e->seq_count = -1;
+#endif
+	NT_STORE(memory_entry(wc, e)->seq_count, cpu_to_le64(-1));
+}
+
+static void write_original_sector_seq_count(struct dm_writecache *wc, struct wc_entry *e,
+					    uint64_t original_sector, uint64_t seq_count)
+{
+	struct wc_memory_entry *me_p, me;
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	e->original_sector = original_sector;
+	e->seq_count = seq_count;
+#endif
+	me_p = memory_entry(wc, e);
+	me.original_sector = cpu_to_le64(original_sector);
+	me.seq_count = cpu_to_le64(seq_count);
+	NT_STORE(*me_p, me);
+}
+
+#define writecache_error(wc, err, msg, arg...)				\
+do {									\
+	if (!cmpxchg(&(wc)->error, 0, err))				\
+		DMERR(msg, ##arg);					\
+	swake_up(&(wc)->freelist_wait);					\
+} while (0)
+
+#define writecache_has_error(wc)	(unlikely(READ_ONCE((wc)->error)))
+
+static void writecache_flush_all_metadata(struct dm_writecache *wc)
+{
+	if (WC_MODE_PMEM(wc)) {
+		persistent_memory_flush(wc,
+			sb(wc), offsetof(struct wc_memory_superblock, entries[wc->n_blocks]));
+	} else {
+		memset(wc->dirty_bitmap, -1, wc->dirty_bitmap_size);
+	}
+}
+
+static void writecache_flush_region(struct dm_writecache *wc, void *ptr, size_t size)
+{
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_flush(wc, ptr, size);
+	else
+		__set_bit(((char *)ptr - (char *)wc->memory_map) / BITMAP_GRANULARITY,
+			  wc->dirty_bitmap);
+}
+
+static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev);
+
+struct io_notify {
+	struct dm_writecache *wc;
+	struct completion c;
+	atomic_t count;
+};
+
+static void writecache_notify_io(unsigned long error, void *context)
+{
+	struct io_notify *endio = context;
+
+	if (unlikely(error != 0))
+		writecache_error(endio->wc, -EIO, "error writing metadata");
+	BUG_ON(atomic_read(&endio->count) <= 0);
+	if (atomic_dec_and_test(&endio->count))
+		complete(&endio->c);
+}
+
+static void ssd_commit_flushed(struct dm_writecache *wc)
+{
+	int r;
+	struct dm_io_region region;
+	struct dm_io_request req;
+	struct io_notify endio = {
+		wc,
+		COMPLETION_INITIALIZER_ONSTACK(endio.c),
+		ATOMIC_INIT(1),
+	};
+	unsigned bitmap_bits = wc->dirty_bitmap_size * BITS_PER_LONG;
+	unsigned i = 0;
+
+	while (1) {
+		unsigned j;
+		i = find_next_bit(wc->dirty_bitmap, bitmap_bits, i);
+		if (unlikely(i == bitmap_bits))
+			break;
+		j = find_next_zero_bit(wc->dirty_bitmap, bitmap_bits, i);
+
+		region.bdev = wc->ssd_dev->bdev;
+		region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT);
+		region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT);
+
+		if (unlikely(region.sector >= wc->metadata_sectors))
+			break;
+		if (unlikely(region.sector + region.count > wc->metadata_sectors))
+			region.count = wc->metadata_sectors - region.sector;
+
+		atomic_inc(&endio.count);
+		req.bi_op = REQ_OP_WRITE;
+		req.bi_op_flags = REQ_SYNC;
+		req.mem.type = DM_IO_VMA;
+		req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY;
+		req.client = wc->dm_io;
+		req.notify.fn = writecache_notify_io;
+		req.notify.context = &endio;
+
+		r = dm_io(&req, 1, &region, NULL);
+		if (unlikely(r)) {
+			/*
+			 * Async dm-io (implied by notify.fn above) won't return an error, but
+			 * if that changes in the future we must catch it: so panic in defense.
+			 */
+			panic(DM_NAME ": " DM_MSG_PREFIX ": dm io error %d", r);
+		}
+		i = j;
+	}
+
+	writecache_notify_io(0, &endio);
+	wait_for_completion_io(&endio.c);
+
+	writecache_disk_flush(wc, wc->ssd_dev);
+
+	memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size);
+}
+
+static void writecache_commit_flushed(struct dm_writecache *wc)
+{
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_commit_flushed();
+	else
+		ssd_commit_flushed(wc);
+}
+
+static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev)
+{
+	int r;
+	struct dm_io_region region;
+	struct dm_io_request req;
+
+	region.bdev = dev->bdev;
+	region.sector = 0;
+	region.count = 0;
+	req.bi_op = REQ_OP_WRITE;
+	req.bi_op_flags = REQ_PREFLUSH;
+	req.mem.type = DM_IO_KMEM;
+	req.mem.ptr.addr = NULL;
+	req.client = wc->dm_io;
+	req.notify.fn = NULL;
+
+	r = dm_io(&req, 1, &region, NULL);
+	if (unlikely(r))
+		writecache_error(wc, r, "error flushing metadata: %d", r);
+}
+
+static void writecache_wait_for_ios(struct dm_writecache *wc, int direction)
+{
+	swait_event(wc->bio_in_progress_wait[direction],
+		   !atomic_read(&wc->bio_in_progress[direction]));
+}
+
+#define WFE_RETURN_FOLLOWING	1
+#define WFE_LOWEST_SEQ		2
+
+static struct wc_entry *writecache_find_entry(struct dm_writecache *wc, uint64_t block, int flags)
+{
+	struct wc_entry *e;
+	struct rb_node *node = wc->tree.rb_node;
+
+	if (unlikely(!node))
+		return NULL;
+
+	while (1) {
+		e = container_of(node, struct wc_entry, rb_node);
+		if (read_original_sector(wc, e) == block)
+			break;
+		node = (read_original_sector(wc, e) >= block ?
+			e->rb_node.rb_left : e->rb_node.rb_right);
+		if (unlikely(!node)) {
+			if (!(flags & WFE_RETURN_FOLLOWING)) {
+				return NULL;
+			}
+			if (read_original_sector(wc, e) >= block) {
+				break;
+			} else {
+				node = rb_next(&e->rb_node);
+				if (unlikely(!node)) {
+					return NULL;
+				}
+				e = container_of(node, struct wc_entry, rb_node);
+				break;
+			}
+		}
+	}
+
+	while (1) {
+		struct wc_entry *e2;
+		if (flags & WFE_LOWEST_SEQ)
+			node = rb_prev(&e->rb_node);
+		else
+			node = rb_next(&e->rb_node);
+		if (!node)
+			return e;
+		e2 = container_of(node, struct wc_entry, rb_node);
+		if (read_original_sector(wc, e2) != block)
+			return e;
+		e = e2;
+	}
+}
+
+static void writecache_insert_entry(struct dm_writecache *wc, struct wc_entry *ins)
+{
+	struct wc_entry *e;
+	struct rb_node **node = &wc->tree.rb_node, *parent = NULL;
+
+	while (*node) {
+		e = container_of(*node, struct wc_entry, rb_node);
+		parent = &e->rb_node;
+		node = read_original_sector(wc, e) > read_original_sector(wc, ins) ?
+			&parent->rb_left : &parent->rb_right;
+	}
+	rb_link_node(&ins->rb_node, parent, node);
+	rb_insert_color(&ins->rb_node, &wc->tree);
+	list_add(&ins->lru, &wc->lru);
+}
+
+static void writecache_unlink(struct dm_writecache *wc, struct wc_entry *e)
+{
+	list_del(&e->lru);
+	rb_erase(&e->rb_node, &wc->tree);
+}
+
+static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry *e)
+{
+	if (WC_MODE_SORT_FREELIST(wc)) {
+		struct rb_node **node = &wc->freetree.rb_node, *parent = NULL;
+		if (unlikely(!*node))
+			wc->current_free = e;
+		while (*node) {
+			parent = *node;
+			node = &e->rb_node < *node ? &parent->rb_left : &parent->rb_right;
+		}
+		rb_link_node(&e->rb_node, parent, node);
+		rb_insert_color(&e->rb_node, &wc->freetree);
+	} else {
+		list_add_tail(&e->lru, &wc->freelist);
+	}
+	wc->freelist_size++;
+}
+
+static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc)
+{
+	struct wc_entry *e;
+
+	if (WC_MODE_SORT_FREELIST(wc)) {
+		struct rb_node *next;
+		if (unlikely(!wc->current_free))
+			return NULL;
+		e = wc->current_free;
+		next = rb_next(&e->rb_node);
+		rb_erase(&e->rb_node, &wc->freetree);
+		if (unlikely(!next))
+			next = rb_first(&wc->freetree);
+		wc->current_free = next ? container_of(next, struct wc_entry, rb_node) : NULL;
+	} else {
+		if (unlikely(list_empty(&wc->freelist)))
+			return NULL;
+		e = container_of(wc->freelist.next, struct wc_entry, lru);
+		list_del(&e->lru);
+	}
+	wc->freelist_size--;
+	if (unlikely(wc->freelist_size <= wc->freelist_high_watermark))
+		queue_work(wc->writeback_wq, &wc->writeback_work);
+
+	return e;
+}
+
+static void writecache_free_entry(struct dm_writecache *wc, struct wc_entry *e)
+{
+	writecache_unlink(wc, e);
+	writecache_add_to_freelist(wc, e);
+	clear_seq_count(wc, e);
+	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+	if (unlikely(swait_active(&wc->freelist_wait)))
+		swake_up(&wc->freelist_wait);
+}
+
+static void __writecache_wait_on_freelist(struct dm_writecache *wc, bool measure, int line)
+{
+	DECLARE_SWAITQUEUE(wait);
+#ifdef WC_MEASURE_LATENCY
+	ktime_t before, after;
+#endif
+
+	prepare_to_swait(&wc->freelist_wait, &wait, TASK_UNINTERRUPTIBLE);
+	wc_unlock(wc);
+#ifdef WC_MEASURE_LATENCY
+	if (measure)
+		before = ktime_get();
+#endif
+	io_schedule();
+	finish_swait(&wc->freelist_wait, &wait);
+#ifdef WC_MEASURE_LATENCY
+	if (measure) {
+		after = ktime_get();
+		if (unlikely(after - before > wc->max_freelist_wait)) {
+			wc->max_freelist_wait = after - before;
+			printk(KERN_DEBUG "dm-writecache: waiting on freelist for %lld.%03lldus at %d\n", wc->max_freelist_wait / 1000, wc->max_freelist_wait % 1000, line);
+		}
+	}
+#endif
+	wc_lock(wc);
+}
+#define writecache_wait_on_freelist(wc)		__writecache_wait_on_freelist(wc, true, __LINE__)
+#define writecache_wait_on_freelist_long(wc)	__writecache_wait_on_freelist(wc, false, __LINE__)
+
+static void writecache_poison_lists(struct dm_writecache *wc)
+{
+	/*
+	 * Catch incorrect access to these values while the device is suspended.
+	 */
+	memset(&wc->tree, -1, sizeof wc->tree);
+	wc->lru.next = LIST_POISON1;
+	wc->lru.prev = LIST_POISON2;
+	wc->freelist.next = LIST_POISON1;
+	wc->freelist.prev = LIST_POISON2;
+}
+
+static void writecache_flush_entry(struct dm_writecache *wc, struct wc_entry *e)
+{
+	writecache_flush_region(wc, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+#ifndef EAGER_DATA_FLUSH
+	if (WC_MODE_PMEM(wc))
+		writecache_flush_region(wc, memory_data(wc, e), wc->block_size);
+#endif
+}
+
+static bool writecache_entry_is_committed(struct dm_writecache *wc, struct wc_entry *e)
+{
+	return read_seq_count(wc, e) < wc->seq_count;
+}
+
+static void writecache_flush(struct dm_writecache *wc)
+{
+	struct wc_entry *e, *e2;
+	bool need_flush_after_free;
+
+	wc->uncommitted_blocks = 0;
+	del_timer(&wc->autocommit_timer);
+
+	if (list_empty(&wc->lru))
+		return;
+
+	e = container_of(wc->lru.next, struct wc_entry, lru);
+	if (writecache_entry_is_committed(wc, e)) {
+		if (wc->overwrote_committed) {
+			writecache_wait_for_ios(wc, WRITE);
+			writecache_disk_flush(wc, wc->ssd_dev);
+			wc->overwrote_committed = false;
+		}
+		return;
+	}
+	while (1) {
+		writecache_flush_entry(wc, e);
+		if (unlikely(e->lru.next == &wc->lru))
+			break;
+		e2 = container_of(e->lru.next, struct wc_entry, lru);
+		if (writecache_entry_is_committed(wc, e2))
+			break;
+		e = e2;
+		cond_resched();
+	}
+	writecache_commit_flushed(wc);
+
+	writecache_wait_for_ios(wc, WRITE);
+
+	wc->seq_count++;
+	NT_STORE(sb(wc)->seq_count, cpu_to_le64(wc->seq_count));
+	writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count);
+	writecache_commit_flushed(wc);
+
+	wc->overwrote_committed = false;
+
+	need_flush_after_free = false;
+	while (1) {
+		/* Free another committed entry with lower seq-count */
+		struct rb_node *rb_node = rb_prev(&e->rb_node);
+
+		if (rb_node) {
+			e2 = container_of(rb_node, struct wc_entry, rb_node);
+			if (read_original_sector(wc, e2) == read_original_sector(wc, e) &&
+			    likely(!e2->write_in_progress)) {
+				writecache_free_entry(wc, e2);
+				need_flush_after_free = true;
+			}
+		}
+		if (unlikely(e->lru.prev == &wc->lru))
+			break;
+		e = container_of(e->lru.prev, struct wc_entry, lru);
+		cond_resched();
+	}
+
+	if (need_flush_after_free)
+		writecache_commit_flushed(wc);
+}
+
+static void writecache_flush_work(struct work_struct *work)
+{
+	struct dm_writecache *wc = container_of(work, struct dm_writecache, flush_work);
+	wc_lock(wc);
+	writecache_flush(wc);
+	wc_unlock(wc);
+}
+
+#ifdef setup_timer
+static void writecache_autocommit_timer(unsigned long data)
+{
+	struct dm_writecache *wc = (struct dm_writecache *)data;
+	if (!writecache_has_error(wc))
+		queue_work(wc->writeback_wq, &wc->flush_work);
+}
+#else
+static void writecache_autocommit_timer(struct timer_list *t)
+{
+	struct dm_writecache *wc = from_timer(wc, t, autocommit_timer);
+	if (!writecache_has_error(wc))
+		queue_work(wc->writeback_wq, &wc->flush_work);
+}
+#endif
+
+static void writecache_schedule_autocommit(struct dm_writecache *wc)
+{
+	if (!timer_pending(&wc->autocommit_timer))
+		mod_timer(&wc->autocommit_timer, jiffies + wc->autocommit_jiffies);
+}
+
+static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_t end)
+{
+	struct wc_entry *e;
+	bool discarded_something = false;
+
+	e = writecache_find_entry(wc, start, WFE_RETURN_FOLLOWING | WFE_LOWEST_SEQ);
+	if (unlikely(!e))
+		return;
+
+	while (read_original_sector(wc, e) < end) {
+		struct rb_node *node = rb_next(&e->rb_node);
+
+		if (likely(!e->write_in_progress)) {
+			if (!discarded_something) {
+				writecache_wait_for_ios(wc, READ);
+				writecache_wait_for_ios(wc, WRITE);
+				discarded_something = true;
+			}
+			writecache_free_entry(wc, e);
+		}
+
+		if (!node)
+			break;
+
+		e = container_of(node, struct wc_entry, rb_node);
+	}
+
+	if (discarded_something)
+		writecache_commit_flushed(wc);
+}
+
+static bool writecache_wait_for_writeback(struct dm_writecache *wc)
+{
+	if (wc->writeback_size) {
+		writecache_wait_on_freelist(wc);
+		return true;
+	}
+	return false;
+}
+
+static void writecache_suspend(struct dm_target *ti)
+{
+	struct dm_writecache *wc = ti->private;
+	bool flush_on_suspend;
+
+	del_timer_sync(&wc->autocommit_timer);
+
+	wc_lock(wc);
+	writecache_flush(wc);
+	flush_on_suspend = wc->flush_on_suspend;
+	if (flush_on_suspend) {
+		wc->flush_on_suspend = false;
+		wc->writeback_all++;
+		queue_work(wc->writeback_wq, &wc->writeback_work);
+	}
+	wc_unlock(wc);
+
+	flush_workqueue(wc->writeback_wq);
+
+	wc_lock(wc);
+	if (flush_on_suspend) {
+		wc->writeback_all--;
+	}
+	while (writecache_wait_for_writeback(wc));
+
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size);
+
+	writecache_poison_lists(wc);
+
+	wc_unlock_long(wc);
+}
+
+static int writecache_alloc_entries(struct dm_writecache *wc)
+{
+	size_t b;
+	if (wc->entries)
+		return 0;
+	wc->entries = vmalloc(sizeof(struct wc_entry) * wc->n_blocks);
+	if (!wc->entries)
+		return -ENOMEM;
+	for (b = 0; b < wc->n_blocks; b++) {
+		struct wc_entry *e = &wc->entries[b];
+		e->index = b;
+		e->write_in_progress = false;
+	}
+	return 0;
+}
+
+static void writecache_resume(struct dm_target *ti)
+{
+	struct dm_writecache *wc = ti->private;
+	size_t b;
+	bool need_flush = false;
+	uint64_t sb_seq_count;
+	int r;
+
+	wc_lock(wc);
+
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size);
+
+	wc->tree = RB_ROOT;
+	INIT_LIST_HEAD(&wc->lru);
+	if (WC_MODE_SORT_FREELIST(wc)) {
+		wc->freetree = RB_ROOT;
+		wc->current_free = NULL;
+	} else {
+		INIT_LIST_HEAD(&wc->freelist);
+	}
+	wc->freelist_size = 0;
+
+	r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
+	if (r) {
+		writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
+		sb_seq_count = cpu_to_le64(0);
+	}
+	wc->seq_count = le64_to_cpu(sb_seq_count);
+
+#ifdef DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
+	for (b = 0; b < wc->n_blocks; b++) {
+		struct wc_entry *e = &wc->entries[b];
+		struct wc_memory_entry wme;
+		if (writecache_has_error(wc)) {
+			e->original_sector = -1;
+			e->seq_count = -1;
+			continue;
+		}
+		r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+		if (r) {
+			writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d", (unsigned long)b, r);
+			e->original_sector = -1;
+			e->seq_count = -1;
+		} else {
+			e->original_sector = le64_to_cpu(wme.original_sector);
+			e->seq_count = le64_to_cpu(wme.seq_count);
+		}
+	}
+#endif
+	for (b = 0; b < wc->n_blocks; b++) {
+		struct wc_entry *e = &wc->entries[b];
+		if (!writecache_entry_is_committed(wc, e)) {
+			if (read_seq_count(wc, e) != -1) {
+erase_this:
+				clear_seq_count(wc, e);
+				need_flush = true;
+			}
+			writecache_add_to_freelist(wc, e);
+		} else {
+			struct wc_entry *old;
+
+			old = writecache_find_entry(wc, read_original_sector(wc, e), 0);
+			if (!old) {
+				writecache_insert_entry(wc, e);
+			} else {
+				if (read_seq_count(wc, old) == read_seq_count(wc, e)) {
+					writecache_error(wc, -EINVAL, "two identical entries, position %llu, sector %llu, sequence %llu",
+						 (unsigned long long)b, (unsigned long long)read_original_sector(wc, e),
+						 (unsigned long long)read_seq_count(wc, e));
+				}
+				if (read_seq_count(wc, old) > read_seq_count(wc, e)) {
+					goto erase_this;
+				} else {
+					writecache_free_entry(wc, old);
+					writecache_insert_entry(wc, e);
+					need_flush = true;
+				}
+			}
+		}
+		cond_resched();
+	}
+
+	if (need_flush) {
+		writecache_flush_all_metadata(wc);
+		writecache_commit_flushed(wc);
+	}
+
+	wc_unlock_long(wc);
+}
+
+static int process_flush_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+	if (argc != 1)
+		return -EINVAL;
+
+	wc_lock(wc);
+	if (dm_suspended(wc->ti)) {
+		wc_unlock(wc);
+		return -EBUSY;
+	}
+	if (writecache_has_error(wc)) {
+		wc_unlock(wc);
+		return -EIO;
+	}
+
+	writecache_flush(wc);
+	wc->writeback_all++;
+	queue_work(wc->writeback_wq, &wc->writeback_work);
+	wc_unlock(wc);
+
+	flush_workqueue(wc->writeback_wq);
+
+	wc_lock(wc);
+	wc->writeback_all--;
+	if (writecache_has_error(wc)) {
+		wc_unlock(wc);
+		return -EIO;
+	}
+	wc_unlock(wc);
+
+	return 0;
+}
+
+static int process_flush_on_suspend_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+	if (argc != 1)
+		return -EINVAL;
+
+	wc_lock(wc);
+	wc->flush_on_suspend = true;
+	wc_unlock(wc);
+
+	return 0;
+}
+
+static int writecache_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	int r = -EINVAL;
+	struct dm_writecache *wc = ti->private;
+
+	if (!strcasecmp(argv[0], "flush"))
+		r = process_flush_mesg(argc, argv, wc);
+	else if (!strcasecmp(argv[0], "flush_on_suspend"))
+		r = process_flush_on_suspend_mesg(argc, argv, wc);
+	else
+		DMWARN("unrecognised message received: %s", argv[0]);
+
+	return r;
+}
+
+static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
+{
+	void *buf;
+	unsigned long flags;
+	unsigned size;
+	int rw = bio_data_dir(bio);
+	unsigned remaining_size = wc->block_size;
+
+	do {
+		struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
+		buf = bvec_kmap_irq(&bv, &flags);
+		size = bv.bv_len;
+		if (unlikely(size > remaining_size))
+			size = remaining_size;
+
+		if (rw == READ) {
+			int r;
+			r = memcpy_mcsafe(buf, data, size);
+			flush_dcache_page(bio_page(bio));
+			if (unlikely(r)) {
+				writecache_error(wc, r, "hardware memory error when reading data: %d", r);
+				bio->bi_status = BLK_STS_IOERR;
+			}
+		} else {
+			flush_dcache_page(bio_page(bio));
+#ifdef EAGER_DATA_FLUSH
+			memcpy_flushcache(data, buf, size);
+#else
+			memcpy(data, buf, size);
+#endif
+		}
+
+		bvec_kunmap_irq(buf, &flags);
+
+		data = (char *)data + size;
+		remaining_size -= size;
+		bio_advance(bio, size);
+	} while (unlikely(remaining_size));
+}
+
+static int writecache_flush_thread(void *data)
+{
+	struct dm_writecache *wc = data;
+
+	while (!kthread_should_stop()) {
+		struct bio *bio;
+
+		bio = wc->flush_bio;
+		if (unlikely(!bio)) {
+		} else if (bio_op(bio) == REQ_OP_DISCARD) {
+			writecache_discard(wc, bio->bi_iter.bi_sector,
+					   bio->bi_iter.bi_sector + (bio->bi_iter.bi_size >> SECTOR_SHIFT));
+		} else {
+			writecache_flush(wc);
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		wc->flush_bio = (void *)0x600 + POISON_POINTER_DELTA;	/* for debugging - catch uninitialized use */
+		complete(&wc->flush_completion);
+
+		schedule();
+	}
+
+	set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
+{
+	wc->flush_bio = bio;
+	reinit_completion(&wc->flush_completion);
+	wake_up_process(wc->flush_thread);
+	wait_for_completion_io(&wc->flush_completion);
+}
+
+static int writecache_map(struct dm_target *ti, struct bio *bio)
+{
+	struct wc_entry *e;
+	struct dm_writecache *wc = ti->private;
+
+	bio->bi_private = NULL;
+
+	wc_lock(wc);
+
+	if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
+		if (writecache_has_error(wc))
+			goto unlock_error;
+		if (WC_MODE_PMEM(wc))
+			writecache_flush(wc);
+		else
+			writecache_offload_bio(wc, bio);
+		goto unlock_ok_flush;
+	}
+
+	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+	if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+				(wc->block_size / 512 - 1)) != 0)) {
+		DMWARN("I/O is not aligned, sector %llu, size %u, block size %u",
+			(unsigned long long)bio->bi_iter.bi_sector,
+			bio->bi_iter.bi_size, wc->block_size);
+		goto unlock_error;
+	}
+
+	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+		if (writecache_has_error(wc))
+			goto unlock_error;
+		if (WC_MODE_PMEM(wc))
+			writecache_discard(wc, bio->bi_iter.bi_sector,
+					   bio->bi_iter.bi_sector + (bio->bi_iter.bi_size >> SECTOR_SHIFT));
+		else
+			writecache_offload_bio(wc, bio);
+		goto unlock_remap_origin;
+	}
+
+	if (bio_data_dir(bio) == READ) {
+next_block:
+		e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+		if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
+			if (WC_MODE_PMEM(wc)) {
+				bio_copy_block(wc, bio, memory_data(wc, e));
+				if (bio->bi_iter.bi_size)
+					goto next_block;
+				goto unlock_ok_read;
+			} else {
+				dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+				bio_set_dev(bio, wc->ssd_dev->bdev);
+				bio->bi_iter.bi_sector = cache_sector(wc, e);
+				if (!writecache_entry_is_committed(wc, e))
+					writecache_wait_for_ios(wc, WRITE);
+				goto unlock_remap;
+			}
+		} else {
+			if (e) {
+				sector_t next_boundary =
+					read_original_sector(wc, e) - bio->bi_iter.bi_sector;
+				if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
+					dm_accept_partial_bio(bio, next_boundary);
+				}
+			}
+			goto unlock_remap_origin;
+		}
+	} else {
+		do {
+			if (writecache_has_error(wc))
+				goto unlock_error;
+			e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
+			if (e) {
+				if (!writecache_entry_is_committed(wc, e))
+					goto bio_copy;
+				if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
+					wc->overwrote_committed = true;
+					goto bio_copy;
+				}
+			}
+			e = writecache_pop_from_freelist(wc);
+			if (unlikely(!e)) {
+				writecache_wait_on_freelist(wc);
+				continue;
+			}
+			write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
+			writecache_insert_entry(wc, e);
+			wc->uncommitted_blocks++;
+bio_copy:
+			if (WC_MODE_PMEM(wc)) {
+				bio_copy_block(wc, bio, memory_data(wc, e));
+			} else {
+				dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+				bio_set_dev(bio, wc->ssd_dev->bdev);
+				bio->bi_iter.bi_sector = cache_sector(wc, e);
+				if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+					wc->uncommitted_blocks = 0;
+					queue_work(wc->writeback_wq, &wc->flush_work);
+				} else {
+					writecache_schedule_autocommit(wc);
+				}
+				goto unlock_remap;
+			}
+		} while (bio->bi_iter.bi_size);
+
+		if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+			writecache_flush(wc);
+		} else {
+			writecache_schedule_autocommit(wc);
+		}
+
+		goto unlock_ok_write;
+	}
+
+unlock_remap_origin:
+	bio_set_dev(bio, wc->dev->bdev);
+	wc_unlock(wc);
+	return DM_MAPIO_REMAPPED;
+
+unlock_remap:
+	bio->bi_private = (void *)1;	/* make sure that writecache_end_io decrements bio_in_progress */
+	atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
+	wc_unlock(wc);
+	return DM_MAPIO_REMAPPED;
+
+unlock_ok_flush:
+#ifdef WC_MEASURE_LATENCY
+	wc_unlock(wc);
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+#endif
+
+unlock_ok_read:
+#ifdef WC_MEASURE_LATENCY
+	wc_unlock(wc);
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+#endif
+
+unlock_ok_write:
+	wc_unlock(wc);
+	bio_endio(bio);
+	return DM_MAPIO_SUBMITTED;
+
+unlock_error:
+	wc_unlock(wc);
+	bio_io_error(bio);
+	return DM_MAPIO_SUBMITTED;
+}
+
+static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
+{
+	struct dm_writecache *wc = ti->private;
+
+	if (bio->bi_private != NULL) {
+		int dir = bio_data_dir(bio);
+		if (atomic_dec_and_test(&wc->bio_in_progress[dir]))
+			if (unlikely(swait_active(&wc->bio_in_progress_wait[dir])))
+				swake_up(&wc->bio_in_progress_wait[dir]);
+	}
+	return 0;
+}
+
+static int writecache_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
+{
+	struct dm_writecache *wc = ti->private;
+
+	return fn(ti, wc->dev, 0, ti->len, data);
+}
+
+static void writecache_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	struct dm_writecache *wc = ti->private;
+
+	if (limits->logical_block_size < wc->block_size)
+		limits->logical_block_size = wc->block_size;
+
+	if (limits->physical_block_size < wc->block_size)
+		limits->physical_block_size = wc->block_size;
+
+	if (limits->io_min < wc->block_size)
+		limits->io_min = wc->block_size;
+}
+
+
+static void writecache_writeback_endio(struct bio *bio)
+{
+	struct writeback_struct *wb = container_of(bio, struct writeback_struct, bio);
+	struct dm_writecache *wc = wb->wc;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&wc->endio_thread_wait.lock, flags);
+	list_add_tail(&wb->endio_entry, &wc->endio_list);
+	swake_up_locked(&wc->endio_thread_wait);
+	raw_spin_unlock_irqrestore(&wc->endio_thread_wait.lock, flags);
+}
+
+static void writecache_copy_endio(int read_err, unsigned long write_err, void *ptr)
+{
+	struct copy_struct *c = ptr;
+	struct dm_writecache *wc = c->wc;
+
+	c->error = likely(!(read_err | write_err)) ? 0 : -EIO;
+
+	raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+	list_add_tail(&c->endio_entry, &wc->endio_list);
+	swake_up_locked(&wc->endio_thread_wait);
+	raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+}
+
+static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head *list)
+{
+	unsigned i;
+	struct writeback_struct *wb;
+	struct wc_entry *e;
+	unsigned long n_walked = 0;
+
+	do {
+		wb = list_entry(list->next, struct writeback_struct, endio_entry);
+		list_del(&wb->endio_entry);
+
+		if (unlikely(wb->bio.bi_status != BLK_STS_OK))
+			writecache_error(wc, blk_status_to_errno(wb->bio.bi_status),
+					"write error %d", wb->bio.bi_status);
+		i = 0;
+		do {
+			e = wb->wc_list[i];
+			BUG_ON(!e->write_in_progress);
+			e->write_in_progress = false;
+			INIT_LIST_HEAD(&e->lru);
+			if (!writecache_has_error(wc))
+				writecache_free_entry(wc, e);
+			BUG_ON(!wc->writeback_size);
+			wc->writeback_size--;
+			n_walked++;
+			if (unlikely(n_walked >= ENDIO_LATENCY)) {
+				writecache_commit_flushed(wc);
+				wc_unlock(wc);
+				wc_lock(wc);
+				n_walked = 0;
+			}
+		} while (++i < wb->wc_list_n);
+
+		if (wb->wc_list != wb->wc_list_inline)
+			kfree(wb->wc_list);
+		bio_put(&wb->bio);
+	} while (!list_empty(list));
+}
+
+static void __writecache_endio_ssd(struct dm_writecache *wc, struct list_head *list)
+{
+	struct copy_struct *c;
+	struct wc_entry *e;
+
+	do {
+		c = list_entry(list->next, struct copy_struct, endio_entry);
+		list_del(&c->endio_entry);
+
+		if (unlikely(c->error))
+			writecache_error(wc, c->error, "copy error");
+
+		e = c->e;
+		do {
+			BUG_ON(!e->write_in_progress);
+			e->write_in_progress = false;
+			INIT_LIST_HEAD(&e->lru);
+			if (!writecache_has_error(wc))
+				writecache_free_entry(wc, e);
+
+			BUG_ON(!wc->writeback_size);
+			wc->writeback_size--;
+			e++;
+		} while (--c->n_entries);
+		mempool_free(c, wc->copy_pool);
+	} while (!list_empty(list));
+}
+
+static int writecache_endio_thread(void *data)
+{
+	struct dm_writecache *wc = data;
+
+	while (1) {
+		DECLARE_SWAITQUEUE(wait);
+		struct list_head list;
+
+		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+continue_locked:
+		if (!list_empty(&wc->endio_list))
+			goto pop_from_list;
+		set_current_state(TASK_INTERRUPTIBLE);
+		__prepare_to_swait(&wc->endio_thread_wait, &wait);
+		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+
+		if (unlikely(kthread_should_stop())) {
+			finish_swait(&wc->endio_thread_wait, &wait);
+			break;
+		}
+
+		schedule();
+
+		raw_spin_lock_irq(&wc->endio_thread_wait.lock);
+		__finish_swait(&wc->endio_thread_wait, &wait);
+		goto continue_locked;
+
+pop_from_list:
+		list = wc->endio_list;
+		list.next->prev = list.prev->next = &list;
+		INIT_LIST_HEAD(&wc->endio_list);
+		raw_spin_unlock_irq(&wc->endio_thread_wait.lock);
+
+		if (!WC_MODE_FUA(wc))
+			writecache_disk_flush(wc, wc->dev);
+
+		wc_lock(wc);
+
+		if (WC_MODE_PMEM(wc)) {
+			__writecache_endio_pmem(wc, &list);
+		} else {
+			__writecache_endio_ssd(wc, &list);
+			writecache_wait_for_ios(wc, READ);
+		}
+
+		writecache_commit_flushed(wc);
+
+		wc_unlock(wc);
+	}
+
+	return 0;
+}
+
+static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t gfp)
+{
+	struct dm_writecache *wc = wb->wc;
+	unsigned block_size = wc->block_size;
+	void *address = memory_data(wc, e);
+
+	persistent_memory_flush_cache(address, block_size);
+	return bio_add_page(&wb->bio, persistent_memory_page(address),
+			    block_size, persistent_memory_page_offset(address)) != 0;
+}
+
+struct writeback_list {
+	struct list_head list;
+	size_t size;
+};
+
+static void __writeback_throttle(struct dm_writecache *wc, struct writeback_list *wbl)
+{
+	if (unlikely(wc->max_writeback_jobs)) {
+		if (READ_ONCE(wc->writeback_size) - wbl->size >= wc->max_writeback_jobs) {
+			wc_lock(wc);
+			while (wc->writeback_size - wbl->size >= wc->max_writeback_jobs) {
+				writecache_wait_on_freelist_long(wc);
+			}
+			wc_unlock(wc);
+		}
+	}
+	cond_resched();
+}
+
+static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeback_list *wbl)
+{
+	struct wc_entry *e, *f;
+	struct bio *bio;
+	struct writeback_struct *wb;
+	unsigned max_pages;
+
+	while (wbl->size) {
+		wbl->size--;
+		e = container_of(wbl->list.prev, struct wc_entry, lru);
+		list_del(&e->lru);
+
+		max_pages = e->wc_list_contiguous;
+
+		bio = bio_alloc_bioset(GFP_NOIO, max_pages, wc->bio_set);
+		wb = container_of(bio, struct writeback_struct, bio);
+		wb->wc = wc;
+		wb->bio.bi_end_io = writecache_writeback_endio;
+		bio_set_dev(&wb->bio, wc->dev->bdev);
+		wb->bio.bi_iter.bi_sector = read_original_sector(wc, e);
+		wb->page_offset = PAGE_SIZE;
+		if (max_pages > WB_LIST_INLINE) {
+			wb->wc_list = kmalloc(max_pages * sizeof(struct wc_entry *),
+					      GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			if (unlikely(!wb->wc_list))
+				goto use_inline_list;
+		} else {
+use_inline_list:
+			wb->wc_list = wb->wc_list_inline;
+			max_pages = WB_LIST_INLINE;
+		}
+
+		BUG_ON(!wc_add_block(wb, e, GFP_NOIO));
+
+		wb->wc_list[0] = e;
+		wb->wc_list_n = 1;
+
+		while (wbl->size && wb->wc_list_n < max_pages) {
+			f = container_of(wbl->list.prev, struct wc_entry, lru);
+			if (read_original_sector(wc, f) !=
+			    read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
+				break;
+			if (!wc_add_block(wb, f, GFP_NOWAIT | __GFP_NOWARN))
+				break;
+			wbl->size--;
+			list_del(&f->lru);
+			wb->wc_list[wb->wc_list_n++] = f;
+			e = f;
+		}
+		bio_set_op_attrs(&wb->bio, REQ_OP_WRITE, WC_MODE_FUA(wc) * REQ_FUA);
+		if (writecache_has_error(wc)) {
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(&wb->bio);
+		} else {
+			submit_bio(&wb->bio);
+		}
+
+		__writeback_throttle(wc, wbl);
+
+	}
+}
+
+static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writeback_list *wbl)
+{
+	struct wc_entry *e, *f;
+	struct dm_io_region from, to;
+	struct copy_struct *c;
+
+	while (wbl->size) {
+		unsigned n_sectors;
+
+		wbl->size--;
+		e = container_of(wbl->list.prev, struct wc_entry, lru);
+		list_del(&e->lru);
+
+		n_sectors = e->wc_list_contiguous << (wc->block_size_bits - SECTOR_SHIFT);
+
+		from.bdev = wc->ssd_dev->bdev;
+		from.sector = cache_sector(wc, e);
+		from.count = n_sectors;
+		to.bdev = wc->dev->bdev;
+		to.sector = read_original_sector(wc, e);
+		to.count = n_sectors;
+
+		c = mempool_alloc(wc->copy_pool, GFP_NOIO);
+		c->wc = wc;
+		c->e = e;
+		c->n_entries = e->wc_list_contiguous;
+
+		while ((n_sectors -= wc->block_size >> SECTOR_SHIFT)) {
+			wbl->size--;
+			f = container_of(wbl->list.prev, struct wc_entry, lru);
+			BUG_ON(f != e + 1);
+			list_del(&f->lru);
+			e = f;
+		}
+
+		dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c);
+
+		__writeback_throttle(wc, wbl);
+	}
+}
+
+static void writecache_writeback(struct work_struct *work)
+{
+	struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
+	struct blk_plug plug;
+	struct wc_entry *e, *f, *g;
+	struct rb_node *node, *next_node;
+	struct list_head skipped;
+	struct writeback_list wbl;
+	unsigned long n_walked;
+
+	wc_lock(wc);
+restart:
+	if (writecache_has_error(wc)) {
+		wc_unlock(wc);
+		return;
+	}
+
+	if (unlikely(wc->writeback_all)) {
+		if (writecache_wait_for_writeback(wc))
+			goto restart;
+	}
+
+	if (wc->overwrote_committed) {
+		writecache_wait_for_ios(wc, WRITE);
+	}
+
+	n_walked = 0;
+	INIT_LIST_HEAD(&skipped);
+	INIT_LIST_HEAD(&wbl.list);
+	wbl.size = 0;
+	while (!list_empty(&wc->lru) &&
+	       (wc->writeback_all ||
+		wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) {
+
+		n_walked++;
+		if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all) && likely(!dm_suspended(wc->ti))) {
+			queue_work(wc->writeback_wq, &wc->writeback_work);
+			break;
+		}
+
+		e = container_of(wc->lru.prev, struct wc_entry, lru);
+		BUG_ON(e->write_in_progress);
+		if (unlikely(!writecache_entry_is_committed(wc, e))) {
+			writecache_flush(wc);
+		}
+		node = rb_prev(&e->rb_node);
+		if (node) {
+			f = container_of(node, struct wc_entry, rb_node);
+			if (unlikely(read_original_sector(wc, f) ==
+				     read_original_sector(wc, e))) {
+				BUG_ON(!f->write_in_progress);
+				list_del(&e->lru);
+				list_add(&e->lru, &skipped);
+				cond_resched();
+				continue;
+			}
+		}
+		wc->writeback_size++;
+		list_del(&e->lru);
+		list_add(&e->lru, &wbl.list);
+		wbl.size++;
+		e->write_in_progress = true;
+		e->wc_list_contiguous = 1;
+
+		f = e;
+
+		while (1) {
+			next_node = rb_next(&f->rb_node);
+			if (unlikely(!next_node))
+				break;
+			g = container_of(next_node, struct wc_entry, rb_node);
+			if (read_original_sector(wc, g) ==
+			    read_original_sector(wc, f)) {
+				f = g;
+				continue;
+			}
+			if (read_original_sector(wc, g) !=
+			    read_original_sector(wc, f) + (wc->block_size >> SECTOR_SHIFT))
+				break;
+			if (unlikely(g->write_in_progress))
+				break;
+			if (unlikely(!writecache_entry_is_committed(wc, g)))
+				break;
+
+			if (!WC_MODE_PMEM(wc)) {
+				if (g != f + 1)
+					break;
+			}
+
+			n_walked++;
+			//if (unlikely(n_walked > WRITEBACK_LATENCY) && likely(!wc->writeback_all))
+			//	break;
+
+			wc->writeback_size++;
+			list_del(&g->lru);
+			list_add(&g->lru, &wbl.list);
+			wbl.size++;
+			g->write_in_progress = true;
+			g->wc_list_contiguous = BIO_MAX_PAGES;
+			f = g;
+			e->wc_list_contiguous++;
+			if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES))
+				break;
+		}
+		cond_resched();
+	}
+
+	if (!list_empty(&skipped)) {
+		list_splice_tail(&skipped, &wc->lru);
+		/*
+		 * If we didn't make any progress, we must wait until some
+		 * writeback finishes to avoid burning CPU in a loop
+		 */
+		if (unlikely(!wbl.size))
+			writecache_wait_for_writeback(wc);
+	}
+
+	wc_unlock(wc);
+
+	blk_start_plug(&plug);
+
+	if (WC_MODE_PMEM(wc))
+		__writecache_writeback_pmem(wc, &wbl);
+	else
+		__writecache_writeback_ssd(wc, &wbl);
+
+	blk_finish_plug(&plug);
+
+	if (unlikely(wc->writeback_all)) {
+		wc_lock(wc);
+		while (writecache_wait_for_writeback(wc));
+		wc_unlock(wc);
+	}
+}
+
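+/*
+ * Cache device layout, as computed by calculate_memory_size below: the
+ * superblock and the array of wc_memory_entry structures come first,
+ * rounded up to a whole number of cache blocks, and the remaining space
+ * holds n_blocks data blocks.  Illustrative arithmetic (not from the
+ * patch description): with a 4096-byte block size, each cached block
+ * costs 4096 bytes of data plus 16 bytes of metadata.
+ */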
+static int calculate_memory_size(uint64_t device_size, unsigned block_size,
+				 size_t *n_blocks_p, size_t *n_metadata_blocks_p)
+{
+	uint64_t n_blocks, offset;
+	struct wc_entry e;
+
+	n_blocks = device_size;
+	do_div(n_blocks, block_size + sizeof(struct wc_memory_entry));
+
+	while (1) {
+		if (!n_blocks)
+			return -ENOSPC;
+		/* Verify that the entries[n_blocks] offset computed below won't overflow */
+		if (n_blocks >= (size_t)-sizeof(struct wc_memory_superblock) / sizeof(struct wc_memory_entry))
+			return -EFBIG;
+		offset = offsetof(struct wc_memory_superblock, entries[n_blocks]);
+		offset = (offset + block_size - 1) & ~(uint64_t)(block_size - 1);
+		if (offset + n_blocks * block_size <= device_size)
+			break;
+		n_blocks--;
+	}
+
+	/* check if the bit field overflows */
+	e.index = n_blocks;
+	if (e.index != n_blocks)
+		return -EFBIG;
+
+	if (n_blocks_p)
+		*n_blocks_p = n_blocks;
+	if (n_metadata_blocks_p)
+		*n_metadata_blocks_p = offset >> __ffs(block_size);
+	return 0;
+}
+
+static int init_memory(struct dm_writecache *wc)
+{
+	size_t b;
+	int r;
+
+	r = calculate_memory_size(wc->memory_map_size, wc->block_size, &wc->n_blocks, NULL);
+	if (r)
+		return r;
+
+	r = writecache_alloc_entries(wc);
+	if (r)
+		return r;
+
+	for (b = 0; b < ARRAY_SIZE(sb(wc)->padding); b++)
+		NT_STORE(sb(wc)->padding[b], cpu_to_le64(0));
+	NT_STORE(sb(wc)->version, cpu_to_le32(MEMORY_SUPERBLOCK_VERSION));
+	NT_STORE(sb(wc)->block_size, cpu_to_le32(wc->block_size));
+	NT_STORE(sb(wc)->n_blocks, cpu_to_le64(wc->n_blocks));
+	NT_STORE(sb(wc)->seq_count, cpu_to_le64(0));
+
+	for (b = 0; b < wc->n_blocks; b++)
+		write_original_sector_seq_count(wc, &wc->entries[b], -1, -1);
+
+	writecache_flush_all_metadata(wc);
+	writecache_commit_flushed(wc);
+	NT_STORE(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC));
+	writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic);
+	writecache_commit_flushed(wc);
+
+	return 0;
+}
+
+static void writecache_dtr(struct dm_target *ti)
+{
+	struct dm_writecache *wc = ti->private;
+
+	if (!wc)
+		return;
+
+	if (wc->endio_thread)
+		kthread_stop(wc->endio_thread);
+
+	if (wc->flush_thread)
+		kthread_stop(wc->flush_thread);
+
+	if (wc->bio_set)
+		bioset_free(wc->bio_set);
+
+	mempool_destroy(wc->copy_pool);
+
+	if (wc->writeback_wq)
+		destroy_workqueue(wc->writeback_wq);
+
+	if (wc->dev)
+		dm_put_device(ti, wc->dev);
+
+	if (wc->ssd_dev)
+		dm_put_device(ti, wc->ssd_dev);
+
+	if (wc->entries)
+		vfree(wc->entries);
+
+	if (wc->memory_map) {
+		if (WC_MODE_PMEM(wc))
+			persistent_memory_release(wc);
+		else
+			vfree(wc->memory_map);
+	}
+
+	if (wc->dm_kcopyd)
+		dm_kcopyd_client_destroy(wc->dm_kcopyd);
+
+	if (wc->dm_io)
+		dm_io_client_destroy(wc->dm_io);
+
+	if (wc->dirty_bitmap)
+		vfree(wc->dirty_bitmap);
+
+	kfree(wc);
+}
+
+static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	struct dm_writecache *wc;
+	struct dm_arg_set as;
+	const char *string;
+	unsigned opt_params;
+	size_t offset, data_size;
+	int i, r;
+	char dummy;
+	int high_wm_percent = HIGH_WATERMARK;
+	int low_wm_percent = LOW_WATERMARK;
+	uint64_t x;
+	struct wc_memory_superblock s;
+
+	static struct dm_arg _args[] = {
+		{0, 10, "Invalid number of feature args"},
+	};
+
+	as.argc = argc;
+	as.argv = argv;
+
+	wc = kzalloc(sizeof(struct dm_writecache), GFP_KERNEL);
+	if (!wc) {
+		ti->error = "Cannot allocate writecache structure";
+		r = -ENOMEM;
+		goto bad;
+	}
+	ti->private = wc;
+	wc->ti = ti;
+
+	mutex_init(&wc->lock);
+	writecache_poison_lists(wc);
+	init_swait_queue_head(&wc->freelist_wait);
+	timer_setup(&wc->autocommit_timer, writecache_autocommit_timer, 0);
+
+	for (i = 0; i < 2; i++) {
+		atomic_set(&wc->bio_in_progress[i], 0);
+		init_swait_queue_head(&wc->bio_in_progress_wait[i]);
+	}
+
+	wc->dm_io = dm_io_client_create();
+	if (!wc->dm_io) {
+		r = -ENOMEM;
+		ti->error = "Unable to allocate dm-io client";
+		goto bad;
+	}
+
+	wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1);
+	if (!wc->writeback_wq) {
+		r = -ENOMEM;
+		ti->error = "Could not allocate writeback workqueue";
+		goto bad;
+	}
+	INIT_WORK(&wc->writeback_work, writecache_writeback);
+	INIT_WORK(&wc->flush_work, writecache_flush_work);
+
+	init_swait_queue_head(&wc->endio_thread_wait);
+	INIT_LIST_HEAD(&wc->endio_list);
+	wc->endio_thread = kthread_create(writecache_endio_thread, wc, "writecache_endio");
+	if (IS_ERR(wc->endio_thread)) {
+		r = PTR_ERR(wc->endio_thread);
+		wc->endio_thread = NULL;
+		ti->error = "Couldn't spawn endio thread";
+		goto bad;
+	}
+	wake_up_process(wc->endio_thread);
+
+	/*
+	 * Parse the mode (pmem or ssd)
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+
+	if (!strcasecmp(string, "s")) {
+#ifndef DM_WRITECACHE_ONLY_SSD
+		wc->pmem_mode = false;
+#endif
+	} else if (!strcasecmp(string, "p")) {
+#ifndef DM_WRITECACHE_ONLY_SSD
+		wc->pmem_mode = true;
+		wc->writeback_fua = WRITEBACK_FUA;
+#else
+		r = -EOPNOTSUPP;
+		ti->error = "Persistent memory not supported on this architecture";
+		goto bad;
+#endif
+	} else {
+		goto bad_arguments;
+	}
+
+	if (WC_MODE_PMEM(wc)) {
+		wc->bio_set = bioset_create(BIO_POOL_SIZE,
+					    offsetof(struct writeback_struct, bio),
+					    BIOSET_NEED_BVECS);
+		if (!wc->bio_set) {
+			r = -ENOMEM;
+			ti->error = "Could not allocate bio set";
+			goto bad;
+		}
+	} else {
+		wc->copy_pool = mempool_create_kmalloc_pool(1, sizeof(struct copy_struct));
+		if (!wc->copy_pool) {
+			r = -ENOMEM;
+			ti->error = "Could not allocate mempool";
+			goto bad;
+		}
+	}
+
+	/*
+	 * Parse the origin data device
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->dev);
+	if (r) {
+		ti->error = "Origin data device lookup failed";
+		goto bad;
+	}
+
+	/*
+	 * Parse cache data device (be it pmem or ssd)
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+
+	r = dm_get_device(ti, string, dm_table_get_mode(ti->table), &wc->ssd_dev);
+	if (r) {
+		ti->error = "Cache data device lookup failed";
+		goto bad;
+	}
+	wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode);
+
+	if (WC_MODE_PMEM(wc)) {
+		r = persistent_memory_claim(wc);
+		if (r) {
+			ti->error = "Unable to map persistent memory for cache";
+			goto bad;
+		}
+	}
+
+	/*
+	 * Parse the cache block size
+	 */
+	string = dm_shift_arg(&as);
+	if (!string)
+		goto bad_arguments;
+	if (sscanf(string, "%u%c", &wc->block_size, &dummy) != 1 ||
+	    wc->block_size < 512 || wc->block_size > PAGE_SIZE ||
+	    (wc->block_size & (wc->block_size - 1))) {
+		r = -EINVAL;
+		ti->error = "Invalid block size";
+		goto bad;
+	}
+	wc->block_size_bits = __ffs(wc->block_size);
+
+	wc->max_writeback_jobs = MAX_WRITEBACK_JOBS;
+	wc->autocommit_blocks = !WC_MODE_PMEM(wc) ? AUTOCOMMIT_BLOCKS_SSD : AUTOCOMMIT_BLOCKS_PMEM;
+	wc->autocommit_jiffies = msecs_to_jiffies(AUTOCOMMIT_MSEC);
+
+	/*
+	 * Parse optional arguments
+	 */
+	r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
+	if (r)
+		goto bad;
+
+	while (opt_params) {
+		string = dm_shift_arg(&as), opt_params--;
+		if (!strcasecmp(string, "high_watermark") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%d%c", &high_wm_percent, &dummy) != 1)
+				goto invalid_optional;
+			if (high_wm_percent < 0 || high_wm_percent > 100)
+				goto invalid_optional;
+			wc->high_wm_percent_set = true;
+		} else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%d%c", &low_wm_percent, &dummy) != 1)
+				goto invalid_optional;
+			if (low_wm_percent < 0 || low_wm_percent > 100)
+				goto invalid_optional;
+			wc->low_wm_percent_set = true;
+		} else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &wc->max_writeback_jobs, &dummy) != 1)
+				goto invalid_optional;
+			wc->max_writeback_jobs_set = true;
+		} else if (!strcasecmp(string, "autocommit_blocks") && opt_params >= 1) {
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &wc->autocommit_blocks, &dummy) != 1)
+				goto invalid_optional;
+			wc->autocommit_blocks_set = true;
+		} else if (!strcasecmp(string, "autocommit_time") && opt_params >= 1) {
+			unsigned autocommit_msecs;
+			string = dm_shift_arg(&as), opt_params--;
+			if (sscanf(string, "%u%c", &autocommit_msecs, &dummy) != 1)
+				goto invalid_optional;
+			if (autocommit_msecs > 3600000)
+				goto invalid_optional;
+			wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs);
+			wc->autocommit_time_set = true;
+		} else if (!strcasecmp(string, "fua")) {
+			if (WC_MODE_PMEM(wc)) {
+#ifndef DM_WRITECACHE_ONLY_SSD
+				wc->writeback_fua = true;
+				wc->writeback_fua_set = true;
+#endif
+			} else goto invalid_optional;
+		} else if (!strcasecmp(string, "nofua")) {
+			if (WC_MODE_PMEM(wc)) {
+#ifndef DM_WRITECACHE_ONLY_SSD
+				wc->writeback_fua = false;
+				wc->writeback_fua_set = true;
+#endif
+			} else goto invalid_optional;
+		} else {
+invalid_optional:
+			r = -EINVAL;
+			ti->error = "Invalid optional argument";
+			goto bad;
+		}
+	}
+
+	if (!WC_MODE_PMEM(wc)) {
+		struct dm_io_region region;
+		struct dm_io_request req;
+		size_t n_blocks, n_metadata_blocks;
+		uint64_t n_bitmap_bits;
+
+		init_completion(&wc->flush_completion);
+		wc->flush_thread = kthread_create(writecache_flush_thread, wc, "writecache_flush");
+		if (IS_ERR(wc->flush_thread)) {
+			r = PTR_ERR(wc->flush_thread);
+			wc->flush_thread = NULL;
+			ti->error = "Couldn't spawn flush thread";
+			goto bad;
+		}
+		writecache_offload_bio(wc, NULL);
+
+		r = calculate_memory_size(wc->memory_map_size, wc->block_size,
+					  &n_blocks, &n_metadata_blocks);
+		if (r) {
+			ti->error = "Invalid device size";
+			goto bad;
+		}
+
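+		/* one bit in the dirty bitmap covers BITMAP_GRANULARITY bytes of the metadata area */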
+		n_bitmap_bits = (((uint64_t)n_metadata_blocks << wc->block_size_bits) + BITMAP_GRANULARITY - 1) / BITMAP_GRANULARITY;
+		/* this is limitation of test_bit functions */
+		if (n_bitmap_bits > 1U << 31) {
+			r = -EFBIG;
+			ti->error = "Invalid device size";
+			goto bad;
+		}
+
+		wc->memory_map = vmalloc(n_metadata_blocks << wc->block_size_bits);
+		if (!wc->memory_map) {
+			r = -ENOMEM;
+			ti->error = "Unable to allocate memory for metadata";
+			goto bad;
+		}
+
+		wc->dm_kcopyd = dm_kcopyd_client_create(&dm_kcopyd_throttle);
+		if (!wc->dm_kcopyd) {
+			r = -ENOMEM;
+			ti->error = "Unable to allocate dm-kcopyd client";
+			goto bad;
+		}
+
+		wc->metadata_sectors = n_metadata_blocks << (wc->block_size_bits - SECTOR_SHIFT);
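+		/* size the dirty bitmap in whole unsigned longs so the generic bitmap operations can be used on it */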
+		wc->dirty_bitmap_size = (n_bitmap_bits + BITS_PER_LONG - 1) / BITS_PER_LONG * sizeof(unsigned long);
+		wc->dirty_bitmap = vzalloc(wc->dirty_bitmap_size);
+		if (!wc->dirty_bitmap) {
+			r = -ENOMEM;
+			ti->error = "Unable to allocate dirty bitmap";
+			goto bad;
+		}
+
+		region.bdev = wc->ssd_dev->bdev;
+		region.sector = 0;
+		region.count = wc->metadata_sectors;
+		req.bi_op = REQ_OP_READ;
+		req.bi_op_flags = REQ_SYNC;
+		req.mem.type = DM_IO_VMA;
+		req.mem.ptr.vma = (char *)wc->memory_map;
+		req.client = wc->dm_io;
+		req.notify.fn = NULL;
+
+		r = dm_io(&req, 1, &region, NULL);
+		if (r) {
+			ti->error = "Unable to read metadata";
+			goto bad;
+		}
+	}
+
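+	/* read the superblock with memcpy_mcsafe() so a hardware memory error is reported rather than fatal */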
+	r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+	if (r) {
+		ti->error = "Hardware memory error when reading superblock";
+		goto bad;
+	}
+	if (!le32_to_cpu(s.magic) && !le32_to_cpu(s.version)) {
+		r = init_memory(wc);
+		if (r) {
+			ti->error = "Unable to initialize device";
+			goto bad;
+		}
+		r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+		if (r) {
+			ti->error = "Hardware memory error when reading superblock";
+			goto bad;
+		}
+	}
+
+	if (le32_to_cpu(s.magic) != MEMORY_SUPERBLOCK_MAGIC) {
+		ti->error = "Invalid magic in the superblock";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	if (le32_to_cpu(s.version) != MEMORY_SUPERBLOCK_VERSION) {
+		ti->error = "Invalid version in the superblock";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	if (le32_to_cpu(s.block_size) != wc->block_size) {
+		ti->error = "Block size does not match superblock";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	wc->n_blocks = le64_to_cpu(s.n_blocks);
+
+	offset = wc->n_blocks * sizeof(struct wc_memory_entry);
+	if (offset / sizeof(struct wc_memory_entry) != le64_to_cpu(sb(wc)->n_blocks)) {
+overflow:
+		ti->error = "Overflow in size calculation";
+		r = -EINVAL;
+		goto bad;
+	}
+	offset += sizeof(struct wc_memory_superblock);
+	if (offset < sizeof(struct wc_memory_superblock))
+		goto overflow;
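+	/* round the start of the data area up to a multiple of the cache block size */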
+	offset = (offset + wc->block_size - 1) & ~(size_t)(wc->block_size - 1);
+	data_size = wc->n_blocks * (size_t)wc->block_size;
+	if (!offset || (data_size / wc->block_size != wc->n_blocks) ||
+	    (offset + data_size < offset))
+		goto overflow;
+	if (offset + data_size > wc->memory_map_size) {
+		ti->error = "Memory area is too small";
+		r = -EINVAL;
+		goto bad;
+	}
+
+	wc->metadata_sectors = offset >> SECTOR_SHIFT;
+	wc->block_start = (char *)sb(wc) + offset;
+
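+	/* convert the watermark percentages into absolute free-list thresholds, rounding to the nearest block */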
+	x = (uint64_t)wc->n_blocks * (100 - high_wm_percent);
+	x += 50;
+	do_div(x, 100);
+	wc->freelist_high_watermark = x;
+	x = (uint64_t)wc->n_blocks * (100 - low_wm_percent);
+	x += 50;
+	do_div(x, 100);
+	wc->freelist_low_watermark = x;
+
+	r = writecache_alloc_entries(wc);
+	if (r) {
+		ti->error = "Cannot allocate memory";
+		goto bad;
+	}
+
+	ti->num_flush_bios = 1;
+	ti->flush_supported = true;
+	ti->num_discard_bios = 1;
+
+	if (WC_MODE_PMEM(wc))
+		persistent_memory_flush_cache(wc->memory_map, wc->memory_map_size);
+
+	return 0;
+
+bad_arguments:
+	r = -EINVAL;
+	ti->error = "Bad arguments";
+bad:
+	writecache_dtr(ti);
+	return r;
+}
+
+static void writecache_status(struct dm_target *ti, status_type_t type,
+			      unsigned status_flags, char *result, unsigned maxlen)
+{
+	struct dm_writecache *wc = ti->private;
+	unsigned extra_args;
+	unsigned sz = 0;
+	uint64_t x;
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc), (unsigned long long)wc->n_blocks,
+		       (unsigned long long)wc->freelist_size, (unsigned long long)wc->writeback_size);
+		break;
+	case STATUSTYPE_TABLE:
+		DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
+				wc->dev->name, wc->ssd_dev->name, wc->block_size);
+		extra_args = 0;
+		if (wc->high_wm_percent_set)
+			extra_args += 2;
+		if (wc->low_wm_percent_set)
+			extra_args += 2;
+		if (wc->max_writeback_jobs_set)
+			extra_args += 2;
+		if (wc->autocommit_blocks_set)
+			extra_args += 2;
+		if (wc->autocommit_time_set)
+			extra_args += 2;
+#ifndef DM_WRITECACHE_ONLY_SSD
+		if (wc->writeback_fua_set)
+			extra_args++;
+#endif
+		DMEMIT("%u", extra_args);
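+		/* convert the free-list thresholds back into used-block percentages for reporting */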
+		if (wc->high_wm_percent_set) {
+			x = (uint64_t)wc->freelist_high_watermark * 100;
+			x += wc->n_blocks / 2;
+			do_div(x, (size_t)wc->n_blocks);
+			DMEMIT(" high_watermark %u", 100 - (unsigned)x);
+		}
+		if (wc->low_wm_percent_set) {
+			x = (uint64_t)wc->freelist_low_watermark * 100;
+			x += wc->n_blocks / 2;
+			do_div(x, (size_t)wc->n_blocks);
+			DMEMIT(" low_watermark %u", 100 - (unsigned)x);
+		}
+		if (wc->max_writeback_jobs_set) {
+			DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs);
+		}
+		if (wc->autocommit_blocks_set) {
+			DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks);
+		}
+		if (wc->autocommit_time_set) {
+			DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies));
+		}
+#ifndef DM_WRITECACHE_ONLY_SSD
+		if (wc->writeback_fua_set) {
+			DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
+		}
+#endif
+		break;
+	}
+}
+
+static struct target_type writecache_target = {
+	.name			= "writecache",
+	.version		= {1, 0, 0},
+	.module			= THIS_MODULE,
+	.ctr			= writecache_ctr,
+	.dtr			= writecache_dtr,
+	.status			= writecache_status,
+	.postsuspend		= writecache_suspend,
+	.resume			= writecache_resume,
+	.message		= writecache_message,
+	.map			= writecache_map,
+	.end_io			= writecache_end_io,
+	.iterate_devices	= writecache_iterate_devices,
+	.io_hints		= writecache_io_hints,
+};
+
+static int __init dm_writecache_init(void)
+{
+	int r;
+
+	r = dm_register_target(&writecache_target);
+	if (r < 0) {
+		DMERR("register failed %d", r);
+		return r;
+	}
+
+	return 0;
+}
+
+static void __exit dm_writecache_exit(void)
+{
+	dm_unregister_target(&writecache_target);
+}
+
+module_init(dm_writecache_init);
+module_exit(dm_writecache_exit);
+
+MODULE_DESCRIPTION(DM_NAME " writecache target");
+MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>");
+MODULE_LICENSE("GPL");
Index: linux-2.6/Documentation/device-mapper/writecache.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/Documentation/device-mapper/writecache.txt	2018-03-08 14:23:31.059999000 +0100
@@ -0,0 +1,68 @@
+The writecache target caches writes on persistent memory or on SSD. It
+doesn't cache reads because reads are supposed to be cached in page cache
+in normal RAM.
+
+When the device is constructed, the first sector should either be zeroed
+or contain a valid superblock from a previous invocation.
+
+Constructor parameters:
+1. type of the cache device - "p" or "s"
+	p - persistent memory
+	s - SSD
+2. the underlying device that will be cached
+3. the cache device
+4. block size (4096 is recommended; the maximum block size is the page
+   size)
+5. the number of optional parameters (the parameters with an argument
+   count as two)
+	high_watermark n	(default: 50)
+		start writeback when the number of used blocks reaches this
+		watermark
+	low_watermark x		(default: 45)
+		stop writeback when the number of used blocks drops below
+		this watermark
+	writeback_jobs n	(default: unlimited)
+		limit the number of blocks that are in flight during
+		writeback. Setting this value reduces writeback
+		throughput, but it may improve the latency of read requests
+	autocommit_blocks n	(default: 64 for pmem, 65536 for ssd)
+		when the application writes this number of blocks without
+		issuing the FLUSH request, the blocks are automatically
+		committed
+	autocommit_time ms	(default: 1000)
+		autocommit time in milliseconds. The data is automatically
+		committed if this time passes and no FLUSH request is
+		received
+	fua			(default: on)
+		applicable only to persistent memory - use the FUA flag
+		when writing data from persistent memory back to the
+		underlying device
+	nofua
+		applicable only to persistent memory - don't use the FUA
+		flag when writing back data and send the FLUSH request
+		afterwards
+		- some underlying devices perform better with fua, some
+		  with nofua. The user should test it
+
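+An example of constructing the device with dmsetup (the device names and
+parameter values below are only illustrative):
+	dmsetup create wc --table "0 `blockdev --getsz /dev/sdb` writecache p /dev/sdb /dev/pmem0 4096 4 high_watermark 60 writeback_jobs 1024"
+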
+Status:
+1. error indicator - 0 if there was no error, otherwise error number
+2. the number of blocks
+3. the number of free blocks
+4. the number of blocks under writeback
+
+Messages:
+	flush
+		flush the cache device. The message returns successfully
+		if the cache device was flushed without an error
+	flush_on_suspend
+		flush the cache device on next suspend. Use this message
+		when you are going to remove the cache device. The proper
+		sequence for removing the cache device is:
+		1. send the "flush_on_suspend" message
+		2. load an inactive table with a linear target that maps
+		   to the underlying device
+		3. suspend the device
+		4. ask for status and verify that there are no errors
+		5. resume the device, so that it will use the linear
+		   target
+		6. the cache device is now inactive and it can be deleted

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-08 14:51   ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2018-03-08 14:51 UTC (permalink / raw)
  To: Mikulas Patocka; +Cc: Mike Snitzer, dm-devel, Alasdair G. Kergon, linux-nvdimm

> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6/drivers/md/dm-writecache.c	2018-03-08 14:23:31.059999000 +0100
> @@ -0,0 +1,2417 @@
> +#include <linux/device-mapper.h>

missing copyright statement, or for those new-fashioned SPDX statement.

> +#define WRITEBACK_FUA			true

no business having this around.

> +#ifndef bio_set_dev
> +#define	bio_set_dev(bio, dev)	((bio)->bi_bdev = (dev))
> +#endif
> +#ifndef timer_setup
> +#define timer_setup(t, c, f)	setup_timer(t, c, (unsigned long)(t))
> +#endif

no business in mainline.

> +/*
> + * On X86, non-temporal stores are more efficient than cache flushing.
> + * On ARM64, cache flushing is more efficient.
> + */
> +#if defined(CONFIG_X86_64)
> +#define NT_STORE(dest, src)				\
> +do {							\
> +	typeof(src) val = (src);			\
> +	memcpy_flushcache(&(dest), &val, sizeof(src));	\
> +} while (0)
> +#define COMMIT_FLUSHED()	wmb()
> +#else
> +#define NT_STORE(dest, src)	WRITE_ONCE(dest, src)
> +#define FLUSH_RANGE		dax_flush
> +#define COMMIT_FLUSHED()	do { } while (0)
> +#endif

Please use proper APIs for this, this has no business in a driver.

And that's it for now.  This is clearly not submission ready, and I
should got back to my backlog of other things.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-08 17:08     ` Dan Williams
  0 siblings, 0 replies; 27+ messages in thread
From: Dan Williams @ 2018-03-08 17:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, Mikulas Patocka, device-mapper development,
	Alasdair G. Kergon, linux-nvdimm

On Thu, Mar 8, 2018 at 6:51 AM, Christoph Hellwig <hch@infradead.org> wrote:
>> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
>> +++ linux-2.6/drivers/md/dm-writecache.c      2018-03-08 14:23:31.059999000 +0100
>> @@ -0,0 +1,2417 @@
>> +#include <linux/device-mapper.h>
>
> missing copyright statement, or for those new-fashioned SPDX statement.
>
>> +#define WRITEBACK_FUA                        true
>
> no business having this around.
>
>> +#ifndef bio_set_dev
>> +#define      bio_set_dev(bio, dev)   ((bio)->bi_bdev = (dev))
>> +#endif
>> +#ifndef timer_setup
>> +#define timer_setup(t, c, f) setup_timer(t, c, (unsigned long)(t))
>> +#endif
>
> no business in mainline.
>
>> +/*
>> + * On X86, non-temporal stores are more efficient than cache flushing.
>> + * On ARM64, cache flushing is more efficient.
>> + */
>> +#if defined(CONFIG_X86_64)
>> +#define NT_STORE(dest, src)                          \
>> +do {                                                 \
>> +     typeof(src) val = (src);                        \
>> +     memcpy_flushcache(&(dest), &val, sizeof(src));  \
>> +} while (0)
>> +#define COMMIT_FLUSHED()     wmb()
>> +#else
>> +#define NT_STORE(dest, src)  WRITE_ONCE(dest, src)
>> +#define FLUSH_RANGE          dax_flush
>> +#define COMMIT_FLUSHED()     do { } while (0)
>> +#endif
>
> Please use proper APIs for this, this has no business in a driver.

I had the same feedback, and Mikulas sent this useful enhancement to
the memcpy_flushcache API:

    https://patchwork.kernel.org/patch/10217655/

...it's in my queue to either push through -tip or add it to the next
libnvdimm pull request for 4.17-rc1.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-09  3:26     ` Mikulas Patocka
  0 siblings, 0 replies; 27+ messages in thread
From: Mikulas Patocka @ 2018-03-09  3:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, dm-devel, Alasdair G. Kergon, linux-nvdimm



On Thu, 8 Mar 2018, Christoph Hellwig wrote:

> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ linux-2.6/drivers/md/dm-writecache.c	2018-03-08 14:23:31.059999000 +0100
> > @@ -0,0 +1,2417 @@
> > +#include <linux/device-mapper.h>
> 
> missing copyright statement, or for those new-fashioned SPDX statement.
> 
> > +#define WRITEBACK_FUA			true
> 
> no business having this around.

It's the default setting of the flag wc->writeback_fua (it can be changed 
with target parameters). The flag selects whether the target uses FUA 
requests when doing writeback or whether it uses non-FUA requests and 
FLUSH afterwards. For some block devices, FUA is faster, for some 
nonFUA+FLUSH is faster.

What's wrong with this? Why can't default settings be #defined at the 
beginning of a file?

> > +#ifndef bio_set_dev
> > +#define	bio_set_dev(bio, dev)	((bio)->bi_bdev = (dev))
> > +#endif
> > +#ifndef timer_setup
> > +#define timer_setup(t, c, f)	setup_timer(t, c, (unsigned long)(t))
> > +#endif
> 
> no business in mainline.

People removed dax support for ramdisk in 4.15.

If I need to test it on non-x86 architecture, I need ramdisk as a fake dax 
device - and that only works up to 4.14. These defines are for 4.14 
compatibility.

> > +/*
> > + * On X86, non-temporal stores are more efficient than cache flushing.
> > + * On ARM64, cache flushing is more efficient.
> > + */
> > +#if defined(CONFIG_X86_64)
> > +#define NT_STORE(dest, src)				\
> > +do {							\
> > +	typeof(src) val = (src);			\
> > +	memcpy_flushcache(&(dest), &val, sizeof(src));	\
> > +} while (0)
> > +#define COMMIT_FLUSHED()	wmb()
> > +#else
> > +#define NT_STORE(dest, src)	WRITE_ONCE(dest, src)
> > +#define FLUSH_RANGE		dax_flush
> > +#define COMMIT_FLUSHED()	do { } while (0)
> > +#endif
> 
> Please use proper APIs for this, this has no business in a driver.
> 
> And that's it for now.  This is clearly not submission ready, and I
> should got back to my backlog of other things.

Why is memcpy_flushcache and dax_flush "improper"? What should I use 
instead of them?

Mikulas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-12  7:48       ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2018-03-12  7:48 UTC (permalink / raw)
  To: Dan Williams
  Cc: Mike Snitzer, linux-nvdimm, Christoph Hellwig,
	device-mapper development, Mikulas Patocka, Alasdair G. Kergon

On Thu, Mar 08, 2018 at 09:08:32AM -0800, Dan Williams wrote:
> I had the same feedback, and Mikulas sent this useful enhancement to
> the memcpy_flushcache API:
> 
>     https://patchwork.kernel.org/patch/10217655/
> 
> ...it's in my queue to either push through -tip or add it to the next
> libnvdimm pull request for 4.17-rc1.

So lets rebase this submission on top of that.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-12  7:50       ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2018-03-12  7:50 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christoph Hellwig, Mike Snitzer, dm-devel, Alasdair G. Kergon,
	linux-nvdimm

On Thu, Mar 08, 2018 at 10:26:17PM -0500, Mikulas Patocka wrote:
> > no business having this around.
> 
> It's the default setting of the flag wc->writeback_fua (it can be changed 
> with target parameters). The flag selects whether the target uses FUA 
> requests when doing writeback or whether it uses non-FUA requests and 
> FLUSH afterwards. For some block devices, FUA is faster, for some 
> nonFUA+FLUSH is faster.

So just use true as the default flag, adding a name for it in addition
to the field it is assigned to makes no sense at all.

> > > +#ifndef bio_set_dev
> > > +#define	bio_set_dev(bio, dev)	((bio)->bi_bdev = (dev))
> > > +#endif
> > > +#ifndef timer_setup
> > > +#define timer_setup(t, c, f)	setup_timer(t, c, (unsigned long)(t))
> > > +#endif
> > 
> > no business in mainline.
> 
> People removed dax support for ramdisk in 4.15.
> 
> If I need to test it on non-x86 architecture, I need ramdisk as a fake dax 
> device - and that only works up to 4.14. These defines are for 4.14 
> compatibility.

So add them when you backport, or use the existing automated backport
frameworks.  But do not add dead code to an upstream submission.

> > > +#if defined(CONFIG_X86_64)
> > > +#define NT_STORE(dest, src)				\
> > > +do {							\
> > > +	typeof(src) val = (src);			\
> > > +	memcpy_flushcache(&(dest), &val, sizeof(src));	\
> > > +} while (0)
> > > +#define COMMIT_FLUSHED()	wmb()
> > > +#else
> > > +#define NT_STORE(dest, src)	WRITE_ONCE(dest, src)
> > > +#define FLUSH_RANGE		dax_flush
> > > +#define COMMIT_FLUSHED()	do { } while (0)
> > > +#endif
> > 
> > Please use proper APIs for this, this has no business in a driver.
> > 
> > And that's it for now.  This is clearly not submission ready, and I
> > should got back to my backlog of other things.
> 
> Why is memcpy_flushcache and dax_flush "improper"? What should I use 
> instead of them?

They are proper and should be used directly instead of through your
hacky macros.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-12 12:12         ` Mikulas Patocka
  0 siblings, 0 replies; 27+ messages in thread
From: Mikulas Patocka @ 2018-03-12 12:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Snitzer, dm-devel, Alasdair G. Kergon, linux-nvdimm



On Mon, 12 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 08, 2018 at 10:26:17PM -0500, Mikulas Patocka wrote:
> > > no business having this around.
> > 
> > It's the default setting of the flag wc->writeback_fua (it can be changed 
> > with target parameters). The flag selects whether the target uses FUA 
> > requests when doing writeback or whether it uses non-FUA requests and 
> > FLUSH afterwards. For some block devices, FUA is faster, for some 
> > nonFUA+FLUSH is faster.
> 
> So just use true as the default flag, adding a name for it in addition
> to the field it is assigned to makes no sense at all.

It makes sense, because all the default values are in one place at the top
of the file and not scattered throughout the codebase.

> > > > +#ifndef bio_set_dev
> > > > +#define	bio_set_dev(bio, dev)	((bio)->bi_bdev = (dev))
> > > > +#endif
> > > > +#ifndef timer_setup
> > > > +#define timer_setup(t, c, f)	setup_timer(t, c, (unsigned long)(t))
> > > > +#endif
> > > 
> > > no business in mainline.
> > 
> > People removed dax support for ramdisk in 4.15.
> > 
> > If I need to test it on non-x86 architecture, I need ramdisk as a fake dax 
> > device - and that only works up to 4.14. These defines are for 4.14 
> > compatibility.
> 
> So add them when you backport, or use the existing automated backport
> frameworks.  But do not add dead code to an upstream submission.

I don't intend to backport this driver to stable kernel branches. But I 
can move the file between different machines and test it - it is just 
convenience for me, so that I don't have to patch the file when moving it 
around. It helps me and it doesn't harm anyone else, so what's the problem 
with it?

> > > > +#if defined(CONFIG_X86_64)
> > > > +#define NT_STORE(dest, src)				\
> > > > +do {							\
> > > > +	typeof(src) val = (src);			\
> > > > +	memcpy_flushcache(&(dest), &val, sizeof(src));	\
> > > > +} while (0)
> > > > +#define COMMIT_FLUSHED()	wmb()
> > > > +#else
> > > > +#define NT_STORE(dest, src)	WRITE_ONCE(dest, src)
> > > > +#define FLUSH_RANGE		dax_flush
> > > > +#define COMMIT_FLUSHED()	do { } while (0)
> > > > +#endif
> > > 
> > > Please use proper APIs for this, this has no business in a driver.
> > > 
> > > And that's it for now.  This is clearly not submission ready, and I
> > > should got back to my backlog of other things.
> > 
> > Why is memcpy_flushcache and dax_flush "improper"? What should I use 
> > instead of them?
> 
> They are proper and should be used directly instead of through your
> hacky macros.

On x86-64, memcpy_flushcache is faster than dax_flush.
On ARM64, dax_flush is faster than memcpy_flushcache.

So what should I do? I need to differentiate them based on architecture.

Do you argue that instead of one "#if defined(CONFIG_X86_64)" at the top
of the file we should have many more "#if defined(CONFIG_X86_64)" lines
all over the file - just because you don't like #defines?

Currently, we can change one line of source code to switch between these 
two functions and benchmark which one performs better on a particular 
processor. Once these macros are deleted, the switch will not be possible.

Mikulas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [dm-devel] [PATCH] dm-writecache
@ 2018-03-12 12:15         ` Mikulas Patocka
  0 siblings, 0 replies; 27+ messages in thread
From: Mikulas Patocka @ 2018-03-12 12:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Alasdair G. Kergon, Mike Snitzer, device-mapper development,
	linux-nvdimm



On Mon, 12 Mar 2018, Christoph Hellwig wrote:

> On Thu, Mar 08, 2018 at 09:08:32AM -0800, Dan Williams wrote:
> > I had the same feedback, and Mikulas sent this useful enhancement to
> > the memcpy_flushcache API:
> > 
> >     https://patchwork.kernel.org/patch/10217655/
> > 
> > ...it's in my queue to either push through -tip or add it to the next
> > libnvdimm pull request for 4.17-rc1.
> 
> So lets rebase this submission on top of that.

I already did and the patch that you criticized is based on the top of 
that.

I've found out that memcpy_flushcache performs better on x86 and dax_flush 
performs better on ARM64, so the code has two flushing strategies that are
switched with a preprocessor condition.

Mikulas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: dm-writecache
@ 2018-05-18 15:44       ` Mike Snitzer
  0 siblings, 0 replies; 27+ messages in thread
From: Mike Snitzer @ 2018-05-18 15:44 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, device-mapper development, Mikulas Patocka,
	Alasdair G. Kergon, linux-nvdimm

On Thu, Mar 08 2018 at 12:08pm -0500,
Dan Williams <dan.j.williams@intel.com> wrote:

> Mikulas sent this useful enhancement to the memcpy_flushcache API:
> 
>     https://patchwork.kernel.org/patch/10217655/
> 
> ...it's in my queue to either push through -tip or add it to the next
> libnvdimm pull request for 4.17-rc1.

Hi Dan,

Seems this never actually went upstream.  I've staged it in
linux-dm.git's "for-next" for the time being:
https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=dm-4.18&id=a7e96990b5ff6206fefdc5bfe74396bb880f7e48

But do you intend to pick it up for 4.18 inclusion?  If so I'll drop
it.. would just hate for it to get dropped on the floor by getting lost
in the shuffle between trees.

Please avise, thanks!
Mike

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: dm-writecache
@ 2018-05-18 15:54         ` Dan Williams
  0 siblings, 0 replies; 27+ messages in thread
From: Dan Williams @ 2018-05-18 15:54 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Christoph Hellwig, device-mapper development, Mikulas Patocka,
	Alasdair G. Kergon, linux-nvdimm

On Fri, May 18, 2018 at 8:44 AM, Mike Snitzer <snitzer@redhat.com> wrote:
> On Thu, Mar 08 2018 at 12:08pm -0500,
> Dan Williams <dan.j.williams@intel.com> wrote:
>
>> Mikulas sent this useful enhancement to the memcpy_flushcache API:
>>
>>     https://patchwork.kernel.org/patch/10217655/
>>
>> ...it's in my queue to either push through -tip or add it to the next
>> libnvdimm pull request for 4.17-rc1.
>
> Hi Dan,
>
> Seems this never actually went upstream.  I've staged it in
> linux-dm.git's "for-next" for the time being:
> https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=dm-4.18&id=a7e96990b5ff6206fefdc5bfe74396bb880f7e48
>
> But do you intend to pick it up for 4.18 inclusion?  If so I'll drop
> it.. would just hate for it to get dropped on the floor by getting lost
> in the shuffle between trees.
>
> Please avise, thanks!
> Mike

Thanks for picking it up! I was hoping to resend it to get acks from
x86 folks, and then yes it fell through the cracks in my patch
tracking.

Now that I look at it again I don't think we need this hunk:

void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
			    size_t len)
{
	char *from = kmap_atomic(page);
-	memcpy_flushcache(to, from + offset, len);
+	__memcpy_flushcache(to, from + offset, len);
	kunmap_atomic(from);
}

...and I wonder what the benefit is of the 16-byte case? I would
assume the bulk of the benefit is limited to the 4 and 8 byte copy
cases.

Mikulas please resend with those comments addressed and include Ingo and Thomas.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: dm-writecache
@ 2018-05-18 20:12           ` Mikulas Patocka
  0 siblings, 0 replies; 27+ messages in thread
From: Mikulas Patocka @ 2018-05-18 20:12 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, device-mapper development, Alasdair G. Kergon,
	Mike Snitzer, linux-nvdimm



On Fri, 18 May 2018, Dan Williams wrote:

> On Fri, May 18, 2018 at 8:44 AM, Mike Snitzer <snitzer@redhat.com> wrote:
> > On Thu, Mar 08 2018 at 12:08pm -0500,
> > Dan Williams <dan.j.williams@intel.com> wrote:
> >
> >> Mikulas sent this useful enhancement to the memcpy_flushcache API:
> >>
> >>     https://patchwork.kernel.org/patch/10217655/
> >>
> >> ...it's in my queue to either push through -tip or add it to the next
> >> libnvdimm pull request for 4.17-rc1.
> >
> > Hi Dan,
> >
> > Seems this never actually went upstream.  I've staged it in
> > linux-dm.git's "for-next" for the time being:
> > https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=dm-4.18&id=a7e96990b5ff6206fefdc5bfe74396bb880f7e48
> >
> > But do you intend to pick it up for 4.18 inclusion?  If so I'll drop
> > it.. would just hate for it to get dropped on the floor by getting lost
> > in the shuffle between trees.
> >
> > Please avise, thanks!
> > Mike
> 
> Thanks for picking it up! I was hoping to resend it to get acks from
> x86 folks, and then yes it fell through the cracks in my patch
> tracking.
> 
> Now that I look at it again I don't think we need this hunk:
> 
> void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
> size_t len)
> {
> char *from = kmap_atomic(page);
> - memcpy_flushcache(to, from + offset, len);
> + __memcpy_flushcache(to, from + offset, len);
> kunmap_atomic(from);
> }

Yes - this is not needed.

> ...and I wonder what the benefit is of the 16-byte case? I would
> assume the bulk of the benefit is limited to the 4 and 8 byte copy
> cases.

dm-writecache uses 16-byte writes frequently, so it is needed for that.

If we split 16-byte write to two 8-byte writes, it would degrade 
performance for architectures where memcpy_flushcache needs to flush the 
cache.

> Mikulas please resend with those comments addressed and include Ingo and 
> Thomas.

Mikulas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: dm-writecache
@ 2018-05-18 20:14             ` Dan Williams
  0 siblings, 0 replies; 27+ messages in thread
From: Dan Williams @ 2018-05-18 20:14 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christoph Hellwig, device-mapper development, Alasdair G. Kergon,
	Mike Snitzer, linux-nvdimm

On Fri, May 18, 2018 at 1:12 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Fri, 18 May 2018, Dan Williams wrote:
>
>> On Fri, May 18, 2018 at 8:44 AM, Mike Snitzer <snitzer@redhat.com> wrote:
>> > On Thu, Mar 08 2018 at 12:08pm -0500,
>> > Dan Williams <dan.j.williams@intel.com> wrote:
>> >
>> >> Mikulas sent this useful enhancement to the memcpy_flushcache API:
>> >>
>> >>     https://patchwork.kernel.org/patch/10217655/
>> >>
>> >> ...it's in my queue to either push through -tip or add it to the next
>> >> libnvdimm pull request for 4.17-rc1.
>> >
>> > Hi Dan,
>> >
>> > Seems this never actually went upstream.  I've staged it in
>> > linux-dm.git's "for-next" for the time being:
>> > https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/commit/?h=dm-4.18&id=a7e96990b5ff6206fefdc5bfe74396bb880f7e48
>> >
>> > But do you intend to pick it up for 4.18 inclusion?  If so I'll drop
>> > it.. would just hate for it to get dropped on the floor by getting lost
>> > in the shuffle between trees.
>> >
>> > Please avise, thanks!
>> > Mike
>>
>> Thanks for picking it up! I was hoping to resend it to get acks from
>> x86 folks, and then yes it fell through the cracks in my patch
>> tracking.
>>
>> Now that I look at it again I don't think we need this hunk:
>>
>> void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
>> size_t len)
>> {
>> char *from = kmap_atomic(page);
>> - memcpy_flushcache(to, from + offset, len);
>> + __memcpy_flushcache(to, from + offset, len);
>> kunmap_atomic(from);
>> }
>
> Yes - this is not needed.
>
>> ...and I wonder what the benefit is of the 16-byte case? I would
>> assume the bulk of the benefit is limited to the 4 and 8 byte copy
>> cases.
>
> dm-writecache uses 16-byte writes frequently, so it is needed for that.
>
> If we split 16-byte write to two 8-byte writes, it would degrade
> performance for architectures where memcpy_flushcache needs to flush the
> cache.

My question was how measurable it is to special case 16-byte
transfers? I know Ingo is going to ask this question, so it would
speed things along if this patch included performance benefit numbers
for each special case in the changelog.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: dm-writecache
@ 2018-05-18 22:00               ` Mikulas Patocka
  0 siblings, 0 replies; 27+ messages in thread
From: Mikulas Patocka @ 2018-05-18 22:00 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, device-mapper development, Alasdair G. Kergon,
	Mike Snitzer, linux-nvdimm



On Fri, 18 May 2018, Dan Williams wrote:

> >> ...and I wonder what the benefit is of the 16-byte case? I would
> >> assume the bulk of the benefit is limited to the 4 and 8 byte copy
> >> cases.
> >
> > dm-writecache uses 16-byte writes frequently, so it is needed for that.
> >
> > If we split a 16-byte write into two 8-byte writes, it would degrade
> > performance on architectures where memcpy_flushcache needs to flush
> > the cache.
> 
> My question was: how measurable is the benefit of special-casing
> 16-byte transfers? I know Ingo is going to ask this question, so it
> would speed things along if this patch included performance benefit
> numbers for each special case in the changelog.

I tested it some time ago - the movnti instruction has 2% better 
throughput than the existing memcpy_flushcache function.

dm-writecache does one 16-byte write for every sector written and one 
8-byte write for every sector cleaned up, so the overhead is measurable.
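
(A simplified sketch of those two writes; the structure and helper
names below are illustrative rather than the exact dm-writecache code.
The per-block metadata entry in persistent memory is 16 bytes and is
committed with one flushcache copy, and freeing a block rewrites only
its 8-byte sequence count.)

#include <linux/types.h>
#include <linux/string.h>       /* memcpy_flushcache() */
#include <asm/byteorder.h>      /* cpu_to_le64() */

/* Illustrative 16-byte on-pmem metadata entry, one per cached block. */
struct wc_entry_sketch {
        __le64 original_sector; /* where the block belongs on the origin */
        __le64 seq_count;       /* commit sequence number, -1 = free */
};

/* Committing one sector's metadata: a single 16-byte flushcache write. */
static void wc_commit_entry_sketch(struct wc_entry_sketch *pmem_e,
                u64 sector, u64 seq)
{
        struct wc_entry_sketch e = {
                .original_sector = cpu_to_le64(sector),
                .seq_count = cpu_to_le64(seq),
        };

        memcpy_flushcache(pmem_e, &e, sizeof(e));       /* the 16-byte case */
}

/* Cleaning the block up later only rewrites the 8-byte seq_count. */
static void wc_free_entry_sketch(struct wc_entry_sketch *pmem_e)
{
        __le64 invalid = cpu_to_le64(-1);

        memcpy_flushcache(&pmem_e->seq_count, &invalid, sizeof(invalid));
}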

Mikulas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: dm-writecache
@ 2018-05-18 22:10                 ` Dan Williams
  0 siblings, 0 replies; 27+ messages in thread
From: Dan Williams @ 2018-05-18 22:10 UTC (permalink / raw)
  To: Mikulas Patocka
  Cc: Christoph Hellwig, device-mapper development, Alasdair G. Kergon,
	Mike Snitzer, linux-nvdimm

On Fri, May 18, 2018 at 3:00 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
>
>
> On Fri, 18 May 2018, Dan Williams wrote:
>
>> >> ...and I wonder what the benefit is of the 16-byte case? I would
>> >> assume the bulk of the benefit is limited to the 4 and 8 byte copy
>> >> cases.
>> >
>> > dm-writecache uses 16-byte writes frequently, so it is needed for that.
>> >
>> > If we split a 16-byte write into two 8-byte writes, it would degrade
>> > performance on architectures where memcpy_flushcache needs to flush
>> > the cache.
>>
>> My question was: how measurable is the benefit of special-casing
>> 16-byte transfers? I know Ingo is going to ask this question, so it
>> would speed things along if this patch included performance benefit
>> numbers for each special case in the changelog.
>
> I tested it some time ago - the movnti instruction has 2% better
> throughput than the existing memcpy_flushcache function.
>
> dm-writecache does one 16-byte write for every sector written and one
> 8-byte write for every sector cleaned up, so the overhead is measurable.

Awesome, include those measured numbers in the changelog for the next
spin of the patch.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2018-05-18 22:10 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-08 13:25 [PATCH] dm-writecache Mikulas Patocka
2018-03-08 14:51 ` [dm-devel] " Christoph Hellwig
2018-03-08 14:51   ` Christoph Hellwig
2018-03-08 17:08   ` Dan Williams
2018-03-08 17:08     ` Dan Williams
2018-03-12  7:48     ` Christoph Hellwig
2018-03-12  7:48       ` Christoph Hellwig
2018-03-12 12:15       ` Mikulas Patocka
2018-03-12 12:15         ` Mikulas Patocka
2018-05-18 15:44     ` dm-writecache Mike Snitzer
2018-05-18 15:44       ` dm-writecache Mike Snitzer
2018-05-18 15:54       ` dm-writecache Dan Williams
2018-05-18 15:54         ` dm-writecache Dan Williams
2018-05-18 20:12         ` dm-writecache Mikulas Patocka
2018-05-18 20:12           ` dm-writecache Mikulas Patocka
2018-05-18 20:14           ` dm-writecache Dan Williams
2018-05-18 20:14             ` dm-writecache Dan Williams
2018-05-18 22:00             ` dm-writecache Mikulas Patocka
2018-05-18 22:00               ` dm-writecache Mikulas Patocka
2018-05-18 22:10               ` dm-writecache Dan Williams
2018-05-18 22:10                 ` dm-writecache Dan Williams
2018-03-09  3:26   ` [dm-devel] [PATCH] dm-writecache Mikulas Patocka
2018-03-09  3:26     ` Mikulas Patocka
2018-03-12  7:50     ` Christoph Hellwig
2018-03-12  7:50       ` Christoph Hellwig
2018-03-12 12:12       ` Mikulas Patocka
2018-03-12 12:12         ` Mikulas Patocka
