All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: axboe@kernel.dk
Cc: sfr@canb.auug.org.au, linux-nvdimm@lists.01.org, neilb@suse.de,
	gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org,
	mingo@kernel.org, linux-acpi@vger.kernel.org, jmoyer@redhat.com,
	akpm@linux-foundation.org, hch@lst.de
Subject: [PATCH v4 17/21] libnd: infrastructure for btt devices
Date: Wed, 27 May 2015 18:26:16 -0400	[thread overview]
Message-ID: <20150527222616.17965.7678.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <20150527210155.17965.74864.stgit@dwillia2-desk3.amr.corp.intel.com>

Block devices from an nd bus, in addition to accepting "struct bio"
based requests, also have the capability to perform byte-aligned
accesses.  By default only the bio/block interface is used.  However, if
another driver can make effective use of the byte-aligned capability it
can claim/disable the block interface and use the byte-aligned "nd_io"
interface.

The BTT driver is the first consumer of this mechanism, allowing atomic
sector update guarantees to be layered on top of nd_io capable
libnd-block-devices, or their partitions.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/block/nd/Kconfig      |    3 
 drivers/block/nd/Makefile     |    1 
 drivers/block/nd/btt.h        |   45 ++++
 drivers/block/nd/btt_devs.c   |  442 +++++++++++++++++++++++++++++++++++++++++
 drivers/block/nd/bus.c        |  128 ++++++++++++
 drivers/block/nd/core.c       |   79 +++++++
 drivers/block/nd/nd-private.h |   28 +++
 drivers/block/nd/nd.h         |   94 +++++++++
 drivers/block/nd/pmem.c       |   29 +++
 include/uapi/linux/ndctl.h    |    2 
 10 files changed, 847 insertions(+), 4 deletions(-)
 create mode 100644 drivers/block/nd/btt.h
 create mode 100644 drivers/block/nd/btt_devs.c

diff --git a/drivers/block/nd/Kconfig b/drivers/block/nd/Kconfig
index 03f572f0e3d0..00d9afe9475e 100644
--- a/drivers/block/nd/Kconfig
+++ b/drivers/block/nd/Kconfig
@@ -34,4 +34,7 @@ config BLK_DEV_PMEM
 
 	  Say Y if you want to use a NVDIMM described by NFIT
 
+config ND_BTT_DEVS
+	def_bool y
+
 endif
diff --git a/drivers/block/nd/Makefile b/drivers/block/nd/Makefile
index 8d14510559e1..9866669d7738 100644
--- a/drivers/block/nd/Makefile
+++ b/drivers/block/nd/Makefile
@@ -11,3 +11,4 @@ libnd-y += region_devs.o
 libnd-y += region.o
 libnd-y += namespace_devs.o
 libnd-y += label.o
+libnd-$(CONFIG_ND_BTT_DEVS) += btt_devs.o
diff --git a/drivers/block/nd/btt.h b/drivers/block/nd/btt.h
new file mode 100644
index 000000000000..e8f6d8e0ddd3
--- /dev/null
+++ b/drivers/block/nd/btt.h
@@ -0,0 +1,45 @@
+/*
+ * Block Translation Table library
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_BTT_H
+#define _LINUX_BTT_H
+
+#include <linux/types.h>
+
+#define BTT_SIG_LEN 16
+#define BTT_SIG "BTT_ARENA_INFO\0"
+
+/*
+ * struct btt_sb - layout of a BTT info block as stored on media
+ *
+ * Multi-byte integer fields are little-endian (__le16/__le32/__le64).
+ * The members add up to exactly 4096 bytes; @checksum occupies the
+ * final 8 bytes of the block.
+ */
+struct btt_sb {
+	u8 signature[BTT_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le32 external_lbasize;
+	__le32 external_nlba;
+	__le32 internal_lbasize;
+	__le32 internal_nlba;
+	__le32 nfree;
+	__le32 infosize;
+	__le64 nextoff;
+	__le64 dataoff;
+	__le64 mapoff;
+	__le64 logoff;
+	__le64 info2off;
+	u8 padding[3968];
+	__le64 checksum;
+};
+
+#endif
diff --git a/drivers/block/nd/btt_devs.c b/drivers/block/nd/btt_devs.c
new file mode 100644
index 000000000000..b3b813288092
--- /dev/null
+++ b/drivers/block/nd/btt_devs.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-private.h"
+#include "btt.h"
+#include "nd.h"
+
+static DEFINE_IDA(btt_ida);
+
+/*
+ * Device-type release callback for an nd_btt.
+ *
+ * Warns if a backing device is still attached, drops the ndio claim
+ * (ndio_del_claim() tolerates NULL), returns the id to btt_ida and
+ * frees the uuid and the nd_btt itself.
+ */
+static void nd_btt_release(struct device *dev)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	dev_dbg(dev, "%s\n", __func__);
+	WARN_ON(nd_btt->backing_dev);
+	ndio_del_claim(nd_btt->ndio_claim);
+	ida_simple_remove(&btt_ida, nd_btt->id);
+	kfree(nd_btt->uuid);
+	kfree(nd_btt);
+}
+
+/* device_type shared by every nd_btt; also used to identify btt devices */
+static struct device_type nd_btt_device_type = {
+	.name = "nd_btt",
+	.release = nd_btt_release,
+};
+
+/* Return true when @dev carries the nd_btt device type. */
+bool is_nd_btt(struct device *dev)
+{
+	return dev->type == &nd_btt_device_type;
+}
+
+/*
+ * Map an embedded struct device back to its containing nd_btt.
+ * Emits a warning, but still performs the conversion, when @dev does
+ * not have the nd_btt device type.
+ */
+struct nd_btt *to_nd_btt(struct device *dev)
+{
+	WARN_ON(!is_nd_btt(dev));
+
+	return container_of(dev, struct nd_btt, dev);
+}
+EXPORT_SYMBOL(to_nd_btt);
+
+/*
+ * Sector sizes a btt may expose.  NOTE(review): the trailing 0 presumably
+ * terminates the list for nd_sector_size_show/store - confirm there.
+ */
+static const unsigned long btt_lbasize_supported[] = { 512, 4096, 0 };
+
+/* sysfs show: delegate formatting of the current lbasize to the core helper */
+static ssize_t sector_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+}
+
+/*
+ * sysfs store: update the btt sector size via nd_sector_size_store().
+ *
+ * Lock order is device_lock() first, then nd_bus_lock().  Returns the
+ * helper's non-zero result, or @len when the helper returns 0.
+ */
+static ssize_t sector_size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nd_bus_lock(dev);
+	rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+			btt_lbasize_supported);
+	/* append a newline to the debug output only if the input lacked one */
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nd_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
+/* sysfs show: print the btt uuid, or an empty line when none is set */
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	const u8 *uuid = to_nd_btt(dev)->uuid;
+
+	if (!uuid)
+		return sprintf(buf, "\n");
+
+	return sprintf(buf, "%pUb\n", uuid);
+}
+
+/*
+ * sysfs store: hand the written text to nd_uuid_store() under the
+ * device lock.  Returns the helper's non-zero result, or @len when the
+ * helper returns 0.
+ */
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
+	/* append a newline to the debug output only if the input lacked one */
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+/* sysfs show: print "/dev/<name>" of the backing device, or an empty line */
+static ssize_t backing_dev_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct block_device *bdev = to_nd_btt(dev)->backing_dev;
+	char name[BDEVNAME_SIZE];
+
+	if (!bdev)
+		return sprintf(buf, "\n");
+
+	return sprintf(buf, "/dev/%s\n", bdevname(bdev, name));
+}
+
+static const fmode_t nd_btt_devs_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+
+/*
+ * ndio claim ->notify_remove() callback for a btt, also called directly
+ * when userspace detaches the backing device.
+ *
+ * Releases the backing block device and then either schedules the btt
+ * for asynchronous unregistration (when a driver is bound) or drops the
+ * claim immediately.  A NULL claim, or a claim without a holder, is
+ * ignored.  Expected to run with the nd bus lock held (see the
+ * WARN_ON_ONCE below).
+ */
+static void nd_btt_ndio_notify_remove(struct nd_io_claim *ndio_claim)
+{
+	char bdev_name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+
+	if (!ndio_claim || !ndio_claim->holder)
+		return;
+
+	nd_btt = to_nd_btt(ndio_claim->holder);
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_btt->dev));
+	dev_dbg(&nd_btt->dev, "%pf: %s: release /dev/%s\n",
+			__builtin_return_address(0), __func__,
+			bdevname(nd_btt->backing_dev, bdev_name));
+	blkdev_put(nd_btt->backing_dev, nd_btt_devs_mode);
+	nd_btt->backing_dev = NULL;
+
+	/*
+	 * Once we've had our backing device removed we need to be fully
+	 * reconfigured.  The bus will have already created a new seed
+	 * for this purpose, so now is a good time to clean up this
+	 * stale nd_btt instance.
+	 */
+	if (nd_btt->dev.driver)
+		nd_device_unregister(&nd_btt->dev, ND_ASYNC);
+	else {
+		/* no driver bound: the claim can be dropped right away */
+		ndio_del_claim(ndio_claim);
+		nd_btt->ndio_claim = NULL;
+	}
+}
+
+/*
+ * Attach or detach the backing block device of an idle btt.
+ *
+ * An empty (whitespace-only) write detaches the current backing device,
+ * if any.  Any other write is treated as a block device path: the device
+ * is opened exclusively, must be at least 16MiB, and its whole disk must
+ * have a registered ndio interface, which is then claimed.
+ *
+ * Returns @len on success or a negative errno.  Refused with -EBUSY
+ * while a driver is bound or a backing device is already set.
+ */
+static ssize_t __backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_bus *nd_bus = walk_to_nd_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char disk_name[BDEVNAME_SIZE];
+	struct block_device *bdev;
+	struct nd_io *ndio;
+	char *path, *name;
+	ssize_t rc = len;
+
+	if (dev->driver) {
+		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		return -EBUSY;
+	}
+
+	path = kstrndup(buf, len, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+	name = strim(path);
+
+	/* detach the backing device */
+	if (*name == '\0') {
+		if (nd_btt->backing_dev)
+			nd_btt_ndio_notify_remove(nd_btt->ndio_claim);
+		goto out;
+	}
+
+	if (nd_btt->backing_dev) {
+		dev_dbg(dev, "backing_dev already set\n");
+		rc = -EBUSY;
+		goto out;
+	}
+
+	bdev = blkdev_get_by_path(name, nd_btt_devs_mode, nd_btt);
+	if (IS_ERR(bdev)) {
+		dev_dbg(dev, "open '%s' failed: %ld\n", name, PTR_ERR(bdev));
+		rc = PTR_ERR(bdev);
+		goto out;
+	}
+
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512) {
+		rc = -ENXIO;
+		goto out_put;
+	}
+
+	ndio = ndio_lookup(nd_bus, bdevname(bdev->bd_contains, disk_name));
+	if (!ndio) {
+		dev_dbg(dev, "%s does not have an ndio interface\n", name);
+		rc = -ENXIO;
+		goto out_put;
+	}
+
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim) {
+		rc = -ENOMEM;
+		goto out_put;
+	}
+
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_btt->dev));
+	nd_btt->backing_dev = bdev;
+	goto out;
+
+ out_put:
+	blkdev_put(bdev, nd_btt_devs_mode);
+ out:
+	kfree(path);
+	return rc;
+}
+
+/*
+ * sysfs store: attach/detach the backing device, see __backing_dev_store().
+ *
+ * Locks are taken as device_lock() then nd_bus_lock(), the same order
+ * sector_size_store() uses.  Taking them the other way round here would
+ * allow an ABBA deadlock between concurrent writers of the two
+ * attributes on the same btt device.
+ *
+ * Returns the result of __backing_dev_store(): @len or a negative errno.
+ */
+static ssize_t backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+
+	device_lock(dev);
+	nd_bus_lock(dev);
+	rc = __backing_dev_store(dev, attr, buf, len);
+	/* append a newline to the debug output only if the input lacked one */
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nd_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(backing_dev);
+
+/*
+ * A btt is idle when it is not the bus's current seed, has no driver
+ * bound and has no backing device attached.
+ */
+static bool is_nd_btt_idle(struct device *dev)
+{
+	struct nd_bus *nd_bus = walk_to_nd_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	return nd_bus->nd_btt != nd_btt && !dev->driver
+		&& !nd_btt->backing_dev;
+}
+
+/* sysfs show: report whether a write of 1 to 'delete' would be accepted */
+static ssize_t delete_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* return 1 if can be deleted */
+	return sprintf(buf, "%d\n", is_nd_btt_idle(dev));
+}
+
+/*
+ * sysfs store: writing 1 schedules asynchronous unregistration of an
+ * idle btt.
+ *
+ * Returns @len on success, -EINVAL for any input other than the value 1,
+ * and -EBUSY when the btt is not idle.
+ */
+static ssize_t delete_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long val;
+
+	/* write 1 to delete */
+	if (kstrtoul(buf, 0, &val) != 0 || val != 1)
+		return -EINVAL;
+
+	/* prevent deletion while this btt is active, or is the current seed */
+	if (!is_nd_btt_idle(dev))
+		return -EBUSY;
+
+	/*
+	 * userspace raced itself if device goes active here and it gets
+	 * to keep the pieces
+	 */
+	nd_device_unregister(dev, ND_ASYNC);
+
+	return len;
+}
+static DEVICE_ATTR_RW(delete);
+
+static struct attribute *nd_btt_attributes[] = {
+	&dev_attr_sector_size.attr,
+	&dev_attr_backing_dev.attr,
+	&dev_attr_delete.attr,
+	&dev_attr_uuid.attr,
+	NULL,
+};
+
+static struct attribute_group nd_btt_attribute_group = {
+	.attrs = nd_btt_attributes,
+};
+
+static const struct attribute_group *nd_btt_attribute_groups[] = {
+	&nd_btt_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+/*
+ * Allocate and set up (but neither initialize nor register) a btt device.
+ *
+ * @nd_bus: bus that becomes the parent of the new device
+ * @lbasize: initial sector size, 0 when not yet configured
+ * @uuid: optional 16-byte uuid; it is copied, the caller keeps ownership
+ *        of its own buffer
+ *
+ * Returns the new nd_btt, or NULL when an allocation (including the uuid
+ * copy) or the id reservation fails.  The caller still has to call
+ * device_initialize() / register the device.
+ */
+static struct nd_btt *__nd_btt_create(struct nd_bus *nd_bus,
+		unsigned long lbasize, u8 *uuid)
+{
+	struct nd_btt *nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
+	struct device *dev;
+
+	if (!nd_btt)
+		return NULL;
+	nd_btt->id = ida_simple_get(&btt_ida, 0, 0, GFP_KERNEL);
+	if (nd_btt->id < 0)
+		goto out_free_btt;
+
+	if (uuid) {
+		/* don't hand back a device that silently lost its uuid */
+		nd_btt->uuid = kmemdup(uuid, 16, GFP_KERNEL);
+		if (!nd_btt->uuid)
+			goto out_free_id;
+	}
+
+	nd_btt->lbasize = lbasize;
+	dev = &nd_btt->dev;
+	dev_set_name(dev, "btt%d", nd_btt->id);
+	dev->parent = &nd_bus->dev;
+	dev->type = &nd_btt_device_type;
+	dev->groups = nd_btt_attribute_groups;
+	return nd_btt;
+
+ out_free_id:
+	ida_simple_remove(&btt_ida, nd_btt->id);
+ out_free_btt:
+	kfree(nd_btt);
+	return NULL;
+}
+
+/*
+ * Create an unconfigured btt (no uuid, lbasize 0) on @nd_bus and queue
+ * it for registration.  Returns the new device or NULL on failure.
+ */
+struct nd_btt *nd_btt_create(struct nd_bus *nd_bus)
+{
+	struct nd_btt *seed = __nd_btt_create(nd_bus, 0, NULL);
+
+	if (seed)
+		nd_device_register(&seed->dev);
+
+	return seed;
+}
+
+/*
+ * nd_btt_sb_checksum: compute checksum for btt info block
+ *
+ * Runs nd_fletcher64() over the whole info block with the checksum
+ * field temporarily zeroed (that field is where the result lives).
+ * The field's previous contents are put back before returning.
+ */
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+{
+	u64 saved = btt_sb->checksum;
+	u64 result;
+
+	btt_sb->checksum = 0;
+	result = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
+	btt_sb->checksum = saved;
+
+	return result;
+}
+EXPORT_SYMBOL(nd_btt_sb_checksum);
+
+/*
+ * nd_btt_autodetect: look for an existing btt info block on @bdev
+ *
+ * @nd_bus: bus that will parent a detected btt device
+ * @ndio: byte-access interface of the disk backing @bdev
+ * @bdev: opened whole disk or partition to examine
+ *
+ * Reads one info block at 4K past the start of @bdev and validates its
+ * signature and checksum.  On a match a btt device is created with the
+ * on-media uuid and external lbasize, @ndio is claimed, and the device
+ * is queued for registration.
+ *
+ * Returns 0 on success, in which case @bdev is stored as the btt's
+ * backing_dev and must not be released by the caller.  Returns -ENODEV
+ * on any failure, leaving @bdev untouched.
+ */
+static int nd_btt_autodetect(struct nd_bus *nd_bus, struct nd_io *ndio,
+		struct block_device *bdev)
+{
+	char name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+	struct btt_sb *btt_sb;
+	u64 offset, checksum;
+	u32 lbasize;
+	int rc;
+
+	/* reject undersized disks before allocating or touching the media */
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512)
+		return -ENODEV;
+
+	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
+	if (!btt_sb)
+		return -ENODEV;
+
+	offset = nd_partition_offset(bdev);
+	rc = ndio->rw_bytes(ndio, btt_sb, offset + SZ_4K, sizeof(*btt_sb), READ);
+	if (rc)
+		goto out_free_sb;
+
+	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+		goto out_free_sb;
+
+	/* nd_btt_sb_checksum() zeroes and restores the field on its own */
+	checksum = le64_to_cpu(btt_sb->checksum);
+	if (checksum != nd_btt_sb_checksum(btt_sb))
+		goto out_free_sb;
+
+	/*
+	 * __nd_btt_create() takes its own copy of the uuid, so pass the
+	 * on-media bytes directly; a second private copy here would never
+	 * be freed on the success path.
+	 */
+	lbasize = le32_to_cpu(btt_sb->external_lbasize);
+	nd_btt = __nd_btt_create(nd_bus, lbasize, btt_sb->uuid);
+	if (!nd_btt)
+		goto out_free_sb;
+
+	/*
+	 * From here on the device is reference counted: unwind with
+	 * put_device() so that nd_btt_release() returns the id and frees
+	 * the uuid, and the driver core frees the device name.
+	 */
+	device_initialize(&nd_btt->dev);
+	if (!nd_btt->uuid)
+		goto out_put_btt;
+
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim)
+		goto out_put_btt;
+
+	nd_btt->backing_dev = bdev;
+	dev_dbg(&nd_btt->dev, "%s: activate %s\n", __func__,
+			bdevname(bdev, name));
+	__nd_device_register(&nd_btt->dev);
+	kfree(btt_sb);
+	return 0;
+
+ out_put_btt:
+	put_device(&nd_btt->dev);
+ out_free_sb:
+	kfree(btt_sb);
+
+	return -ENODEV;
+}
+
+/*
+ * Scan a newly registered ndio's disk for existing btt instances.
+ *
+ * Walks every partition, including partition 0 (the whole disk), opens
+ * each one exclusively with @nd_bus as holder and runs
+ * nd_btt_autodetect() on it.  A successfully detected btt keeps the
+ * block device open as its backing device; otherwise the device is
+ * released again.
+ */
+void nd_btt_notify_ndio(struct nd_bus *nd_bus, struct nd_io *ndio)
+{
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_part_iter_init(&piter, ndio->disk, DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter))) {
+		struct block_device *bdev;
+		int rc;
+
+		bdev = bdget_disk(ndio->disk, part->partno);
+		if (!bdev)
+			continue;
+		if (blkdev_get(bdev, nd_btt_devs_mode, nd_bus) != 0)
+			continue;
+		rc = nd_btt_autodetect(nd_bus, ndio, bdev);
+		/* on failure nothing took ownership of the open bdev */
+		if (rc)
+			blkdev_put(bdev, nd_btt_devs_mode);
+		/* no need to scan further in the case of whole disk btt */
+		if (rc == 0 && part->partno == 0)
+			break;
+	}
+	disk_part_iter_exit(&piter);
+}
diff --git a/drivers/block/nd/bus.c b/drivers/block/nd/bus.c
index 4a2185a99bd7..dc69ccfae53a 100644
--- a/drivers/block/nd/bus.c
+++ b/drivers/block/nd/bus.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
+#include <linux/genhd.h>
 #include <linux/ndctl.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -40,6 +41,8 @@ static int to_nd_device_type(struct device *dev)
 		return ND_DEVICE_REGION_BLK;
 	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
 		return nd_region_to_namespace_type(to_nd_region(dev->parent));
+	else if (is_nd_btt(dev))
+		return ND_DEVICE_BTT;
 
 	return 0;
 }
@@ -84,6 +87,21 @@ static int nd_bus_probe(struct device *dev)
 
 	dev_dbg(&nd_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
 			dev_name(dev), rc);
+
+	/* check if our btt-seed has sprouted, and plant another */
+	if (rc == 0 && is_nd_btt(dev) && dev == &nd_bus->nd_btt->dev) {
+		const char *sep = "", *name = "", *status = "failed";
+
+		nd_bus->nd_btt = nd_btt_create(nd_bus);
+		if (nd_bus->nd_btt) {
+			status = "succeeded";
+			sep = ": ";
+			name = dev_name(&nd_bus->nd_btt->dev);
+		}
+		dev_dbg(&nd_bus->dev, "btt seed creation %s%s%s\n",
+				status, sep, name);
+	}
+
 	if (rc != 0)
 		module_put(provider);
 	return rc;
@@ -144,14 +162,19 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
 	put_device(dev);
 }
 
-void nd_device_register(struct device *dev)
+void __nd_device_register(struct device *dev)
 {
 	dev->bus = &nd_bus_type;
-	device_initialize(dev);
 	get_device(dev);
 	async_schedule_domain(nd_async_device_register, dev,
 			&nd_async_domain);
 }
+
+void nd_device_register(struct device *dev)
+{
+	device_initialize(dev);
+	__nd_device_register(dev);
+}
 EXPORT_SYMBOL(nd_device_register);
 
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
@@ -200,6 +223,107 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 }
 EXPORT_SYMBOL(__nd_driver_register);
 
+/**
+ * nd_register_ndio() - register byte-aligned access capability for an nd-bdev
+ * @ndio: initialized ndio instance to register; its ->dev, ->disk and
+ *	->rw_bytes must be set and it must not be registered or claimed yet
+ *
+ * Adds @ndio to the bus's list of ndios and lets the btt code scan the
+ * disk for existing btt instances.
+ *
+ * LOCKING: hold nd_bus_lock() over the creation of ndio->disk and the
+ * subsequent nd_region_ndio event
+ *
+ * Return: 0 on success, -EINVAL when @ndio is not properly initialized
+ * or no nd bus can be found for ndio->dev.
+ */
+int nd_register_ndio(struct nd_io *ndio)
+{
+	struct nd_bus *nd_bus;
+	struct device *dev;
+
+	if (!ndio || !ndio->dev || !ndio->disk || !list_empty(&ndio->list)
+			|| !ndio->rw_bytes || !list_empty(&ndio->claims)) {
+		pr_debug("%s bad parameters from %pf\n", __func__,
+				__builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	dev = ndio->dev;
+	nd_bus = walk_to_nd_bus(dev);
+	if (!nd_bus)
+		return -EINVAL;
+
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_bus->dev));
+	list_add(&ndio->list, &nd_bus->ndios);
+
+	/* TODO: generic infrastructure for 3rd party ndio claimers */
+	nd_btt_notify_ndio(nd_bus, ndio);
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_register_ndio);
+
+/**
+ * __nd_unregister_ndio() - try to remove an ndio interface
+ * @ndio: interface to remove
+ *
+ * Detaches every claim from @ndio, notifies each claimant through its
+ * ->notify_remove() callback and finally removes @ndio from the bus's
+ * list of ndios.
+ *
+ * Return: 0 on success, -ENXIO when @ndio is not registered or no nd bus
+ * can be found for ndio->dev.
+ */
+static int __nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct nd_io_claim *ndio_claim, *_n;
+	struct nd_bus *nd_bus;
+	LIST_HEAD(claims);
+
+	nd_bus = walk_to_nd_bus(ndio->dev);
+	if (!nd_bus || list_empty(&ndio->list))
+		return -ENXIO;
+
+	spin_lock(&ndio->lock);
+	list_splice_init(&ndio->claims, &claims);
+	spin_unlock(&ndio->lock);
+
+	list_for_each_entry_safe(ndio_claim, _n, &claims, list) {
+		/*
+		 * A claimant may defer ndio_del_claim() until its device is
+		 * released, long after this function has returned.  Unlink
+		 * the claim first so that the later list_del() operates on
+		 * a self-linked node rather than writing through the
+		 * on-stack list head.
+		 */
+		list_del_init(&ndio_claim->list);
+		ndio_claim->notify_remove(ndio_claim);
+	}
+
+	list_del_init(&ndio->list);
+
+	return 0;
+}
+
+/*
+ * Unregister @ndio under the nd bus lock, then wait for any asynchronous
+ * device unregistration that the claim callbacks started.  Returns the
+ * result of __nd_unregister_ndio().
+ */
+int nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct device *dev = ndio->dev;
+	int rc;
+
+	nd_bus_lock(dev);
+	rc = __nd_unregister_ndio(ndio);
+	nd_bus_unlock(dev);
+
+	/*
+	 * Flush in case ->notify_remove() kicked off asynchronous device
+	 * unregistration
+	 */
+	nd_synchronize();
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_unregister_ndio);
+
+/* Find the ndio registered on @nd_bus whose gendisk is named @diskname. */
+static struct nd_io *__ndio_lookup(struct nd_bus *nd_bus, const char *diskname)
+{
+	struct nd_io *pos;
+
+	list_for_each_entry(pos, &nd_bus->ndios, list) {
+		if (strcmp(diskname, pos->disk->disk_name) != 0)
+			continue;
+		return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up an ndio by disk name; warns once if the nd bus lock is not
+ * held.  Returns NULL when no ndio matches.
+ */
+struct nd_io *ndio_lookup(struct nd_bus *nd_bus, const char *diskname)
+{
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_bus->dev));
+
+	return __ndio_lookup(nd_bus, diskname);
+}
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
diff --git a/drivers/block/nd/core.c b/drivers/block/nd/core.c
index b45863343a48..a0709a2e302f 100644
--- a/drivers/block/nd/core.c
+++ b/drivers/block/nd/core.c
@@ -55,6 +55,62 @@ bool is_nd_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nd_bus_locked);
 
+/*
+ * Initialize @ndio from scratch: zero it, set up its list heads and lock,
+ * and record the access routine, owning device, disk and alignment.
+ * Registration is a separate step (nd_register_ndio()).
+ */
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align)
+{
+	memset(ndio, 0, sizeof(*ndio));
+	INIT_LIST_HEAD(&ndio->claims);
+	INIT_LIST_HEAD(&ndio->list);
+	spin_lock_init(&ndio->lock);
+	ndio->dev = dev;
+	ndio->disk = disk;
+	ndio->align = align;
+	ndio->rw_bytes = rw_bytes;
+}
+EXPORT_SYMBOL(nd_init_ndio);
+
+/*
+ * Undo ndio_add_claim(): unlink the claim under the parent ndio's lock,
+ * drop the references taken on the ndio's device and on the holder, and
+ * free the claim.  A NULL @ndio_claim is a no-op.
+ */
+void ndio_del_claim(struct nd_io_claim *ndio_claim)
+{
+	struct nd_io *ndio;
+	struct device *holder;
+
+	if (!ndio_claim)
+		return;
+	ndio = ndio_claim->parent;
+	holder = ndio_claim->holder;
+
+	dev_dbg(holder, "%s: drop %s\n", __func__, dev_name(ndio->dev));
+	spin_lock(&ndio->lock);
+	list_del(&ndio_claim->list);
+	spin_unlock(&ndio->lock);
+	put_device(ndio->dev);
+	kfree(ndio_claim);
+	/* holder was read before the claim was freed */
+	put_device(holder);
+}
+
+/*
+ * Record that @holder is using @ndio.
+ *
+ * Allocates a claim, links it onto @ndio's claim list and takes a
+ * reference on both the ndio's device and @holder; ndio_del_claim()
+ * reverses all of this.  @notify_remove is stored in the claim for use
+ * when the ndio goes away.  Returns NULL when the allocation fails.
+ */
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove)
+{
+	struct nd_io_claim *claim;
+
+	claim = kzalloc(sizeof(*claim), GFP_KERNEL);
+	if (claim == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&claim->list);
+	claim->parent = ndio;
+	get_device(ndio->dev);
+
+	spin_lock(&ndio->lock);
+	list_add(&claim->list, &ndio->claims);
+	spin_unlock(&ndio->lock);
+
+	claim->holder = holder;
+	claim->notify_remove = notify_remove;
+	get_device(holder);
+
+	return claim;
+}
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
 	u32 *buf = addr;
@@ -75,6 +131,8 @@ static void nd_bus_release(struct device *dev)
 {
 	struct nd_bus *nd_bus = container_of(dev, struct nd_bus, dev);
 
+	WARN_ON(!list_empty(&nd_bus->ndios));
+
 	ida_simple_remove(&nd_ida, nd_bus->id);
 	kfree(nd_bus);
 }
@@ -271,10 +329,28 @@ static ssize_t wait_probe_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(wait_probe);
 
+/*
+ * sysfs show: name of the bus's current btt seed device, or an empty
+ * line when there is none.  Read under the nd bus lock.
+ */
+static ssize_t btt_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_bus *nd_bus = to_nd_bus(dev);
+	const char *seed = "";
+	ssize_t rc;
+
+	nd_bus_lock(dev);
+	if (nd_bus->nd_btt)
+		seed = dev_name(&nd_bus->nd_btt->dev);
+	rc = sprintf(buf, "%s\n", seed);
+	nd_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(btt_seed);
+
 static struct attribute *nd_bus_attributes[] = {
 	&dev_attr_commands.attr,
 	&dev_attr_wait_probe.attr,
 	&dev_attr_provider.attr,
+	&dev_attr_btt_seed.attr,
 	NULL,
 };
 
@@ -291,6 +367,7 @@ struct nd_bus *__nd_bus_register(struct device *parent,
 
 	if (!nd_bus)
 		return NULL;
+	INIT_LIST_HEAD(&nd_bus->ndios);
 	INIT_LIST_HEAD(&nd_bus->list);
 	init_waitqueue_head(&nd_bus->probe_wait);
 	nd_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
@@ -319,6 +396,8 @@ struct nd_bus *__nd_bus_register(struct device *parent,
 	list_add_tail(&nd_bus->list, &nd_bus_list);
 	mutex_unlock(&nd_bus_list_mutex);
 
+	nd_bus->nd_btt = nd_btt_create(nd_bus);
+
 	return nd_bus;
  err:
 	put_device(&nd_bus->dev);
diff --git a/drivers/block/nd/nd-private.h b/drivers/block/nd/nd-private.h
index fffd65436e2b..6c89695956a4 100644
--- a/drivers/block/nd/nd-private.h
+++ b/drivers/block/nd/nd-private.h
@@ -22,14 +22,21 @@ extern struct list_head nd_bus_list;
 extern struct mutex nd_bus_list_mutex;
 extern int nd_dimm_major;
 
+struct block_device;
+struct nd_io_claim;
+struct nd_btt;
+struct nd_io;
+
 struct nd_bus {
 	struct nd_bus_descriptor *nd_desc;
 	wait_queue_head_t probe_wait;
 	struct module *module;
+	struct list_head ndios;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
 	struct mutex reconfig_mutex;
+	struct nd_btt *nd_btt;
 };
 
 struct nd_dimm {
@@ -41,9 +48,29 @@ struct nd_dimm {
 	int id;
 };
 
+struct nd_io *ndio_lookup(struct nd_bus *nd_bus, const char *diskname);
 bool is_nd_dimm(struct device *dev);
 bool is_nd_blk(struct device *dev);
 bool is_nd_pmem(struct device *dev);
+#if IS_ENABLED(CONFIG_ND_BTT_DEVS)
+bool is_nd_btt(struct device *dev);
+struct nd_btt *nd_btt_create(struct nd_bus *nd_bus);
+void nd_btt_notify_ndio(struct nd_bus *nd_bus, struct nd_io *ndio);
+#else
+static inline bool is_nd_btt(struct device *dev)
+{
+	return false;
+}
+
+static inline struct nd_btt *nd_btt_create(struct nd_bus *nd_bus)
+{
+	return NULL;
+}
+
+static inline void nd_btt_notify_ndio(struct nd_bus *nd_bus, struct nd_io *ndio)
+{
+}
+#endif
 struct nd_bus *walk_to_nd_bus(struct device *nd_dev);
 int __init nd_bus_init(void);
 void nd_bus_exit(void);
@@ -62,6 +89,7 @@ void nd_synchronize(void);
 int nd_bus_register_dimms(struct nd_bus *nd_bus);
 int nd_bus_register_regions(struct nd_bus *nd_bus);
 int nd_bus_init_interleave_sets(struct nd_bus *nd_bus);
+void __nd_device_register(struct device *dev);
 int nd_match_dimm(struct device *dev, void *data);
 struct nd_label_id;
 char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
diff --git a/drivers/block/nd/nd.h b/drivers/block/nd/nd.h
index 24a440a23b2c..73e830785f74 100644
--- a/drivers/block/nd/nd.h
+++ b/drivers/block/nd/nd.h
@@ -12,13 +12,19 @@
  */
 #ifndef __ND_H__
 #define __ND_H__
+#include <linux/genhd.h>
 #include <linux/device.h>
 #include <linux/libnd.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
+#include <linux/fs.h>
 #include "label.h"
 
+enum {
+	SECTOR_SHIFT = 9,
+};
+
 struct nd_dimm_drvdata {
 	struct device *dev;
 	int nsindex_size;
@@ -111,6 +117,84 @@ static inline unsigned nd_inc_seq(unsigned seq)
 	return next[seq & 3];
 }
 
+struct nd_io;
+/**
+ * nd_rw_bytes_fn() - access bytes relative to the "whole disk" namespace device
+ * @ndio: per-namespace context
+ * @buf: source / target for the write / read
+ * @offset: offset relative to the start of the namespace device
+ * @n: num bytes to access
+ * @flags: READ, WRITE, and other REQ_* flags
+ *
+ * Note: Implementations may assume that offset + n never crosses ndio->align
+ */
+typedef int (*nd_rw_bytes_fn)(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags);
+/* extract the data direction (bit 0) from @flags; safe for any expression */
+#define nd_data_dir(flags) ((flags) & 1)
+
+/**
+ * struct nd_io - info for byte-aligned access to nd devices
+ * @rw_bytes: operation to perform byte-aligned access
+ * @align: a single ->rw_bytes() request may not cross this alignment
+ * @disk: whole disk block device for the namespace
+ * @list: for the core to cache a list of "ndio"s for later association
+ * @dev: namespace device
+ * @claims: list of clients using this interface
+ * @lock: protect @claims mutation
+ */
+struct nd_io {
+	nd_rw_bytes_fn rw_bytes;
+	unsigned long align;
+	struct gendisk *disk;
+	struct list_head list;
+	struct device *dev;
+	struct list_head claims;
+	spinlock_t lock;
+};
+
+struct nd_io_claim;
+typedef void (*ndio_notify_remove_fn)(struct nd_io_claim *ndio_claim);
+
+/**
+ * struct nd_io_claim - instance of a claim on a parent ndio
+ * @parent: ndio in use
+ * @notify_remove: ndio is going away, release resources
+ * @list: claim peers
+ * @holder: device that has claimed this ndio
+ *
+ * An ndio may be claimed multiple times, consider the case of a btt
+ * instance per partition on a namespace.
+ */
+struct nd_io_claim {
+	struct nd_io *parent;
+	ndio_notify_remove_fn notify_remove;
+	struct list_head list;
+	struct device *holder;
+};
+
+/*
+ * struct nd_btt - a block-translation-table device on an nd bus
+ *
+ * NOTE(review): @ndio and @offset are not referenced by the code in this
+ * patch; presumably they are for the btt driver proper - confirm.
+ */
+struct nd_btt {
+	struct device dev;		/* embedded device, see to_nd_btt() */
+	struct nd_io *ndio;
+	struct block_device *backing_dev; /* opened backing bdev, or NULL */
+	unsigned long lbasize;		/* sector size, 0 until configured */
+	u8 *uuid;			/* kmalloc'd 16-byte uuid, or NULL */
+	u64 offset;
+	int id;				/* allocated from btt_ida */
+	struct nd_io_claim *ndio_claim;	/* claim on the backing ndio, or NULL */
+};
+
+/*
+ * Byte offset of @bdev from the start of its whole disk: 0 for the whole
+ * disk itself, otherwise the partition's start sector in bytes.
+ */
+static inline u64 nd_partition_offset(struct block_device *bdev)
+{
+	u64 start_sect;
+
+	if (bdev->bd_contains == bdev)
+		return 0;
+
+	start_sect = bdev->bd_part->start_sect;
+	return start_sect << SECTOR_SHIFT;
+}
+
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -125,12 +209,22 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize,
 		const unsigned long *supported, char *buf);
 ssize_t nd_sector_size_store(struct device *dev, const char *buf,
 		unsigned long *current_lbasize, const unsigned long *supported);
+int nd_register_ndio(struct nd_io *ndio);
+int nd_unregister_ndio(struct nd_io *ndio);
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align);
+void ndio_del_claim(struct nd_io_claim *ndio_claim);
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove);
 struct nd_dimm;
 struct nd_dimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
 int nd_dimm_init_nsarea(struct nd_dimm_drvdata *ndd);
 int nd_dimm_init_config_data(struct nd_dimm_drvdata *ndd);
 int nd_dimm_set_config_data(struct nd_dimm_drvdata *ndd, size_t offset,
 		void *buf, size_t len);
+struct nd_btt *to_nd_btt(struct device *dev);
+struct btt_sb;
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_namespace_type(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
diff --git a/drivers/block/nd/pmem.c b/drivers/block/nd/pmem.c
index 7e7421d9c167..5e8c9c629f22 100644
--- a/drivers/block/nd/pmem.c
+++ b/drivers/block/nd/pmem.c
@@ -29,6 +29,7 @@
 struct pmem_device {
 	struct request_queue	*pmem_queue;
 	struct gendisk		*pmem_disk;
+	struct nd_io		ndio;
 
 	/* One contiguous memory region per device */
 	phys_addr_t		phys_addr;
@@ -96,6 +97,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	return 0;
 }
 
+/*
+ * nd_rw_bytes_fn implementation for pmem: copy @n bytes between @buf and
+ * the device's mapping at @offset.
+ *
+ * The direction comes from bit 0 of @flags (nd_data_dir()).  Returns 0 on
+ * success, or -EFAULT (with a one-time warning) when the request does not
+ * fit inside the device.
+ */
+static int pmem_rw_bytes(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags)
+{
+	struct pmem_device *pmem = container_of(ndio, typeof(*pmem), ndio);
+	int rw = nd_data_dir(flags);
+
+	/*
+	 * Compare against the remaining space rather than computing
+	 * offset + n, which could wrap around and pass the check.
+	 */
+	if (unlikely(offset > pmem->size || n > pmem->size - offset)) {
+		dev_WARN_ONCE(ndio->dev, 1, "%s: request out of range\n",
+				__func__);
+		return -EFAULT;
+	}
+
+	if (rw == READ)
+		memcpy(buf, pmem->virt_addr + offset, n);
+	else
+		memcpy(pmem->virt_addr + offset, buf, n);
+
+	return 0;
+}
+
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
 			      void **kaddr, unsigned long *pfn, long size)
 {
@@ -169,8 +190,6 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res,
 	set_capacity(disk, pmem->size >> 9);
 	pmem->pmem_disk = disk;
 
-	add_disk(disk);
-
 	return pmem;
 
 out_free_queue:
@@ -222,7 +241,12 @@ static int nd_pmem_probe(struct device *dev)
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);
 
+	nd_bus_lock(dev);
+	add_disk(pmem->pmem_disk);
 	dev_set_drvdata(dev, pmem);
+	nd_init_ndio(&pmem->ndio, pmem_rw_bytes, dev, pmem->pmem_disk, 0);
+	nd_register_ndio(&pmem->ndio);
+	nd_bus_unlock(dev);
 
 	return 0;
 }
@@ -231,6 +255,7 @@ static int nd_pmem_remove(struct device *dev)
 {
 	struct pmem_device *pmem = dev_get_drvdata(dev);
 
+	nd_unregister_ndio(&pmem->ndio);
 	pmem_free(pmem);
 	return 0;
 }
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 0b4dcabb248a..e595751c613d 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -181,6 +181,7 @@ static inline const char *nd_dimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5  /* persistent memory namespace (may alias) */
 #define ND_DEVICE_NAMESPACE_BLK 6   /* block-data-window namespace (may alias) */
+#define ND_DEVICE_BTT 7		    /* block-translation table device */
 
 enum nd_driver_flags {
 	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
@@ -189,6 +190,7 @@ enum nd_driver_flags {
 	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
 	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
+	ND_DRIVER_BTT		  = 1 << ND_DEVICE_BTT,
 };
 
 enum {


WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: axboe@kernel.dk
Cc: sfr@canb.auug.org.au, linux-nvdimm@ml01.01.org, neilb@suse.de,
	gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org,
	mingo@kernel.org, linux-acpi@vger.kernel.org, jmoyer@redhat.com,
	akpm@linux-foundation.org, hch@lst.de
Subject: [PATCH v4 17/21] libnd: infrastructure for btt devices
Date: Wed, 27 May 2015 18:26:16 -0400	[thread overview]
Message-ID: <20150527222616.17965.7678.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <20150527210155.17965.74864.stgit@dwillia2-desk3.amr.corp.intel.com>

Block devices from an nd bus, in addition to accepting "struct bio"
based requests, also have the capability to perform byte-aligned
accesses.  By default only the bio/block interface is used.  However, if
another driver can make effective use of the byte-aligned capability it
can claim/disable the block interface and use the byte-aligned "nd_io"
interface.

The BTT driver is the first consumer of this mechanism; it layers
atomic sector update guarantees on top of nd_io capable
libnd-block-devices, or their partitions.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/block/nd/Kconfig      |    3 
 drivers/block/nd/Makefile     |    1 
 drivers/block/nd/btt.h        |   45 ++++
 drivers/block/nd/btt_devs.c   |  442 +++++++++++++++++++++++++++++++++++++++++
 drivers/block/nd/bus.c        |  128 ++++++++++++
 drivers/block/nd/core.c       |   79 +++++++
 drivers/block/nd/nd-private.h |   28 +++
 drivers/block/nd/nd.h         |   94 +++++++++
 drivers/block/nd/pmem.c       |   29 +++
 include/uapi/linux/ndctl.h    |    2 
 10 files changed, 847 insertions(+), 4 deletions(-)
 create mode 100644 drivers/block/nd/btt.h
 create mode 100644 drivers/block/nd/btt_devs.c

diff --git a/drivers/block/nd/Kconfig b/drivers/block/nd/Kconfig
index 03f572f0e3d0..00d9afe9475e 100644
--- a/drivers/block/nd/Kconfig
+++ b/drivers/block/nd/Kconfig
@@ -34,4 +34,7 @@ config BLK_DEV_PMEM
 
 	  Say Y if you want to use a NVDIMM described by NFIT
 
+config ND_BTT_DEVS
+	def_bool y
+
 endif
diff --git a/drivers/block/nd/Makefile b/drivers/block/nd/Makefile
index 8d14510559e1..9866669d7738 100644
--- a/drivers/block/nd/Makefile
+++ b/drivers/block/nd/Makefile
@@ -11,3 +11,4 @@ libnd-y += region_devs.o
 libnd-y += region.o
 libnd-y += namespace_devs.o
 libnd-y += label.o
+libnd-$(CONFIG_ND_BTT_DEVS) += btt_devs.o
diff --git a/drivers/block/nd/btt.h b/drivers/block/nd/btt.h
new file mode 100644
index 000000000000..e8f6d8e0ddd3
--- /dev/null
+++ b/drivers/block/nd/btt.h
@@ -0,0 +1,45 @@
+/*
+ * Block Translation Table library
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_BTT_H
+#define _LINUX_BTT_H
+
+#include <linux/types.h>
+
+#define BTT_SIG_LEN 16
+#define BTT_SIG "BTT_ARENA_INFO\0"
+
+struct btt_sb {
+	u8 signature[BTT_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le32 external_lbasize;
+	__le32 external_nlba;
+	__le32 internal_lbasize;
+	__le32 internal_nlba;
+	__le32 nfree;
+	__le32 infosize;
+	__le64 nextoff;
+	__le64 dataoff;
+	__le64 mapoff;
+	__le64 logoff;
+	__le64 info2off;
+	u8 padding[3968];
+	__le64 checksum;
+};
+
+#endif
diff --git a/drivers/block/nd/btt_devs.c b/drivers/block/nd/btt_devs.c
new file mode 100644
index 000000000000..b3b813288092
--- /dev/null
+++ b/drivers/block/nd/btt_devs.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-private.h"
+#include "btt.h"
+#include "nd.h"
+
+static DEFINE_IDA(btt_ida);
+
+static void nd_btt_release(struct device *dev)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	dev_dbg(dev, "%s\n", __func__);
+	WARN_ON(nd_btt->backing_dev);
+	ndio_del_claim(nd_btt->ndio_claim);
+	ida_simple_remove(&btt_ida, nd_btt->id);
+	kfree(nd_btt->uuid);
+	kfree(nd_btt);
+}
+
+static struct device_type nd_btt_device_type = {
+	.name = "nd_btt",
+	.release = nd_btt_release,
+};
+
+bool is_nd_btt(struct device *dev)
+{
+	return dev->type == &nd_btt_device_type;
+}
+
+struct nd_btt *to_nd_btt(struct device *dev)
+{
+	struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev);
+
+	WARN_ON(!is_nd_btt(dev));
+	return nd_btt;
+}
+EXPORT_SYMBOL(to_nd_btt);
+
+static const unsigned long btt_lbasize_supported[] = { 512, 4096, 0 };
+
+static ssize_t sector_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+}
+
+static ssize_t sector_size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nd_bus_lock(dev);
+	rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+			btt_lbasize_supported);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nd_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nd_btt->uuid)
+		return sprintf(buf, "%pUb\n", nd_btt->uuid);
+	return sprintf(buf, "\n");
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t backing_dev_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char name[BDEVNAME_SIZE];
+
+	if (nd_btt->backing_dev)
+		return sprintf(buf, "/dev/%s\n",
+				bdevname(nd_btt->backing_dev, name));
+	else
+		return sprintf(buf, "\n");
+}
+
+static const fmode_t nd_btt_devs_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+
+static void nd_btt_ndio_notify_remove(struct nd_io_claim *ndio_claim)
+{
+	char bdev_name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+
+	if (!ndio_claim || !ndio_claim->holder)
+		return;
+
+	nd_btt = to_nd_btt(ndio_claim->holder);
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_btt->dev));
+	dev_dbg(&nd_btt->dev, "%pf: %s: release /dev/%s\n",
+			__builtin_return_address(0), __func__,
+			bdevname(nd_btt->backing_dev, bdev_name));
+	blkdev_put(nd_btt->backing_dev, nd_btt_devs_mode);
+	nd_btt->backing_dev = NULL;
+
+	/*
+	 * Once we've had our backing device removed we need to be fully
+	 * reconfigured.  The bus will have already created a new seed
+	 * for this purpose, so now is a good time to clean up this
+	 * stale nd_btt instance.
+	 */
+	if (nd_btt->dev.driver)
+		nd_device_unregister(&nd_btt->dev, ND_ASYNC);
+	else {
+		ndio_del_claim(ndio_claim);
+		nd_btt->ndio_claim = NULL;
+	}
+}
+
+static ssize_t __backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_bus *nd_bus = walk_to_nd_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char bdev_name[BDEVNAME_SIZE];
+	struct block_device *bdev;
+	struct nd_io *ndio;
+	char *path;
+
+	if (dev->driver) {
+		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		return -EBUSY;
+	}
+
+	path = kstrndup(buf, len, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	/* detach the backing device */
+	if (strcmp(strim(path), "") == 0) {
+		if (!nd_btt->backing_dev)
+			goto out;
+		nd_btt_ndio_notify_remove(nd_btt->ndio_claim);
+		goto out;
+	} else if (nd_btt->backing_dev) {
+		dev_dbg(dev, "backing_dev already set\n");
+		len = -EBUSY;
+		goto out;
+	}
+
+	bdev = blkdev_get_by_path(strim(path), nd_btt_devs_mode, nd_btt);
+	if (IS_ERR(bdev)) {
+		dev_dbg(dev, "open '%s' failed: %ld\n", strim(path),
+				PTR_ERR(bdev));
+		len = PTR_ERR(bdev);
+		goto out;
+	}
+
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512) {
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENXIO;
+		goto out;
+	}
+
+	ndio = ndio_lookup(nd_bus, bdevname(bdev->bd_contains, bdev_name));
+	if (!ndio) {
+		dev_dbg(dev, "%s does not have an ndio interface\n",
+				strim(path));
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENXIO;
+		goto out;
+	}
+
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim) {
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENOMEM;
+		goto out;
+	}
+
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_btt->dev));
+	nd_btt->backing_dev = bdev;
+
+ out:
+	kfree(path);
+	return len;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+
+	/* take device_lock() first, the same order as sector_size_store() */
+	device_lock(dev);
+	nd_bus_lock(dev);
+	rc = __backing_dev_store(dev, attr, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nd_bus_unlock(dev);
+	device_unlock(dev);
+	return rc;
+}
+static DEVICE_ATTR_RW(backing_dev);
+
+static bool is_nd_btt_idle(struct device *dev)
+{
+	struct nd_bus *nd_bus = walk_to_nd_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nd_bus->nd_btt == nd_btt || dev->driver || nd_btt->backing_dev)
+		return false;
+	return true;
+}
+
+static ssize_t delete_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* return 1 if can be deleted */
+	return sprintf(buf, "%d\n", is_nd_btt_idle(dev));
+}
+
+static ssize_t delete_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long val;
+
+	/* write 1 to delete */
+	if (kstrtoul(buf, 0, &val) != 0 || val != 1)
+		return -EINVAL;
+
+	/* prevent deletion while this btt is active, or is the current seed */
+	if (!is_nd_btt_idle(dev))
+		return -EBUSY;
+
+	/*
+	 * userspace raced itself if device goes active here and it gets
+	 * to keep the pieces
+	 */
+	nd_device_unregister(dev, ND_ASYNC);
+
+	return len;
+}
+static DEVICE_ATTR_RW(delete);
+
+static struct attribute *nd_btt_attributes[] = {
+	&dev_attr_sector_size.attr,
+	&dev_attr_backing_dev.attr,
+	&dev_attr_delete.attr,
+	&dev_attr_uuid.attr,
+	NULL,
+};
+
+static struct attribute_group nd_btt_attribute_group = {
+	.attrs = nd_btt_attributes,
+};
+
+static const struct attribute_group *nd_btt_attribute_groups[] = {
+	&nd_btt_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static struct nd_btt *__nd_btt_create(struct nd_bus *nd_bus,
+		unsigned long lbasize, u8 *uuid)
+{
+	struct nd_btt *nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
+	struct device *dev;
+
+	if (!nd_btt)
+		return NULL;
+	nd_btt->id = ida_simple_get(&btt_ida, 0, 0, GFP_KERNEL);
+	if (nd_btt->id < 0) {
+		kfree(nd_btt);
+		return NULL;
+	}
+
+	nd_btt->lbasize = lbasize;
+	if (uuid)
+		uuid = kmemdup(uuid, 16, GFP_KERNEL);
+	nd_btt->uuid = uuid;
+	dev = &nd_btt->dev;
+	dev_set_name(dev, "btt%d", nd_btt->id);
+	dev->parent = &nd_bus->dev;
+	dev->type = &nd_btt_device_type;
+	dev->groups = nd_btt_attribute_groups;
+	return nd_btt;
+}
+
+struct nd_btt *nd_btt_create(struct nd_bus *nd_bus)
+{
+	struct nd_btt *nd_btt = __nd_btt_create(nd_bus, 0, NULL);
+
+	if (!nd_btt)
+		return NULL;
+	nd_device_register(&nd_btt->dev);
+	return nd_btt;
+}
+
+/*
+ * nd_btt_sb_checksum: compute checksum for btt info block
+ *
+ * Returns a fletcher64 checksum of the whole info block, computed with
+ * the checksum field temporarily zeroed (it is restored before return).
+ */
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+{
+	__le64 sum_save = btt_sb->checksum;
+	u64 sum;
+
+	btt_sb->checksum = 0;
+	sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
+	btt_sb->checksum = sum_save;
+	return sum;
+}
+EXPORT_SYMBOL(nd_btt_sb_checksum);
+
+/*
+ * nd_btt_autodetect: create an nd_btt for @bdev if it holds a valid btt_sb
+ */
+static int nd_btt_autodetect(struct nd_bus *nd_bus, struct nd_io *ndio,
+		struct block_device *bdev)
+{
+	char name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+	struct btt_sb *btt_sb;
+	u64 offset, checksum;
+	u32 lbasize;
+	int rc;
+
+	/* same 16MB minimum as backing_dev_store; check before any media read */
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512)
+		return -ENODEV;
+
+	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
+	if (!btt_sb)
+		return -ENODEV;
+
+	/* the info block is read from 4K past the start of @bdev */
+	offset = nd_partition_offset(bdev);
+	rc = ndio->rw_bytes(ndio, btt_sb, offset + SZ_4K, sizeof(*btt_sb), READ);
+	if (rc)
+		goto out_free_sb;
+
+	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+		goto out_free_sb;
+
+	checksum = le64_to_cpu(btt_sb->checksum);
+	btt_sb->checksum = 0;
+	if (checksum != nd_btt_sb_checksum(btt_sb))
+		goto out_free_sb;
+	btt_sb->checksum = cpu_to_le64(checksum);
+
+	/* __nd_btt_create() duplicates the uuid, no local copy is needed */
+	lbasize = le32_to_cpu(btt_sb->external_lbasize);
+	nd_btt = __nd_btt_create(nd_bus, lbasize, btt_sb->uuid);
+	if (!nd_btt)
+		goto out_free_sb;
+
+	device_initialize(&nd_btt->dev);
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim)
+		goto out_put_btt;
+
+	nd_btt->backing_dev = bdev;
+	dev_dbg(&nd_btt->dev, "%s: activate %s\n", __func__,
+			bdevname(bdev, name));
+	__nd_device_register(&nd_btt->dev);
+	kfree(btt_sb);
+	return 0;
+
+ out_put_btt:
+	/* an initialized device is dropped with put_device(), not kfree() */
+	put_device(&nd_btt->dev);
+ out_free_sb:
+	kfree(btt_sb);
+
+	return -ENODEV;
+}
+
+void nd_btt_notify_ndio(struct nd_bus *nd_bus, struct nd_io *ndio)
+{
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_part_iter_init(&piter, ndio->disk, DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter))) {
+		struct block_device *bdev;
+		int rc;
+
+		bdev = bdget_disk(ndio->disk, part->partno);
+		if (!bdev)
+			continue;
+		if (blkdev_get(bdev, nd_btt_devs_mode, nd_bus) != 0)
+			continue;
+		rc = nd_btt_autodetect(nd_bus, ndio, bdev);
+		if (rc)
+			blkdev_put(bdev, nd_btt_devs_mode);
+		/* no need to scan further in the case of whole disk btt */
+		if (rc == 0 && part->partno == 0)
+			break;
+	}
+	disk_part_iter_exit(&piter);
+}
diff --git a/drivers/block/nd/bus.c b/drivers/block/nd/bus.c
index 4a2185a99bd7..dc69ccfae53a 100644
--- a/drivers/block/nd/bus.c
+++ b/drivers/block/nd/bus.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
+#include <linux/genhd.h>
 #include <linux/ndctl.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -40,6 +41,8 @@ static int to_nd_device_type(struct device *dev)
 		return ND_DEVICE_REGION_BLK;
 	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
 		return nd_region_to_namespace_type(to_nd_region(dev->parent));
+	else if (is_nd_btt(dev))
+		return ND_DEVICE_BTT;
 
 	return 0;
 }
@@ -84,6 +87,21 @@ static int nd_bus_probe(struct device *dev)
 
 	dev_dbg(&nd_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
 			dev_name(dev), rc);
+
+	/* check if our btt-seed has sprouted, and plant another */
+	if (rc == 0 && is_nd_btt(dev) && dev == &nd_bus->nd_btt->dev) {
+		const char *sep = "", *name = "", *status = "failed";
+
+		nd_bus->nd_btt = nd_btt_create(nd_bus);
+		if (nd_bus->nd_btt) {
+			status = "succeeded";
+			sep = ": ";
+			name = dev_name(&nd_bus->nd_btt->dev);
+		}
+		dev_dbg(&nd_bus->dev, "btt seed creation %s%s%s\n",
+				status, sep, name);
+	}
+
 	if (rc != 0)
 		module_put(provider);
 	return rc;
@@ -144,14 +162,19 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
 	put_device(dev);
 }
 
-void nd_device_register(struct device *dev)
+void __nd_device_register(struct device *dev)
 {
 	dev->bus = &nd_bus_type;
-	device_initialize(dev);
 	get_device(dev);
 	async_schedule_domain(nd_async_device_register, dev,
 			&nd_async_domain);
 }
+
+void nd_device_register(struct device *dev)
+{
+	device_initialize(dev);
+	__nd_device_register(dev);
+}
 EXPORT_SYMBOL(nd_device_register);
 
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
@@ -200,6 +223,107 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 }
 EXPORT_SYMBOL(__nd_driver_register);
 
+/**
+ * nd_register_ndio() - register byte-aligned access capability for an nd-bdev
+ * @ndio: initialized ndio instance to register; ndio->disk is the child
+ *	gendisk of the ndio namespace device
+ *
+ * LOCKING: hold nd_bus_lock() over the creation of ndio->disk and the
+ * subsequent nd_region_ndio event
+ */
+int nd_register_ndio(struct nd_io *ndio)
+{
+	struct nd_bus *nd_bus;
+	struct device *dev;
+
+	if (!ndio || !ndio->dev || !ndio->disk || !list_empty(&ndio->list)
+			|| !ndio->rw_bytes || !list_empty(&ndio->claims)) {
+		pr_debug("%s bad parameters from %pf\n", __func__,
+				__builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	dev = ndio->dev;
+	nd_bus = walk_to_nd_bus(dev);
+	if (!nd_bus)
+		return -EINVAL;
+
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_bus->dev));
+	list_add(&ndio->list, &nd_bus->ndios);
+
+	/* TODO: generic infrastructure for 3rd party ndio claimers */
+	nd_btt_notify_ndio(nd_bus, ndio);
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_register_ndio);
+
+/**
+ * __nd_unregister_ndio() - try to remove an ndio interface
+ * @ndio: interface to remove
+ */
+static int __nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct nd_io_claim *ndio_claim, *_n;
+	struct nd_bus *nd_bus;
+	LIST_HEAD(claims);
+
+	nd_bus = walk_to_nd_bus(ndio->dev);
+	if (!nd_bus || list_empty(&ndio->list))
+		return -ENXIO;
+
+	spin_lock(&ndio->lock);
+	list_splice_init(&ndio->claims, &claims);
+	spin_unlock(&ndio->lock);
+
+	list_for_each_entry_safe(ndio_claim, _n, &claims, list)
+		ndio_claim->notify_remove(ndio_claim);
+
+	list_del_init(&ndio->list);
+
+	return 0;
+}
+
+int nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct device *dev = ndio->dev;
+	int rc;
+
+	nd_bus_lock(dev);
+	rc = __nd_unregister_ndio(ndio);
+	nd_bus_unlock(dev);
+
+	/*
+	 * Flush in case ->notify_remove() kicked off asynchronous device
+	 * unregistration
+	 */
+	nd_synchronize();
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_unregister_ndio);
+
+static struct nd_io *__ndio_lookup(struct nd_bus *nd_bus, const char *diskname)
+{
+	struct nd_io *ndio;
+
+	list_for_each_entry(ndio, &nd_bus->ndios, list)
+		if (strcmp(diskname, ndio->disk->disk_name) == 0)
+			return ndio;
+
+	return NULL;
+}
+
+struct nd_io *ndio_lookup(struct nd_bus *nd_bus, const char *diskname)
+{
+	struct nd_io *ndio;
+
+	WARN_ON_ONCE(!is_nd_bus_locked(&nd_bus->dev));
+	ndio = __ndio_lookup(nd_bus, diskname);
+
+	return ndio;
+}
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
diff --git a/drivers/block/nd/core.c b/drivers/block/nd/core.c
index b45863343a48..a0709a2e302f 100644
--- a/drivers/block/nd/core.c
+++ b/drivers/block/nd/core.c
@@ -55,6 +55,62 @@ bool is_nd_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nd_bus_locked);
 
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align)
+{
+	memset(ndio, 0, sizeof(*ndio));
+	INIT_LIST_HEAD(&ndio->claims);
+	INIT_LIST_HEAD(&ndio->list);
+	spin_lock_init(&ndio->lock);
+	ndio->dev = dev;
+	ndio->disk = disk;
+	ndio->align = align;
+	ndio->rw_bytes = rw_bytes;
+}
+EXPORT_SYMBOL(nd_init_ndio);
+
+void ndio_del_claim(struct nd_io_claim *ndio_claim)
+{
+	struct nd_io *ndio;
+	struct device *holder;
+
+	if (!ndio_claim)
+		return;
+	ndio = ndio_claim->parent;
+	holder = ndio_claim->holder;
+
+	dev_dbg(holder, "%s: drop %s\n", __func__, dev_name(ndio->dev));
+	spin_lock(&ndio->lock);
+	list_del(&ndio_claim->list);
+	spin_unlock(&ndio->lock);
+	put_device(ndio->dev);
+	kfree(ndio_claim);
+	put_device(holder);
+}
+
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove)
+{
+	struct nd_io_claim *ndio_claim = kzalloc(sizeof(*ndio_claim), GFP_KERNEL);
+
+	if (!ndio_claim)
+		return NULL;
+
+	/* fully populate the claim, pinning both devices, before publishing */
+	INIT_LIST_HEAD(&ndio_claim->list);
+	ndio_claim->parent = ndio;
+	get_device(ndio->dev);
+	ndio_claim->holder = holder;
+	ndio_claim->notify_remove = notify_remove;
+	get_device(holder);
+
+	/* once on ndio->claims the claim is visible to list walkers */
+	spin_lock(&ndio->lock);
+	list_add(&ndio_claim->list, &ndio->claims);
+	spin_unlock(&ndio->lock);
+	return ndio_claim;
+}
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
 	u32 *buf = addr;
@@ -75,6 +131,8 @@ static void nd_bus_release(struct device *dev)
 {
 	struct nd_bus *nd_bus = container_of(dev, struct nd_bus, dev);
 
+	WARN_ON(!list_empty(&nd_bus->ndios));
+
 	ida_simple_remove(&nd_ida, nd_bus->id);
 	kfree(nd_bus);
 }
@@ -271,10 +329,28 @@ static ssize_t wait_probe_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(wait_probe);
 
+static ssize_t btt_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_bus *nd_bus = to_nd_bus(dev);
+	ssize_t rc;
+
+	nd_bus_lock(dev);
+	if (nd_bus->nd_btt)
+		rc = sprintf(buf, "%s\n", dev_name(&nd_bus->nd_btt->dev));
+	else
+		rc = sprintf(buf, "\n");
+	nd_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(btt_seed);
+
 static struct attribute *nd_bus_attributes[] = {
 	&dev_attr_commands.attr,
 	&dev_attr_wait_probe.attr,
 	&dev_attr_provider.attr,
+	&dev_attr_btt_seed.attr,
 	NULL,
 };
 
@@ -291,6 +367,7 @@ struct nd_bus *__nd_bus_register(struct device *parent,
 
 	if (!nd_bus)
 		return NULL;
+	INIT_LIST_HEAD(&nd_bus->ndios);
 	INIT_LIST_HEAD(&nd_bus->list);
 	init_waitqueue_head(&nd_bus->probe_wait);
 	nd_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
@@ -319,6 +396,8 @@ struct nd_bus *__nd_bus_register(struct device *parent,
 	list_add_tail(&nd_bus->list, &nd_bus_list);
 	mutex_unlock(&nd_bus_list_mutex);
 
+	nd_bus->nd_btt = nd_btt_create(nd_bus);
+
 	return nd_bus;
  err:
 	put_device(&nd_bus->dev);
diff --git a/drivers/block/nd/nd-private.h b/drivers/block/nd/nd-private.h
index fffd65436e2b..6c89695956a4 100644
--- a/drivers/block/nd/nd-private.h
+++ b/drivers/block/nd/nd-private.h
@@ -22,14 +22,21 @@ extern struct list_head nd_bus_list;
 extern struct mutex nd_bus_list_mutex;
 extern int nd_dimm_major;
 
+struct block_device;
+struct nd_io_claim;
+struct nd_btt;
+struct nd_io;
+
 struct nd_bus {
 	struct nd_bus_descriptor *nd_desc;
 	wait_queue_head_t probe_wait;
 	struct module *module;
+	struct list_head ndios;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
 	struct mutex reconfig_mutex;
+	struct nd_btt *nd_btt;
 };
 
 struct nd_dimm {
@@ -41,9 +48,29 @@ struct nd_dimm {
 	int id;
 };
 
+struct nd_io *ndio_lookup(struct nd_bus *nd_bus, const char *diskname);
 bool is_nd_dimm(struct device *dev);
 bool is_nd_blk(struct device *dev);
 bool is_nd_pmem(struct device *dev);
+#if IS_ENABLED(CONFIG_ND_BTT_DEVS)
+bool is_nd_btt(struct device *dev);
+struct nd_btt *nd_btt_create(struct nd_bus *nd_bus);
+void nd_btt_notify_ndio(struct nd_bus *nd_bus, struct nd_io *ndio);
+#else
+static inline bool is_nd_btt(struct device *dev)
+{
+	return false;
+}
+
+static inline struct nd_btt *nd_btt_create(struct nd_bus *nd_bus)
+{
+	return NULL;
+}
+
+static inline void nd_btt_notify_ndio(struct nd_bus *nd_bus, struct nd_io *ndio)
+{
+}
+#endif
 struct nd_bus *walk_to_nd_bus(struct device *nd_dev);
 int __init nd_bus_init(void);
 void nd_bus_exit(void);
@@ -62,6 +89,7 @@ void nd_synchronize(void);
 int nd_bus_register_dimms(struct nd_bus *nd_bus);
 int nd_bus_register_regions(struct nd_bus *nd_bus);
 int nd_bus_init_interleave_sets(struct nd_bus *nd_bus);
+void __nd_device_register(struct device *dev);
 int nd_match_dimm(struct device *dev, void *data);
 struct nd_label_id;
 char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
diff --git a/drivers/block/nd/nd.h b/drivers/block/nd/nd.h
index 24a440a23b2c..73e830785f74 100644
--- a/drivers/block/nd/nd.h
+++ b/drivers/block/nd/nd.h
@@ -12,13 +12,19 @@
  */
 #ifndef __ND_H__
 #define __ND_H__
+#include <linux/genhd.h>
 #include <linux/device.h>
 #include <linux/libnd.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
+#include <linux/fs.h>
 #include "label.h"
 
+enum {
+	SECTOR_SHIFT = 9,
+};
+
 struct nd_dimm_drvdata {
 	struct device *dev;
 	int nsindex_size;
@@ -111,6 +117,84 @@ static inline unsigned nd_inc_seq(unsigned seq)
 	return next[seq & 3];
 }
 
+struct nd_io;
+/**
+ * nd_rw_bytes_fn() - access bytes relative to the "whole disk" namespace device
+ * @ndio: per-namespace context
+ * @buf: source / target for the write / read
+ * @offset: offset relative to the start of the namespace device
+ * @n: num bytes to access
+ * @flags: READ, WRITE, and other REQ_* flags
+ *
+ * Note: Implementations may assume that offset + n never crosses ndio->align
+ */
+typedef int (*nd_rw_bytes_fn)(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags);
+#define nd_data_dir(flags) ((flags) & 1)
+
+/**
+ * struct nd_io - info for byte-aligned access to nd devices
+ * @rw_bytes: operation to perform byte-aligned access
+ * @align: a single ->rw_bytes() request may not cross this alignment
+ * @disk: whole disk block device for the namespace
+ * @list: for the core to cache a list of "ndio"s for later association
+ * @dev: namespace device
+ * @claims: list of clients using this interface
+ * @lock: protect @claims mutation
+ */
+struct nd_io {
+	nd_rw_bytes_fn rw_bytes;
+	unsigned long align;
+	struct gendisk *disk;
+	struct list_head list;
+	struct device *dev;
+	struct list_head claims;
+	spinlock_t lock;
+};
+
+struct nd_io_claim;
+typedef void (*ndio_notify_remove_fn)(struct nd_io_claim *ndio_claim);
+
+/**
+ * struct nd_io_claim - instance of a claim on a parent ndio
+ * @parent: ndio in use
+ * @notify_remove: ndio is going away, release resources
+ * @list: claim peers
+ * @holder: device that has claimed this ndio; a reference is held on
+ *	it for the lifetime of the claim
+ *
+ * An ndio may be claimed multiple times, consider the case of a btt
+ * instance per partition on a namespace.
+ */
+struct nd_io_claim {
+	struct nd_io *parent;
+	ndio_notify_remove_fn notify_remove;
+	struct list_head list;
+	struct device *holder;
+};
+
+struct nd_btt {
+	struct device dev;
+	struct nd_io *ndio;
+	struct block_device *backing_dev;
+	unsigned long lbasize;
+	u8 *uuid;
+	u64 offset;
+	int id;
+	struct nd_io_claim *ndio_claim;
+};
+
+static inline u64 nd_partition_offset(struct block_device *bdev)
+{
+	struct hd_struct *p;
+
+	if (bdev == bdev->bd_contains)
+		return 0;
+
+	p = bdev->bd_part;
+	return ((u64) p->start_sect) << SECTOR_SHIFT;
+}
+
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -125,12 +209,22 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize,
 		const unsigned long *supported, char *buf);
 ssize_t nd_sector_size_store(struct device *dev, const char *buf,
 		unsigned long *current_lbasize, const unsigned long *supported);
+int nd_register_ndio(struct nd_io *ndio);
+int nd_unregister_ndio(struct nd_io *ndio);
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align);
+void ndio_del_claim(struct nd_io_claim *ndio_claim);
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove);
 struct nd_dimm;
 struct nd_dimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
 int nd_dimm_init_nsarea(struct nd_dimm_drvdata *ndd);
 int nd_dimm_init_config_data(struct nd_dimm_drvdata *ndd);
 int nd_dimm_set_config_data(struct nd_dimm_drvdata *ndd, size_t offset,
 		void *buf, size_t len);
+struct nd_btt *to_nd_btt(struct device *dev);
+struct btt_sb;
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_namespace_type(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
diff --git a/drivers/block/nd/pmem.c b/drivers/block/nd/pmem.c
index 7e7421d9c167..5e8c9c629f22 100644
--- a/drivers/block/nd/pmem.c
+++ b/drivers/block/nd/pmem.c
@@ -29,6 +29,7 @@
 struct pmem_device {
 	struct request_queue	*pmem_queue;
 	struct gendisk		*pmem_disk;
+	struct nd_io		ndio;
 
 	/* One contiguous memory region per device */
 	phys_addr_t		phys_addr;
@@ -96,6 +97,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	return 0;
 }
 
+static int pmem_rw_bytes(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags)
+{
+	struct pmem_device *pmem = container_of(ndio, typeof(*pmem), ndio);
+	int rw = nd_data_dir(flags);
+
+	/* range check without forming offset + n, which can wrap around */
+	if (unlikely(offset > pmem->size || n > pmem->size - offset)) {
+		dev_WARN_ONCE(ndio->dev, 1, "%s: request out of range\n",
+				__func__);
+		return -EFAULT;
+	}
+
+	if (rw == READ)
+		memcpy(buf, pmem->virt_addr + offset, n);
+	else
+		memcpy(pmem->virt_addr + offset, buf, n);
+	return 0;
+}
+
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
 			      void **kaddr, unsigned long *pfn, long size)
 {
@@ -169,8 +190,6 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res,
 	set_capacity(disk, pmem->size >> 9);
 	pmem->pmem_disk = disk;
 
-	add_disk(disk);
-
 	return pmem;
 
 out_free_queue:
@@ -222,7 +241,12 @@ static int nd_pmem_probe(struct device *dev)
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);
 
+	nd_bus_lock(dev);
+	add_disk(pmem->pmem_disk);
 	dev_set_drvdata(dev, pmem);
+	nd_init_ndio(&pmem->ndio, pmem_rw_bytes, dev, pmem->pmem_disk, 0);
+	nd_register_ndio(&pmem->ndio);
+	nd_bus_unlock(dev);
 
 	return 0;
 }
@@ -231,6 +255,7 @@ static int nd_pmem_remove(struct device *dev)
 {
 	struct pmem_device *pmem = dev_get_drvdata(dev);
 
+	nd_unregister_ndio(&pmem->ndio);
 	pmem_free(pmem);
 	return 0;
 }
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 0b4dcabb248a..e595751c613d 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -181,6 +181,7 @@ static inline const char *nd_dimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5  /* persistent memory namespace (may alias) */
 #define ND_DEVICE_NAMESPACE_BLK 6   /* block-data-window namespace (may alias) */
+#define ND_DEVICE_BTT 7		    /* block-translation table device */
 
 enum nd_driver_flags {
 	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
@@ -189,6 +190,7 @@ enum nd_driver_flags {
 	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
 	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
+	ND_DRIVER_BTT		  = 1 << ND_DEVICE_BTT,
 };
 
 enum {


  parent reply	other threads:[~2015-05-27 22:26 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-27 22:24 [GIT PULL v4 00/21] libnd: non-volatile memory device support Dan Williams
2015-05-27 22:24 ` Dan Williams
2015-05-27 22:24 ` [PATCH v4 01/21] e820, efi: add ACPI 6.0 persistent memory types Dan Williams
2015-05-27 22:24   ` Dan Williams
2015-05-27 22:24 ` [PATCH v4 02/21] libnd, nfit: initial libnd infrastructure and NFIT support Dan Williams
2015-05-27 22:24   ` Dan Williams
2015-05-27 22:24 ` [PATCH v4 03/21] libnd: control character device and libnd bus sysfs attributes Dan Williams
2015-05-27 22:24   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 04/21] libnd, nfit: dimm/memory-devices Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 05/21] libnd: control (ioctl) messages for libnd bus and dimm devices Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 06/21] libnd, nd_dimm: dimm driver and base libnd device-driver infrastructure Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 07/21] libnd, nfit: regions (block-data-window, persistent memory, volatile memory) Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 08/21] libnd: support for legacy (non-aliasing) nvdimms Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 09/21] libnd, nd_pmem: add libnd support to the pmem driver Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 10/21] pmem: Dynamically allocate partition numbers Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 11/21] libnd, nfit: add interleave-set state-tracking infrastructure Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 12/21] libnd: namespace indices: read and validate Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 13/21] libnd: pmem label sets and namespace instantiation Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:25 ` [PATCH v4 14/21] libnd: blk labels " Dan Williams
2015-05-27 22:25   ` Dan Williams
2015-05-27 22:26 ` [PATCH v4 15/21] libnd: write pmem label set Dan Williams
2015-05-27 22:26   ` Dan Williams
2015-05-27 22:26 ` [PATCH v4 16/21] libnd: write blk " Dan Williams
2015-05-27 22:26   ` Dan Williams
2015-05-27 22:26 ` Dan Williams [this message]
2015-05-27 22:26   ` [PATCH v4 17/21] libnd: infrastructure for btt devices Dan Williams
2015-05-27 22:26 ` [PATCH v4 18/21] nd_btt: atomic sector updates Dan Williams
2015-05-27 22:26   ` Dan Williams
2015-05-27 22:26 ` [PATCH v4 19/21] libnd, nfit, nd_blk: driver for BLK-mode access persistent memory Dan Williams
2015-05-27 22:26   ` Dan Williams
2015-05-27 22:26 ` [PATCH v4 20/21] nfit-test: manufactured NFITs for interface development Dan Williams
2015-05-27 22:26   ` Dan Williams
2015-05-27 22:26 ` [PATCH v4 21/21] libnd: Non-Volatile Devices Dan Williams
2015-05-27 22:26   ` Dan Williams
2015-05-27 22:36 ` [GIT PULL v4 00/21] libnd: non-volatile memory device support Rafael J. Wysocki
2015-05-27 22:36   ` Rafael J. Wysocki
2015-05-27 22:36   ` Rafael J. Wysocki
2015-05-27 22:52   ` Dan Williams
2015-05-27 22:52     ` Dan Williams
2015-05-27 22:52     ` Dan Williams
2015-05-27 23:17     ` Rafael J. Wysocki
2015-05-27 23:17       ` Rafael J. Wysocki
2015-05-27 23:17       ` Rafael J. Wysocki
2015-05-28  0:34       ` Dan Williams
2015-05-28  0:34         ` Dan Williams
2015-05-28  0:34         ` Dan Williams
2015-05-28  0:42         ` Rafael J. Wysocki
2015-05-28  0:42           ` Rafael J. Wysocki
2015-05-28  0:42           ` Rafael J. Wysocki
2015-05-28  0:55           ` Dan Williams
2015-05-28  0:55             ` Dan Williams
2015-05-28  0:55             ` Dan Williams
2015-05-28  1:01             ` Rafael J. Wysocki
2015-05-28  1:01               ` Rafael J. Wysocki
2015-05-28  1:01               ` Rafael J. Wysocki
2015-05-28  5:21               ` Williams, Dan J
2015-05-28  5:21                 ` Williams, Dan J
2015-05-28  5:21                 ` Williams, Dan J
2015-05-28  8:51 ` Christoph Hellwig
2015-05-28  8:51   ` Christoph Hellwig
2015-05-28 14:55   ` Dan Williams
2015-05-28 14:55     ` Dan Williams
2015-05-28 14:55     ` Dan Williams
2015-06-03  6:55     ` Christoph Hellwig
2015-06-03  6:55       ` Christoph Hellwig
2015-06-03  6:55       ` Christoph Hellwig
2015-06-03  7:02       ` Dan Williams
2015-06-03  7:02         ` Dan Williams
2015-06-03  7:02         ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150527222616.17965.7678.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=jmoyer@redhat.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=mingo@kernel.org \
    --cc=neilb@suse.de \
    --cc=sfr@canb.auug.org.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.