All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: axboe@kernel.dk, sfr@canb.auug.org.au, rafael@kernel.org,
	neilb@suse.de, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org, mingo@kernel.org,
	linux-acpi@vger.kernel.org, jmoyer@redhat.com,
	linux-api@vger.kernel.org, akpm@linux-foundation.org, hch@lst.de
Subject: [PATCH v5 17/21] libnvdimm: infrastructure for btt devices
Date: Mon, 01 Jun 2015 20:15:41 -0400	[thread overview]
Message-ID: <20150602001541.4506.90125.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <20150602001134.4506.45867.stgit@dwillia2-desk3.amr.corp.intel.com>

Block devices from an nd bus, in addition to accepting "struct bio"
based requests, also have the capability to perform byte-aligned
accesses.  By default only the bio/block interface is used.  However, if
another driver can make effective use of the byte-aligned capability it
can claim/disable the block interface and use the byte-aligned "nd_io"
interface.

The BTT driver is the initial first consumer of this mechanism to allow
layering atomic sector update guarantees on top of nd_io capable
libnvdimm-block-devices, or their partitions.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/Kconfig      |    3 
 drivers/nvdimm/Makefile     |    1 
 drivers/nvdimm/btt.h        |   45 ++++
 drivers/nvdimm/btt_devs.c   |  442 +++++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/bus.c        |  128 ++++++++++++
 drivers/nvdimm/core.c       |   79 ++++++++
 drivers/nvdimm/nd-private.h |   28 +++
 drivers/nvdimm/nd.h         |   94 +++++++++
 drivers/nvdimm/pmem.c       |   29 +++
 include/uapi/linux/ndctl.h  |    2 
 10 files changed, 847 insertions(+), 4 deletions(-)
 create mode 100644 drivers/nvdimm/btt.h
 create mode 100644 drivers/nvdimm/btt_devs.c

diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 020acf860258..564c8c9b2af6 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -34,4 +34,7 @@ config BLK_DEV_PMEM
 
 	  Say Y if you want to use a NVDIMM described by NFIT
 
+config ND_BTT_DEVS
+	def_bool y
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index abce98f87f16..eb1bbce86592 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -11,3 +11,4 @@ libnvdimm-y += region_devs.o
 libnvdimm-y += region.o
 libnvdimm-y += namespace_devs.o
 libnvdimm-y += label.o
+libnvdimm-$(CONFIG_ND_BTT_DEVS) += btt_devs.o
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
new file mode 100644
index 000000000000..e8f6d8e0ddd3
--- /dev/null
+++ b/drivers/nvdimm/btt.h
@@ -0,0 +1,45 @@
+/*
+ * Block Translation Table library
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_BTT_H
+#define _LINUX_BTT_H
+
+#include <linux/types.h>
+
+#define BTT_SIG_LEN 16
+#define BTT_SIG "BTT_ARENA_INFO\0"
+
+struct btt_sb {
+	u8 signature[BTT_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le32 external_lbasize;
+	__le32 external_nlba;
+	__le32 internal_lbasize;
+	__le32 internal_nlba;
+	__le32 nfree;
+	__le32 infosize;
+	__le64 nextoff;
+	__le64 dataoff;
+	__le64 mapoff;
+	__le64 logoff;
+	__le64 info2off;
+	u8 padding[3968];
+	__le64 checksum;
+};
+
+#endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
new file mode 100644
index 000000000000..f288ed7c490f
--- /dev/null
+++ b/drivers/nvdimm/btt_devs.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-private.h"
+#include "btt.h"
+#include "nd.h"
+
+static DEFINE_IDA(btt_ida);
+
+static void nd_btt_release(struct device *dev)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	dev_dbg(dev, "%s\n", __func__);
+	WARN_ON(nd_btt->backing_dev);
+	ndio_del_claim(nd_btt->ndio_claim);
+	ida_simple_remove(&btt_ida, nd_btt->id);
+	kfree(nd_btt->uuid);
+	kfree(nd_btt);
+}
+
+static struct device_type nd_btt_device_type = {
+	.name = "nd_btt",
+	.release = nd_btt_release,
+};
+
+bool is_nd_btt(struct device *dev)
+{
+	return dev->type == &nd_btt_device_type;
+}
+
+struct nd_btt *to_nd_btt(struct device *dev)
+{
+	struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev);
+
+	WARN_ON(!is_nd_btt(dev));
+	return nd_btt;
+}
+EXPORT_SYMBOL(to_nd_btt);
+
+static const unsigned long btt_lbasize_supported[] = { 512, 4096, 0 };
+
+static ssize_t sector_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+}
+
+static ssize_t sector_size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+			btt_lbasize_supported);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nd_btt->uuid)
+		return sprintf(buf, "%pUb\n", nd_btt->uuid);
+	return sprintf(buf, "\n");
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t backing_dev_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char name[BDEVNAME_SIZE];
+
+	if (nd_btt->backing_dev)
+		return sprintf(buf, "/dev/%s\n",
+				bdevname(nd_btt->backing_dev, name));
+	else
+		return sprintf(buf, "\n");
+}
+
+static const fmode_t nd_btt_devs_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+
+static void nd_btt_ndio_notify_remove(struct nd_io_claim *ndio_claim)
+{
+	char bdev_name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+
+	if (!ndio_claim || !ndio_claim->holder)
+		return;
+
+	nd_btt = to_nd_btt(ndio_claim->holder);
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
+	dev_dbg(&nd_btt->dev, "%pf: %s: release /dev/%s\n",
+			__builtin_return_address(0), __func__,
+			bdevname(nd_btt->backing_dev, bdev_name));
+	blkdev_put(nd_btt->backing_dev, nd_btt_devs_mode);
+	nd_btt->backing_dev = NULL;
+
+	/*
+	 * Once we've had our backing device removed we need to be fully
+	 * reconfigured.  The bus will have already created a new seed
+	 * for this purpose, so now is a good time to clean up this
+	 * stale nd_btt instance.
+	 */
+	if (nd_btt->dev.driver)
+		nd_device_unregister(&nd_btt->dev, ND_ASYNC);
+	else {
+		ndio_del_claim(ndio_claim);
+		nd_btt->ndio_claim = NULL;
+	}
+}
+
+static ssize_t __backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char bdev_name[BDEVNAME_SIZE];
+	struct block_device *bdev;
+	struct nd_io *ndio;
+	char *path;
+
+	if (dev->driver) {
+		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		return -EBUSY;
+	}
+
+	path = kstrndup(buf, len, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	/* detach the backing device */
+	if (strcmp(strim(path), "") == 0) {
+		if (!nd_btt->backing_dev)
+			goto out;
+		nd_btt_ndio_notify_remove(nd_btt->ndio_claim);
+		goto out;
+	} else if (nd_btt->backing_dev) {
+		dev_dbg(dev, "backing_dev already set\n");
+		len = -EBUSY;
+		goto out;
+	}
+
+	bdev = blkdev_get_by_path(strim(path), nd_btt_devs_mode, nd_btt);
+	if (IS_ERR(bdev)) {
+		dev_dbg(dev, "open '%s' failed: %ld\n", strim(path),
+				PTR_ERR(bdev));
+		len = PTR_ERR(bdev);
+		goto out;
+	}
+
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512) {
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENXIO;
+		goto out;
+	}
+
+	ndio = ndio_lookup(nvdimm_bus, bdevname(bdev->bd_contains, bdev_name));
+	if (!ndio) {
+		dev_dbg(dev, "%s does not have an ndio interface\n",
+				strim(path));
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENXIO;
+		goto out;
+	}
+
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim) {
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENOMEM;
+		goto out;
+	}
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
+	nd_btt->backing_dev = bdev;
+
+ out:
+	kfree(path);
+	return len;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	device_lock(dev);
+	rc = __backing_dev_store(dev, attr, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(backing_dev);
+
+static bool is_nd_btt_idle(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nvdimm_bus->nd_btt == nd_btt || dev->driver || nd_btt->backing_dev)
+		return false;
+	return true;
+}
+
+static ssize_t delete_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* return 1 if can be deleted */
+	return sprintf(buf, "%d\n", is_nd_btt_idle(dev));
+}
+
+static ssize_t delete_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long val;
+
+	/* write 1 to delete */
+	if (kstrtoul(buf, 0, &val) != 0 || val != 1)
+		return -EINVAL;
+
+	/* prevent deletion while this btt is active, or is the current seed */
+	if (!is_nd_btt_idle(dev))
+		return -EBUSY;
+
+	/*
+	 * userspace raced itself if device goes active here and it gets
+	 * to keep the pieces
+	 */
+	nd_device_unregister(dev, ND_ASYNC);
+
+	return len;
+}
+static DEVICE_ATTR_RW(delete);
+
+static struct attribute *nd_btt_attributes[] = {
+	&dev_attr_sector_size.attr,
+	&dev_attr_backing_dev.attr,
+	&dev_attr_delete.attr,
+	&dev_attr_uuid.attr,
+	NULL,
+};
+
+static struct attribute_group nd_btt_attribute_group = {
+	.attrs = nd_btt_attributes,
+};
+
+static const struct attribute_group *nd_btt_attribute_groups[] = {
+	&nd_btt_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static struct nd_btt *__nd_btt_create(struct nvdimm_bus *nvdimm_bus,
+		unsigned long lbasize, u8 *uuid)
+{
+	struct nd_btt *nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
+	struct device *dev;
+
+	if (!nd_btt)
+		return NULL;
+	nd_btt->id = ida_simple_get(&btt_ida, 0, 0, GFP_KERNEL);
+	if (nd_btt->id < 0) {
+		kfree(nd_btt);
+		return NULL;
+	}
+
+	nd_btt->lbasize = lbasize;
+	if (uuid)
+		uuid = kmemdup(uuid, 16, GFP_KERNEL);
+	nd_btt->uuid = uuid;
+	dev = &nd_btt->dev;
+	dev_set_name(dev, "btt%d", nd_btt->id);
+	dev->parent = &nvdimm_bus->dev;
+	dev->type = &nd_btt_device_type;
+	dev->groups = nd_btt_attribute_groups;
+	return nd_btt;
+}
+
+struct nd_btt *nd_btt_create(struct nvdimm_bus *nvdimm_bus)
+{
+	struct nd_btt *nd_btt = __nd_btt_create(nvdimm_bus, 0, NULL);
+
+	if (!nd_btt)
+		return NULL;
+	nd_device_register(&nd_btt->dev);
+	return nd_btt;
+}
+
+/*
+ * nd_btt_sb_checksum: compute checksum for btt info block
+ *
+ * Returns a fletcher64 checksum of everything in the given info block
+ * except the last field (since that's where the checksum lives).
+ */
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+{
+	u64 sum, sum_save;
+
+	sum_save = btt_sb->checksum;
+	btt_sb->checksum = 0;
+	sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
+	btt_sb->checksum = sum_save;
+	return sum;
+}
+EXPORT_SYMBOL(nd_btt_sb_checksum);
+
+static int nd_btt_autodetect(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio,
+		struct block_device *bdev)
+{
+	char name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+	struct btt_sb *btt_sb;
+	u64 offset, checksum;
+	u32 lbasize;
+	u8 *uuid;
+	int rc;
+
+	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
+	if (!btt_sb)
+		return -ENODEV;
+
+	offset = nd_partition_offset(bdev);
+	rc = ndio->rw_bytes(ndio, btt_sb, offset + SZ_4K, sizeof(*btt_sb), READ);
+	if (rc)
+		goto out_free_sb;
+
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512)
+		goto out_free_sb;
+
+	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+		goto out_free_sb;
+
+	checksum = le64_to_cpu(btt_sb->checksum);
+	btt_sb->checksum = 0;
+	if (checksum != nd_btt_sb_checksum(btt_sb))
+		goto out_free_sb;
+	btt_sb->checksum = cpu_to_le64(checksum);
+
+	uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
+	if (!uuid)
+		goto out_free_sb;
+
+	lbasize = le32_to_cpu(btt_sb->external_lbasize);
+	nd_btt = __nd_btt_create(nvdimm_bus, lbasize, uuid);
+	if (!nd_btt)
+		goto out_free_uuid;
+
+	device_initialize(&nd_btt->dev);
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim)
+		goto out_free_btt;
+
+	nd_btt->backing_dev = bdev;
+	dev_dbg(&nd_btt->dev, "%s: activate %s\n", __func__,
+			bdevname(bdev, name));
+	__nd_device_register(&nd_btt->dev);
+	kfree(btt_sb);
+	return 0;
+
+ out_free_btt:
+	kfree(nd_btt);
+ out_free_uuid:
+	kfree(uuid);
+ out_free_sb:
+	kfree(btt_sb);
+
+	return -ENODEV;
+}
+
+void nd_btt_notify_ndio(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio)
+{
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_part_iter_init(&piter, ndio->disk, DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter))) {
+		struct block_device *bdev;
+		int rc;
+
+		bdev = bdget_disk(ndio->disk, part->partno);
+		if (!bdev)
+			continue;
+		if (blkdev_get(bdev, nd_btt_devs_mode, nvdimm_bus) != 0)
+			continue;
+		rc = nd_btt_autodetect(nvdimm_bus, ndio, bdev);
+		if (rc)
+			blkdev_put(bdev, nd_btt_devs_mode);
+		/* no need to scan further in the case of whole disk btt */
+		if (rc == 0 && part->partno == 0)
+			break;
+	}
+	disk_part_iter_exit(&piter);
+}
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index fbf21dbd80fa..5c977c1306db 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
+#include <linux/genhd.h>
 #include <linux/ndctl.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -40,6 +41,8 @@ static int to_nd_device_type(struct device *dev)
 		return ND_DEVICE_REGION_BLK;
 	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
 		return nd_region_to_namespace_type(to_nd_region(dev->parent));
+	else if (is_nd_btt(dev))
+		return ND_DEVICE_BTT;
 
 	return 0;
 }
@@ -84,6 +87,21 @@ static int nvdimm_bus_probe(struct device *dev)
 
 	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
 			dev_name(dev), rc);
+
+	/* check if our btt-seed has sprouted, and plant another */
+	if (rc == 0 && is_nd_btt(dev) && dev == &nvdimm_bus->nd_btt->dev) {
+		const char *sep = "", *name = "", *status = "failed";
+
+		nvdimm_bus->nd_btt = nd_btt_create(nvdimm_bus);
+		if (nvdimm_bus->nd_btt) {
+			status = "succeeded";
+			sep = ": ";
+			name = dev_name(&nvdimm_bus->nd_btt->dev);
+		}
+		dev_dbg(&nvdimm_bus->dev, "btt seed creation %s%s%s\n",
+				status, sep, name);
+	}
+
 	if (rc != 0)
 		module_put(provider);
 	return rc;
@@ -144,14 +162,19 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
 	put_device(dev);
 }
 
-void nd_device_register(struct device *dev)
+void __nd_device_register(struct device *dev)
 {
 	dev->bus = &nvdimm_bus_type;
-	device_initialize(dev);
 	get_device(dev);
 	async_schedule_domain(nd_async_device_register, dev,
 			&nd_async_domain);
 }
+
+void nd_device_register(struct device *dev)
+{
+	device_initialize(dev);
+	__nd_device_register(dev);
+}
 EXPORT_SYMBOL(nd_device_register);
 
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
@@ -200,6 +223,107 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 }
 EXPORT_SYMBOL(__nd_driver_register);
 
+/**
+ * nd_register_ndio() - register byte-aligned access capability for an nd-bdev
+ * @disk: child gendisk of the ndio namepace device
+ * @ndio: initialized ndio instance to register
+ *
+ * LOCKING: hold nvdimm_bus_lock() over the creation of ndio->disk and the
+ * subsequent nd_region_ndio event
+ */
+int nd_register_ndio(struct nd_io *ndio)
+{
+	struct nvdimm_bus *nvdimm_bus;
+	struct device *dev;
+
+	if (!ndio || !ndio->dev || !ndio->disk || !list_empty(&ndio->list)
+			|| !ndio->rw_bytes || !list_empty(&ndio->claims)) {
+		pr_debug("%s bad parameters from %pf\n", __func__,
+				__builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	dev = ndio->dev;
+	nvdimm_bus = walk_to_nvdimm_bus(dev);
+	if (!nvdimm_bus)
+		return -EINVAL;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+	list_add(&ndio->list, &nvdimm_bus->ndios);
+
+	/* TODO: generic infrastructure for 3rd party ndio claimers */
+	nd_btt_notify_ndio(nvdimm_bus, ndio);
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_register_ndio);
+
+/**
+ * __nd_unregister_ndio() - try to remove an ndio interface
+ * @ndio: interface to remove
+ */
+static int __nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct nd_io_claim *ndio_claim, *_n;
+	struct nvdimm_bus *nvdimm_bus;
+	LIST_HEAD(claims);
+
+	nvdimm_bus = walk_to_nvdimm_bus(ndio->dev);
+	if (!nvdimm_bus || list_empty(&ndio->list))
+		return -ENXIO;
+
+	spin_lock(&ndio->lock);
+	list_splice_init(&ndio->claims, &claims);
+	spin_unlock(&ndio->lock);
+
+	list_for_each_entry_safe(ndio_claim, _n, &claims, list)
+		ndio_claim->notify_remove(ndio_claim);
+
+	list_del_init(&ndio->list);
+
+	return 0;
+}
+
+int nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct device *dev = ndio->dev;
+	int rc;
+
+	nvdimm_bus_lock(dev);
+	rc = __nd_unregister_ndio(ndio);
+	nvdimm_bus_unlock(dev);
+
+	/*
+	 * Flush in case ->notify_remove() kicked off asynchronous device
+	 * unregistration
+	 */
+	nd_synchronize();
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_unregister_ndio);
+
+static struct nd_io *__ndio_lookup(struct nvdimm_bus *nvdimm_bus, const char *diskname)
+{
+	struct nd_io *ndio;
+
+	list_for_each_entry(ndio, &nvdimm_bus->ndios, list)
+		if (strcmp(diskname, ndio->disk->disk_name) == 0)
+			return ndio;
+
+	return NULL;
+}
+
+struct nd_io *ndio_lookup(struct nvdimm_bus *nvdimm_bus, const char *diskname)
+{
+	struct nd_io *ndio;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+	ndio = __ndio_lookup(nvdimm_bus, diskname);
+
+	return ndio;
+}
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 3cff35a94fb1..f4e638f6e721 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -55,6 +55,62 @@ bool is_nvdimm_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nvdimm_bus_locked);
 
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align)
+{
+	memset(ndio, 0, sizeof(*ndio));
+	INIT_LIST_HEAD(&ndio->claims);
+	INIT_LIST_HEAD(&ndio->list);
+	spin_lock_init(&ndio->lock);
+	ndio->dev = dev;
+	ndio->disk = disk;
+	ndio->align = align;
+	ndio->rw_bytes = rw_bytes;
+}
+EXPORT_SYMBOL(nd_init_ndio);
+
+void ndio_del_claim(struct nd_io_claim *ndio_claim)
+{
+	struct nd_io *ndio;
+	struct device *holder;
+
+	if (!ndio_claim)
+		return;
+	ndio = ndio_claim->parent;
+	holder = ndio_claim->holder;
+
+	dev_dbg(holder, "%s: drop %s\n", __func__, dev_name(ndio->dev));
+	spin_lock(&ndio->lock);
+	list_del(&ndio_claim->list);
+	spin_unlock(&ndio->lock);
+	put_device(ndio->dev);
+	kfree(ndio_claim);
+	put_device(holder);
+}
+
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove)
+{
+	struct nd_io_claim *ndio_claim = kzalloc(sizeof(*ndio_claim), GFP_KERNEL);
+
+	if (!ndio_claim)
+		return NULL;
+
+	INIT_LIST_HEAD(&ndio_claim->list);
+	ndio_claim->parent = ndio;
+	get_device(ndio->dev);
+
+	spin_lock(&ndio->lock);
+	list_add(&ndio_claim->list, &ndio->claims);
+	spin_unlock(&ndio->lock);
+
+	ndio_claim->holder = holder;
+	ndio_claim->notify_remove = notify_remove;
+	get_device(holder);
+
+	return ndio_claim;
+}
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
 	u32 *buf = addr;
@@ -75,6 +131,8 @@ static void nvdimm_bus_release(struct device *dev)
 {
 	struct nvdimm_bus *nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
 
+	WARN_ON(!list_empty(&nvdimm_bus->ndios));
+
 	ida_simple_remove(&nd_ida, nvdimm_bus->id);
 	kfree(nvdimm_bus);
 }
@@ -271,10 +329,28 @@ static ssize_t wait_probe_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(wait_probe);
 
+static ssize_t btt_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nvdimm_bus->nd_btt)
+		rc = sprintf(buf, "%s\n", dev_name(&nvdimm_bus->nd_btt->dev));
+	else
+		rc = sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(btt_seed);
+
 static struct attribute *nvdimm_bus_attributes[] = {
 	&dev_attr_commands.attr,
 	&dev_attr_wait_probe.attr,
 	&dev_attr_provider.attr,
+	&dev_attr_btt_seed.attr,
 	NULL,
 };
 
@@ -291,6 +367,7 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 
 	if (!nvdimm_bus)
 		return NULL;
+	INIT_LIST_HEAD(&nvdimm_bus->ndios);
 	INIT_LIST_HEAD(&nvdimm_bus->list);
 	init_waitqueue_head(&nvdimm_bus->probe_wait);
 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
@@ -319,6 +396,8 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
 	mutex_unlock(&nvdimm_bus_list_mutex);
 
+	nvdimm_bus->nd_btt = nd_btt_create(nvdimm_bus);
+
 	return nvdimm_bus;
  err:
 	put_device(&nvdimm_bus->dev);
diff --git a/drivers/nvdimm/nd-private.h b/drivers/nvdimm/nd-private.h
index b0eed3bcb76d..635af040023f 100644
--- a/drivers/nvdimm/nd-private.h
+++ b/drivers/nvdimm/nd-private.h
@@ -23,14 +23,21 @@ extern struct list_head nvdimm_bus_list;
 extern struct mutex nvdimm_bus_list_mutex;
 extern int nvdimm_major;
 
+struct block_device;
+struct nd_io_claim;
+struct nd_btt;
+struct nd_io;
+
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
 	wait_queue_head_t probe_wait;
 	struct module *module;
+	struct list_head ndios;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
 	struct mutex reconfig_mutex;
+	struct nd_btt *nd_btt;
 };
 
 struct nvdimm {
@@ -42,9 +49,29 @@ struct nvdimm {
 	int id;
 };
 
+struct nd_io *ndio_lookup(struct nvdimm_bus *nvdimm_bus, const char *diskname);
 bool is_nvdimm(struct device *dev);
 bool is_nd_blk(struct device *dev);
 bool is_nd_pmem(struct device *dev);
+#if IS_ENABLED(CONFIG_ND_BTT_DEVS)
+bool is_nd_btt(struct device *dev);
+struct nd_btt *nd_btt_create(struct nvdimm_bus *nvdimm_bus);
+void nd_btt_notify_ndio(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio);
+#else
+static inline bool is_nd_btt(struct device *dev)
+{
+	return false;
+}
+
+static inline struct nd_btt *nd_btt_create(struct nvdimm_bus *nvdimm_bus)
+{
+	return NULL;
+}
+
+static inline void nd_btt_notify_ndio(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio)
+{
+}
+#endif
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
 void nvdimm_bus_exit(void);
@@ -59,6 +86,7 @@ void nd_synchronize(void);
 int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
+void __nd_device_register(struct device *dev);
 int nd_match_dimm(struct device *dev, void *data);
 struct nd_label_id;
 char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index c3ffb5174e8f..a695187330ff 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -14,11 +14,17 @@
 #define __ND_H__
 #include <linux/libnvdimm.h>
 #include <linux/device.h>
+#include <linux/genhd.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
+#include <linux/fs.h>
 #include "label.h"
 
+enum {
+	SECTOR_SHIFT = 9,
+};
+
 struct nvdimm_drvdata {
 	struct device *dev;
 	int nsindex_size;
@@ -111,6 +117,84 @@ static inline unsigned nd_inc_seq(unsigned seq)
 	return next[seq & 3];
 }
 
+struct nd_io;
+/**
+ * nd_rw_bytes_fn() - access bytes relative to the "whole disk" namespace device
+ * @ndio: per-namespace context
+ * @buf: source / target for the write / read
+ * @offset: offset relative to the start of the namespace device
+ * @n: num bytes to access
+ * @flags: READ, WRITE, and other REQ_* flags
+ *
+ * Note: Implementations may assume that offset + n never crosses ndio->align
+ */
+typedef int (*nd_rw_bytes_fn)(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags);
+#define nd_data_dir(flags) (flags & 1)
+
+/**
+ * struct nd_io - info for byte-aligned access to nd devices
+ * @rw_bytes: operation to perform byte-aligned access
+ * @align: a single ->rw_bytes() request may not cross this alignment
+ * @gendisk: whole disk block device for the namespace
+ * @list: for the core to cache a list of "ndio"s for later association
+ * @dev: namespace device
+ * @claims: list of clients using this interface
+ * @lock: protect @claims mutation
+ */
+struct nd_io {
+	nd_rw_bytes_fn rw_bytes;
+	unsigned long align;
+	struct gendisk *disk;
+	struct list_head list;
+	struct device *dev;
+	struct list_head claims;
+	spinlock_t lock;
+};
+
+struct nd_io_claim;
+typedef void (*ndio_notify_remove_fn)(struct nd_io_claim *ndio_claim);
+
+/**
+ * struct nd_io_claim - instance of a claim on a parent ndio
+ * @notify_remove: ndio is going away, release resources
+ * @holder: object that has claimed this ndio
+ * @parent: ndio in use
+ * @holder: holder device
+ * @list: claim peers
+ *
+ * An ndio may be claimed multiple times, consider the case of a btt
+ * instance per partition on a namespace.
+ */
+struct nd_io_claim {
+	struct nd_io *parent;
+	ndio_notify_remove_fn notify_remove;
+	struct list_head list;
+	struct device *holder;
+};
+
+struct nd_btt {
+	struct device dev;
+	struct nd_io *ndio;
+	struct block_device *backing_dev;
+	unsigned long lbasize;
+	u8 *uuid;
+	u64 offset;
+	int id;
+	struct nd_io_claim *ndio_claim;
+};
+
+static inline u64 nd_partition_offset(struct block_device *bdev)
+{
+	struct hd_struct *p;
+
+	if (bdev == bdev->bd_contains)
+		return 0;
+
+	p = bdev->bd_part;
+	return ((u64) p->start_sect) << SECTOR_SHIFT;
+}
+
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -125,6 +209,13 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize,
 		const unsigned long *supported, char *buf);
 ssize_t nd_sector_size_store(struct device *dev, const char *buf,
 		unsigned long *current_lbasize, const unsigned long *supported);
+int nd_register_ndio(struct nd_io *ndio);
+int nd_unregister_ndio(struct nd_io *ndio);
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align);
+void ndio_del_claim(struct nd_io_claim *ndio_claim);
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove);
 int __init nvdimm_init(void);
 int __init nd_region_init(void);
 void nvdimm_exit(void);
@@ -135,6 +226,9 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 		void *buf, size_t len);
+struct nd_btt *to_nd_btt(struct device *dev);
+struct btt_sb;
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_namespace_type(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7e7421d9c167..0a85a9ca3ca4 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,6 +29,7 @@
 struct pmem_device {
 	struct request_queue	*pmem_queue;
 	struct gendisk		*pmem_disk;
+	struct nd_io		ndio;
 
 	/* One contiguous memory region per device */
 	phys_addr_t		phys_addr;
@@ -96,6 +97,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	return 0;
 }
 
+static int pmem_rw_bytes(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags)
+{
+	struct pmem_device *pmem = container_of(ndio, typeof(*pmem), ndio);
+	int rw = nd_data_dir(flags);
+
+	if (unlikely(offset + n > pmem->size)) {
+		dev_WARN_ONCE(ndio->dev, 1, "%s: request out of range\n",
+				__func__);
+		return -EFAULT;
+	}
+
+	if (rw == READ)
+		memcpy(buf, pmem->virt_addr + offset, n);
+	else
+		memcpy(pmem->virt_addr + offset, buf, n);
+
+	return 0;
+}
+
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
 			      void **kaddr, unsigned long *pfn, long size)
 {
@@ -169,8 +190,6 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res,
 	set_capacity(disk, pmem->size >> 9);
 	pmem->pmem_disk = disk;
 
-	add_disk(disk);
-
 	return pmem;
 
 out_free_queue:
@@ -222,7 +241,12 @@ static int nd_pmem_probe(struct device *dev)
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);
 
+	nvdimm_bus_lock(dev);
+	add_disk(pmem->pmem_disk);
 	dev_set_drvdata(dev, pmem);
+	nd_init_ndio(&pmem->ndio, pmem_rw_bytes, dev, pmem->pmem_disk, 0);
+	nd_register_ndio(&pmem->ndio);
+	nvdimm_bus_unlock(dev);
 
 	return 0;
 }
@@ -231,6 +255,7 @@ static int nd_pmem_remove(struct device *dev)
 {
 	struct pmem_device *pmem = dev_get_drvdata(dev);
 
+	nd_unregister_ndio(&pmem->ndio);
 	pmem_free(pmem);
 	return 0;
 }
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index ac6d40f4fa7e..6f620838ba5a 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -181,6 +181,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5  /* persistent memory namespace (may alias) */
 #define ND_DEVICE_NAMESPACE_BLK 6   /* block-data-window namespace (may alias) */
+#define ND_DEVICE_BTT 7		    /* block-translation table device */
 
 enum nd_driver_flags {
 	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
@@ -189,6 +190,7 @@ enum nd_driver_flags {
 	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
 	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
+	ND_DRIVER_BTT		  = 1 << ND_DEVICE_BTT,
 };
 
 enum {


WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@ml01.01.org
Cc: axboe@kernel.dk, sfr@canb.auug.org.au, rafael@kernel.org,
	neilb@suse.de, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org, mingo@kernel.org,
	linux-acpi@vger.kernel.org, jmoyer@redhat.com,
	linux-api@vger.kernel.org, akpm@linux-foundation.org, hch@lst.de
Subject: [PATCH v5 17/21] libnvdimm: infrastructure for btt devices
Date: Mon, 01 Jun 2015 20:15:41 -0400	[thread overview]
Message-ID: <20150602001541.4506.90125.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <20150602001134.4506.45867.stgit@dwillia2-desk3.amr.corp.intel.com>

Block devices from an nd bus, in addition to accepting "struct bio"
based requests, also have the capability to perform byte-aligned
accesses.  By default only the bio/block interface is used.  However, if
another driver can make effective use of the byte-aligned capability it
can claim/disable the block interface and use the byte-aligned "nd_io"
interface.

The BTT driver is the initial first consumer of this mechanism to allow
layering atomic sector update guarantees on top of nd_io capable
libnvdimm-block-devices, or their partitions.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/Kconfig      |    3 
 drivers/nvdimm/Makefile     |    1 
 drivers/nvdimm/btt.h        |   45 ++++
 drivers/nvdimm/btt_devs.c   |  442 +++++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/bus.c        |  128 ++++++++++++
 drivers/nvdimm/core.c       |   79 ++++++++
 drivers/nvdimm/nd-private.h |   28 +++
 drivers/nvdimm/nd.h         |   94 +++++++++
 drivers/nvdimm/pmem.c       |   29 +++
 include/uapi/linux/ndctl.h  |    2 
 10 files changed, 847 insertions(+), 4 deletions(-)
 create mode 100644 drivers/nvdimm/btt.h
 create mode 100644 drivers/nvdimm/btt_devs.c

diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 020acf860258..564c8c9b2af6 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -34,4 +34,7 @@ config BLK_DEV_PMEM
 
 	  Say Y if you want to use a NVDIMM described by NFIT
 
+config ND_BTT_DEVS
+	def_bool y
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index abce98f87f16..eb1bbce86592 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -11,3 +11,4 @@ libnvdimm-y += region_devs.o
 libnvdimm-y += region.o
 libnvdimm-y += namespace_devs.o
 libnvdimm-y += label.o
+libnvdimm-$(CONFIG_ND_BTT_DEVS) += btt_devs.o
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
new file mode 100644
index 000000000000..e8f6d8e0ddd3
--- /dev/null
+++ b/drivers/nvdimm/btt.h
@@ -0,0 +1,45 @@
+/*
+ * Block Translation Table library
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_BTT_H
+#define _LINUX_BTT_H
+
+#include <linux/types.h>
+
+#define BTT_SIG_LEN 16
+#define BTT_SIG "BTT_ARENA_INFO\0"
+
+struct btt_sb {
+	u8 signature[BTT_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le32 external_lbasize;
+	__le32 external_nlba;
+	__le32 internal_lbasize;
+	__le32 internal_nlba;
+	__le32 nfree;
+	__le32 infosize;
+	__le64 nextoff;
+	__le64 dataoff;
+	__le64 mapoff;
+	__le64 logoff;
+	__le64 info2off;
+	u8 padding[3968];
+	__le64 checksum;
+};
+
+#endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
new file mode 100644
index 000000000000..f288ed7c490f
--- /dev/null
+++ b/drivers/nvdimm/btt_devs.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-private.h"
+#include "btt.h"
+#include "nd.h"
+
+static DEFINE_IDA(btt_ida);
+
+static void nd_btt_release(struct device *dev)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	dev_dbg(dev, "%s\n", __func__);
+	WARN_ON(nd_btt->backing_dev);
+	ndio_del_claim(nd_btt->ndio_claim);
+	ida_simple_remove(&btt_ida, nd_btt->id);
+	kfree(nd_btt->uuid);
+	kfree(nd_btt);
+}
+
+static struct device_type nd_btt_device_type = {
+	.name = "nd_btt",
+	.release = nd_btt_release,
+};
+
+bool is_nd_btt(struct device *dev)
+{
+	return dev->type == &nd_btt_device_type;
+}
+
+struct nd_btt *to_nd_btt(struct device *dev)
+{
+	struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev);
+
+	WARN_ON(!is_nd_btt(dev));
+	return nd_btt;
+}
+EXPORT_SYMBOL(to_nd_btt);
+
+static const unsigned long btt_lbasize_supported[] = { 512, 4096, 0 };
+
+static ssize_t sector_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+}
+
+static ssize_t sector_size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+			btt_lbasize_supported);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nd_btt->uuid)
+		return sprintf(buf, "%pUb\n", nd_btt->uuid);
+	return sprintf(buf, "\n");
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t backing_dev_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char name[BDEVNAME_SIZE];
+
+	if (nd_btt->backing_dev)
+		return sprintf(buf, "/dev/%s\n",
+				bdevname(nd_btt->backing_dev, name));
+	else
+		return sprintf(buf, "\n");
+}
+
+static const fmode_t nd_btt_devs_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
+
+static void nd_btt_ndio_notify_remove(struct nd_io_claim *ndio_claim)
+{
+	char bdev_name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+
+	if (!ndio_claim || !ndio_claim->holder)
+		return;
+
+	nd_btt = to_nd_btt(ndio_claim->holder);
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
+	dev_dbg(&nd_btt->dev, "%pf: %s: release /dev/%s\n",
+			__builtin_return_address(0), __func__,
+			bdevname(nd_btt->backing_dev, bdev_name));
+	blkdev_put(nd_btt->backing_dev, nd_btt_devs_mode);
+	nd_btt->backing_dev = NULL;
+
+	/*
+	 * Once we've had our backing device removed we need to be fully
+	 * reconfigured.  The bus will have already created a new seed
+	 * for this purpose, so now is a good time to clean up this
+	 * stale nd_btt instance.
+	 */
+	if (nd_btt->dev.driver)
+		nd_device_unregister(&nd_btt->dev, ND_ASYNC);
+	else {
+		ndio_del_claim(ndio_claim);
+		nd_btt->ndio_claim = NULL;
+	}
+}
+
+static ssize_t __backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	char bdev_name[BDEVNAME_SIZE];
+	struct block_device *bdev;
+	struct nd_io *ndio;
+	char *path;
+
+	if (dev->driver) {
+		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		return -EBUSY;
+	}
+
+	path = kstrndup(buf, len, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	/* detach the backing device */
+	if (strcmp(strim(path), "") == 0) {
+		if (!nd_btt->backing_dev)
+			goto out;
+		nd_btt_ndio_notify_remove(nd_btt->ndio_claim);
+		goto out;
+	} else if (nd_btt->backing_dev) {
+		dev_dbg(dev, "backing_dev already set\n");
+		len = -EBUSY;
+		goto out;
+	}
+
+	bdev = blkdev_get_by_path(strim(path), nd_btt_devs_mode, nd_btt);
+	if (IS_ERR(bdev)) {
+		dev_dbg(dev, "open '%s' failed: %ld\n", strim(path),
+				PTR_ERR(bdev));
+		len = PTR_ERR(bdev);
+		goto out;
+	}
+
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512) {
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENXIO;
+		goto out;
+	}
+
+	ndio = ndio_lookup(nvdimm_bus, bdevname(bdev->bd_contains, bdev_name));
+	if (!ndio) {
+		dev_dbg(dev, "%s does not have an ndio interface\n",
+				strim(path));
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENXIO;
+		goto out;
+	}
+
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim) {
+		blkdev_put(bdev, nd_btt_devs_mode);
+		len = -ENOMEM;
+		goto out;
+	}
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
+	nd_btt->backing_dev = bdev;
+
+ out:
+	kfree(path);
+	return len;
+}
+
+static ssize_t backing_dev_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	device_lock(dev);
+	rc = __backing_dev_store(dev, attr, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(backing_dev);
+
+static bool is_nd_btt_idle(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nvdimm_bus->nd_btt == nd_btt || dev->driver || nd_btt->backing_dev)
+		return false;
+	return true;
+}
+
+static ssize_t delete_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* return 1 if can be deleted */
+	return sprintf(buf, "%d\n", is_nd_btt_idle(dev));
+}
+
+static ssize_t delete_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long val;
+
+	/* write 1 to delete */
+	if (kstrtoul(buf, 0, &val) != 0 || val != 1)
+		return -EINVAL;
+
+	/* prevent deletion while this btt is active, or is the current seed */
+	if (!is_nd_btt_idle(dev))
+		return -EBUSY;
+
+	/*
+	 * userspace raced itself if device goes active here and it gets
+	 * to keep the pieces
+	 */
+	nd_device_unregister(dev, ND_ASYNC);
+
+	return len;
+}
+static DEVICE_ATTR_RW(delete);
+
+static struct attribute *nd_btt_attributes[] = {
+	&dev_attr_sector_size.attr,
+	&dev_attr_backing_dev.attr,
+	&dev_attr_delete.attr,
+	&dev_attr_uuid.attr,
+	NULL,
+};
+
+static struct attribute_group nd_btt_attribute_group = {
+	.attrs = nd_btt_attributes,
+};
+
+static const struct attribute_group *nd_btt_attribute_groups[] = {
+	&nd_btt_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static struct nd_btt *__nd_btt_create(struct nvdimm_bus *nvdimm_bus,
+		unsigned long lbasize, u8 *uuid)
+{
+	struct nd_btt *nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
+	struct device *dev;
+
+	if (!nd_btt)
+		return NULL;
+	nd_btt->id = ida_simple_get(&btt_ida, 0, 0, GFP_KERNEL);
+	if (nd_btt->id < 0) {
+		kfree(nd_btt);
+		return NULL;
+	}
+
+	nd_btt->lbasize = lbasize;
+	if (uuid)
+		uuid = kmemdup(uuid, 16, GFP_KERNEL);
+	nd_btt->uuid = uuid;
+	dev = &nd_btt->dev;
+	dev_set_name(dev, "btt%d", nd_btt->id);
+	dev->parent = &nvdimm_bus->dev;
+	dev->type = &nd_btt_device_type;
+	dev->groups = nd_btt_attribute_groups;
+	return nd_btt;
+}
+
+struct nd_btt *nd_btt_create(struct nvdimm_bus *nvdimm_bus)
+{
+	struct nd_btt *nd_btt = __nd_btt_create(nvdimm_bus, 0, NULL);
+
+	if (!nd_btt)
+		return NULL;
+	nd_device_register(&nd_btt->dev);
+	return nd_btt;
+}
+
+/*
+ * nd_btt_sb_checksum: compute checksum for btt info block
+ *
+ * Returns a fletcher64 checksum of everything in the given info block
+ * except the last field (since that's where the checksum lives).
+ */
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+{
+	u64 sum, sum_save;
+
+	sum_save = btt_sb->checksum;
+	btt_sb->checksum = 0;
+	sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
+	btt_sb->checksum = sum_save;
+	return sum;
+}
+EXPORT_SYMBOL(nd_btt_sb_checksum);
+
+static int nd_btt_autodetect(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio,
+		struct block_device *bdev)
+{
+	char name[BDEVNAME_SIZE];
+	struct nd_btt *nd_btt;
+	struct btt_sb *btt_sb;
+	u64 offset, checksum;
+	u32 lbasize;
+	u8 *uuid;
+	int rc;
+
+	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
+	if (!btt_sb)
+		return -ENODEV;
+
+	offset = nd_partition_offset(bdev);
+	rc = ndio->rw_bytes(ndio, btt_sb, offset + SZ_4K, sizeof(*btt_sb), READ);
+	if (rc)
+		goto out_free_sb;
+
+	if (get_capacity(bdev->bd_disk) < SZ_16M / 512)
+		goto out_free_sb;
+
+	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+		goto out_free_sb;
+
+	checksum = le64_to_cpu(btt_sb->checksum);
+	btt_sb->checksum = 0;
+	if (checksum != nd_btt_sb_checksum(btt_sb))
+		goto out_free_sb;
+	btt_sb->checksum = cpu_to_le64(checksum);
+
+	uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
+	if (!uuid)
+		goto out_free_sb;
+
+	lbasize = le32_to_cpu(btt_sb->external_lbasize);
+	nd_btt = __nd_btt_create(nvdimm_bus, lbasize, uuid);
+	if (!nd_btt)
+		goto out_free_uuid;
+
+	device_initialize(&nd_btt->dev);
+	nd_btt->ndio_claim = ndio_add_claim(ndio, &nd_btt->dev,
+			nd_btt_ndio_notify_remove);
+	if (!nd_btt->ndio_claim)
+		goto out_free_btt;
+
+	nd_btt->backing_dev = bdev;
+	dev_dbg(&nd_btt->dev, "%s: activate %s\n", __func__,
+			bdevname(bdev, name));
+	__nd_device_register(&nd_btt->dev);
+	kfree(btt_sb);
+	return 0;
+
+ out_free_btt:
+	kfree(nd_btt);
+ out_free_uuid:
+	kfree(uuid);
+ out_free_sb:
+	kfree(btt_sb);
+
+	return -ENODEV;
+}
+
+void nd_btt_notify_ndio(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio)
+{
+	struct disk_part_iter piter;
+	struct hd_struct *part;
+
+	disk_part_iter_init(&piter, ndio->disk, DISK_PITER_INCL_PART0);
+	while ((part = disk_part_iter_next(&piter))) {
+		struct block_device *bdev;
+		int rc;
+
+		bdev = bdget_disk(ndio->disk, part->partno);
+		if (!bdev)
+			continue;
+		if (blkdev_get(bdev, nd_btt_devs_mode, nvdimm_bus) != 0)
+			continue;
+		rc = nd_btt_autodetect(nvdimm_bus, ndio, bdev);
+		if (rc)
+			blkdev_put(bdev, nd_btt_devs_mode);
+		/* no need to scan further in the case of whole disk btt */
+		if (rc == 0 && part->partno == 0)
+			break;
+	}
+	disk_part_iter_exit(&piter);
+}
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index fbf21dbd80fa..5c977c1306db 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
+#include <linux/genhd.h>
 #include <linux/ndctl.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -40,6 +41,8 @@ static int to_nd_device_type(struct device *dev)
 		return ND_DEVICE_REGION_BLK;
 	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
 		return nd_region_to_namespace_type(to_nd_region(dev->parent));
+	else if (is_nd_btt(dev))
+		return ND_DEVICE_BTT;
 
 	return 0;
 }
@@ -84,6 +87,21 @@ static int nvdimm_bus_probe(struct device *dev)
 
 	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
 			dev_name(dev), rc);
+
+	/* check if our btt-seed has sprouted, and plant another */
+	if (rc == 0 && is_nd_btt(dev) && dev == &nvdimm_bus->nd_btt->dev) {
+		const char *sep = "", *name = "", *status = "failed";
+
+		nvdimm_bus->nd_btt = nd_btt_create(nvdimm_bus);
+		if (nvdimm_bus->nd_btt) {
+			status = "succeeded";
+			sep = ": ";
+			name = dev_name(&nvdimm_bus->nd_btt->dev);
+		}
+		dev_dbg(&nvdimm_bus->dev, "btt seed creation %s%s%s\n",
+				status, sep, name);
+	}
+
 	if (rc != 0)
 		module_put(provider);
 	return rc;
@@ -144,14 +162,19 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
 	put_device(dev);
 }
 
-void nd_device_register(struct device *dev)
+void __nd_device_register(struct device *dev)
 {
 	dev->bus = &nvdimm_bus_type;
-	device_initialize(dev);
 	get_device(dev);
 	async_schedule_domain(nd_async_device_register, dev,
 			&nd_async_domain);
 }
+
+void nd_device_register(struct device *dev)
+{
+	device_initialize(dev);
+	__nd_device_register(dev);
+}
 EXPORT_SYMBOL(nd_device_register);
 
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
@@ -200,6 +223,107 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 }
 EXPORT_SYMBOL(__nd_driver_register);
 
+/**
+ * nd_register_ndio() - register byte-aligned access capability for an nd-bdev
+ * @disk: child gendisk of the ndio namepace device
+ * @ndio: initialized ndio instance to register
+ *
+ * LOCKING: hold nvdimm_bus_lock() over the creation of ndio->disk and the
+ * subsequent nd_region_ndio event
+ */
+int nd_register_ndio(struct nd_io *ndio)
+{
+	struct nvdimm_bus *nvdimm_bus;
+	struct device *dev;
+
+	if (!ndio || !ndio->dev || !ndio->disk || !list_empty(&ndio->list)
+			|| !ndio->rw_bytes || !list_empty(&ndio->claims)) {
+		pr_debug("%s bad parameters from %pf\n", __func__,
+				__builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	dev = ndio->dev;
+	nvdimm_bus = walk_to_nvdimm_bus(dev);
+	if (!nvdimm_bus)
+		return -EINVAL;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+	list_add(&ndio->list, &nvdimm_bus->ndios);
+
+	/* TODO: generic infrastructure for 3rd party ndio claimers */
+	nd_btt_notify_ndio(nvdimm_bus, ndio);
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_register_ndio);
+
+/**
+ * __nd_unregister_ndio() - try to remove an ndio interface
+ * @ndio: interface to remove
+ */
+static int __nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct nd_io_claim *ndio_claim, *_n;
+	struct nvdimm_bus *nvdimm_bus;
+	LIST_HEAD(claims);
+
+	nvdimm_bus = walk_to_nvdimm_bus(ndio->dev);
+	if (!nvdimm_bus || list_empty(&ndio->list))
+		return -ENXIO;
+
+	spin_lock(&ndio->lock);
+	list_splice_init(&ndio->claims, &claims);
+	spin_unlock(&ndio->lock);
+
+	list_for_each_entry_safe(ndio_claim, _n, &claims, list)
+		ndio_claim->notify_remove(ndio_claim);
+
+	list_del_init(&ndio->list);
+
+	return 0;
+}
+
+int nd_unregister_ndio(struct nd_io *ndio)
+{
+	struct device *dev = ndio->dev;
+	int rc;
+
+	nvdimm_bus_lock(dev);
+	rc = __nd_unregister_ndio(ndio);
+	nvdimm_bus_unlock(dev);
+
+	/*
+	 * Flush in case ->notify_remove() kicked off asynchronous device
+	 * unregistration
+	 */
+	nd_synchronize();
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_unregister_ndio);
+
+static struct nd_io *__ndio_lookup(struct nvdimm_bus *nvdimm_bus, const char *diskname)
+{
+	struct nd_io *ndio;
+
+	list_for_each_entry(ndio, &nvdimm_bus->ndios, list)
+		if (strcmp(diskname, ndio->disk->disk_name) == 0)
+			return ndio;
+
+	return NULL;
+}
+
+struct nd_io *ndio_lookup(struct nvdimm_bus *nvdimm_bus, const char *diskname)
+{
+	struct nd_io *ndio;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+	ndio = __ndio_lookup(nvdimm_bus, diskname);
+
+	return ndio;
+}
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 3cff35a94fb1..f4e638f6e721 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -55,6 +55,62 @@ bool is_nvdimm_bus_locked(struct device *dev)
 }
 EXPORT_SYMBOL(is_nvdimm_bus_locked);
 
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align)
+{
+	memset(ndio, 0, sizeof(*ndio));
+	INIT_LIST_HEAD(&ndio->claims);
+	INIT_LIST_HEAD(&ndio->list);
+	spin_lock_init(&ndio->lock);
+	ndio->dev = dev;
+	ndio->disk = disk;
+	ndio->align = align;
+	ndio->rw_bytes = rw_bytes;
+}
+EXPORT_SYMBOL(nd_init_ndio);
+
+void ndio_del_claim(struct nd_io_claim *ndio_claim)
+{
+	struct nd_io *ndio;
+	struct device *holder;
+
+	if (!ndio_claim)
+		return;
+	ndio = ndio_claim->parent;
+	holder = ndio_claim->holder;
+
+	dev_dbg(holder, "%s: drop %s\n", __func__, dev_name(ndio->dev));
+	spin_lock(&ndio->lock);
+	list_del(&ndio_claim->list);
+	spin_unlock(&ndio->lock);
+	put_device(ndio->dev);
+	kfree(ndio_claim);
+	put_device(holder);
+}
+
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove)
+{
+	struct nd_io_claim *ndio_claim = kzalloc(sizeof(*ndio_claim), GFP_KERNEL);
+
+	if (!ndio_claim)
+		return NULL;
+
+	INIT_LIST_HEAD(&ndio_claim->list);
+	ndio_claim->parent = ndio;
+	get_device(ndio->dev);
+
+	spin_lock(&ndio->lock);
+	list_add(&ndio_claim->list, &ndio->claims);
+	spin_unlock(&ndio->lock);
+
+	ndio_claim->holder = holder;
+	ndio_claim->notify_remove = notify_remove;
+	get_device(holder);
+
+	return ndio_claim;
+}
+
 u64 nd_fletcher64(void *addr, size_t len, bool le)
 {
 	u32 *buf = addr;
@@ -75,6 +131,8 @@ static void nvdimm_bus_release(struct device *dev)
 {
 	struct nvdimm_bus *nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
 
+	WARN_ON(!list_empty(&nvdimm_bus->ndios));
+
 	ida_simple_remove(&nd_ida, nvdimm_bus->id);
 	kfree(nvdimm_bus);
 }
@@ -271,10 +329,28 @@ static ssize_t wait_probe_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(wait_probe);
 
+static ssize_t btt_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nvdimm_bus->nd_btt)
+		rc = sprintf(buf, "%s\n", dev_name(&nvdimm_bus->nd_btt->dev));
+	else
+		rc = sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(btt_seed);
+
 static struct attribute *nvdimm_bus_attributes[] = {
 	&dev_attr_commands.attr,
 	&dev_attr_wait_probe.attr,
 	&dev_attr_provider.attr,
+	&dev_attr_btt_seed.attr,
 	NULL,
 };
 
@@ -291,6 +367,7 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 
 	if (!nvdimm_bus)
 		return NULL;
+	INIT_LIST_HEAD(&nvdimm_bus->ndios);
 	INIT_LIST_HEAD(&nvdimm_bus->list);
 	init_waitqueue_head(&nvdimm_bus->probe_wait);
 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
@@ -319,6 +396,8 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
 	mutex_unlock(&nvdimm_bus_list_mutex);
 
+	nvdimm_bus->nd_btt = nd_btt_create(nvdimm_bus);
+
 	return nvdimm_bus;
  err:
 	put_device(&nvdimm_bus->dev);
diff --git a/drivers/nvdimm/nd-private.h b/drivers/nvdimm/nd-private.h
index b0eed3bcb76d..635af040023f 100644
--- a/drivers/nvdimm/nd-private.h
+++ b/drivers/nvdimm/nd-private.h
@@ -23,14 +23,21 @@ extern struct list_head nvdimm_bus_list;
 extern struct mutex nvdimm_bus_list_mutex;
 extern int nvdimm_major;
 
+struct block_device;
+struct nd_io_claim;
+struct nd_btt;
+struct nd_io;
+
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
 	wait_queue_head_t probe_wait;
 	struct module *module;
+	struct list_head ndios;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
 	struct mutex reconfig_mutex;
+	struct nd_btt *nd_btt;
 };
 
 struct nvdimm {
@@ -42,9 +49,29 @@ struct nvdimm {
 	int id;
 };
 
+struct nd_io *ndio_lookup(struct nvdimm_bus *nvdimm_bus, const char *diskname);
 bool is_nvdimm(struct device *dev);
 bool is_nd_blk(struct device *dev);
 bool is_nd_pmem(struct device *dev);
+#if IS_ENABLED(CONFIG_ND_BTT_DEVS)
+bool is_nd_btt(struct device *dev);
+struct nd_btt *nd_btt_create(struct nvdimm_bus *nvdimm_bus);
+void nd_btt_notify_ndio(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio);
+#else
+static inline bool is_nd_btt(struct device *dev)
+{
+	return false;
+}
+
+static inline struct nd_btt *nd_btt_create(struct nvdimm_bus *nvdimm_bus)
+{
+	return NULL;
+}
+
+static inline void nd_btt_notify_ndio(struct nvdimm_bus *nvdimm_bus, struct nd_io *ndio)
+{
+}
+#endif
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
 void nvdimm_bus_exit(void);
@@ -59,6 +86,7 @@ void nd_synchronize(void);
 int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
+void __nd_device_register(struct device *dev);
 int nd_match_dimm(struct device *dev, void *data);
 struct nd_label_id;
 char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index c3ffb5174e8f..a695187330ff 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -14,11 +14,17 @@
 #define __ND_H__
 #include <linux/libnvdimm.h>
 #include <linux/device.h>
+#include <linux/genhd.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
 #include <linux/types.h>
+#include <linux/fs.h>
 #include "label.h"
 
+enum {
+	SECTOR_SHIFT = 9,
+};
+
 struct nvdimm_drvdata {
 	struct device *dev;
 	int nsindex_size;
@@ -111,6 +117,84 @@ static inline unsigned nd_inc_seq(unsigned seq)
 	return next[seq & 3];
 }
 
+struct nd_io;
+/**
+ * nd_rw_bytes_fn() - access bytes relative to the "whole disk" namespace device
+ * @ndio: per-namespace context
+ * @buf: source / target for the write / read
+ * @offset: offset relative to the start of the namespace device
+ * @n: num bytes to access
+ * @flags: READ, WRITE, and other REQ_* flags
+ *
+ * Note: Implementations may assume that offset + n never crosses ndio->align
+ */
+typedef int (*nd_rw_bytes_fn)(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags);
+#define nd_data_dir(flags) (flags & 1)
+
+/**
+ * struct nd_io - info for byte-aligned access to nd devices
+ * @rw_bytes: operation to perform byte-aligned access
+ * @align: a single ->rw_bytes() request may not cross this alignment
+ * @gendisk: whole disk block device for the namespace
+ * @list: for the core to cache a list of "ndio"s for later association
+ * @dev: namespace device
+ * @claims: list of clients using this interface
+ * @lock: protect @claims mutation
+ */
+struct nd_io {
+	nd_rw_bytes_fn rw_bytes;
+	unsigned long align;
+	struct gendisk *disk;
+	struct list_head list;
+	struct device *dev;
+	struct list_head claims;
+	spinlock_t lock;
+};
+
+struct nd_io_claim;
+typedef void (*ndio_notify_remove_fn)(struct nd_io_claim *ndio_claim);
+
+/**
+ * struct nd_io_claim - instance of a claim on a parent ndio
+ * @notify_remove: ndio is going away, release resources
+ * @holder: object that has claimed this ndio
+ * @parent: ndio in use
+ * @holder: holder device
+ * @list: claim peers
+ *
+ * An ndio may be claimed multiple times, consider the case of a btt
+ * instance per partition on a namespace.
+ */
+struct nd_io_claim {
+	struct nd_io *parent;
+	ndio_notify_remove_fn notify_remove;
+	struct list_head list;
+	struct device *holder;
+};
+
+struct nd_btt {
+	struct device dev;
+	struct nd_io *ndio;
+	struct block_device *backing_dev;
+	unsigned long lbasize;
+	u8 *uuid;
+	u64 offset;
+	int id;
+	struct nd_io_claim *ndio_claim;
+};
+
+static inline u64 nd_partition_offset(struct block_device *bdev)
+{
+	struct hd_struct *p;
+
+	if (bdev == bdev->bd_contains)
+		return 0;
+
+	p = bdev->bd_part;
+	return ((u64) p->start_sect) << SECTOR_SHIFT;
+}
+
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -125,6 +209,13 @@ ssize_t nd_sector_size_show(unsigned long current_lbasize,
 		const unsigned long *supported, char *buf);
 ssize_t nd_sector_size_store(struct device *dev, const char *buf,
 		unsigned long *current_lbasize, const unsigned long *supported);
+int nd_register_ndio(struct nd_io *ndio);
+int nd_unregister_ndio(struct nd_io *ndio);
+void nd_init_ndio(struct nd_io *ndio, nd_rw_bytes_fn rw_bytes,
+		struct device *dev, struct gendisk *disk, unsigned long align);
+void ndio_del_claim(struct nd_io_claim *ndio_claim);
+struct nd_io_claim *ndio_add_claim(struct nd_io *ndio, struct device *holder,
+		ndio_notify_remove_fn notify_remove);
 int __init nvdimm_init(void);
 int __init nd_region_init(void);
 void nvdimm_exit(void);
@@ -135,6 +226,9 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
 		void *buf, size_t len);
+struct nd_btt *to_nd_btt(struct device *dev);
+struct btt_sb;
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_namespace_type(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7e7421d9c167..0a85a9ca3ca4 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,6 +29,7 @@
 struct pmem_device {
 	struct request_queue	*pmem_queue;
 	struct gendisk		*pmem_disk;
+	struct nd_io		ndio;
 
 	/* One contiguous memory region per device */
 	phys_addr_t		phys_addr;
@@ -96,6 +97,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	return 0;
 }
 
+static int pmem_rw_bytes(struct nd_io *ndio, void *buf, size_t offset,
+		size_t n, unsigned long flags)
+{
+	struct pmem_device *pmem = container_of(ndio, typeof(*pmem), ndio);
+	int rw = nd_data_dir(flags);
+
+	if (unlikely(offset + n > pmem->size)) {
+		dev_WARN_ONCE(ndio->dev, 1, "%s: request out of range\n",
+				__func__);
+		return -EFAULT;
+	}
+
+	if (rw == READ)
+		memcpy(buf, pmem->virt_addr + offset, n);
+	else
+		memcpy(pmem->virt_addr + offset, buf, n);
+
+	return 0;
+}
+
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
 			      void **kaddr, unsigned long *pfn, long size)
 {
@@ -169,8 +190,6 @@ static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res,
 	set_capacity(disk, pmem->size >> 9);
 	pmem->pmem_disk = disk;
 
-	add_disk(disk);
-
 	return pmem;
 
 out_free_queue:
@@ -222,7 +241,12 @@ static int nd_pmem_probe(struct device *dev)
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);
 
+	nvdimm_bus_lock(dev);
+	add_disk(pmem->pmem_disk);
 	dev_set_drvdata(dev, pmem);
+	nd_init_ndio(&pmem->ndio, pmem_rw_bytes, dev, pmem->pmem_disk, 0);
+	nd_register_ndio(&pmem->ndio);
+	nvdimm_bus_unlock(dev);
 
 	return 0;
 }
@@ -231,6 +255,7 @@ static int nd_pmem_remove(struct device *dev)
 {
 	struct pmem_device *pmem = dev_get_drvdata(dev);
 
+	nd_unregister_ndio(&pmem->ndio);
 	pmem_free(pmem);
 	return 0;
 }
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index ac6d40f4fa7e..6f620838ba5a 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -181,6 +181,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
 #define ND_DEVICE_NAMESPACE_PMEM 5  /* persistent memory namespace (may alias) */
 #define ND_DEVICE_NAMESPACE_BLK 6   /* block-data-window namespace (may alias) */
+#define ND_DEVICE_BTT 7		    /* block-translation table device */
 
 enum nd_driver_flags {
 	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
@@ -189,6 +190,7 @@ enum nd_driver_flags {
 	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
 	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
+	ND_DRIVER_BTT		  = 1 << ND_DEVICE_BTT,
 };
 
 enum {


  parent reply	other threads:[~2015-06-02  0:15 UTC|newest]

Thread overview: 108+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-02  0:14 [PATCH v5 00/21] libnvdimm: non-volatile memory devices Dan Williams
2015-06-02  0:14 ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 01/21] e820, efi: add ACPI 6.0 persistent memory types Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 02/21] libnvdimm, nfit: initial libnvdimm infrastructure and NFIT support Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-03 14:57   ` Christoph Hellwig
2015-06-03 14:57     ` Christoph Hellwig
2015-06-03 19:24     ` Williams, Dan J
2015-06-03 19:24       ` Williams, Dan J
2015-06-03 19:24       ` Williams, Dan J
2015-06-09  6:33       ` hch
2015-06-09  6:33         ` hch
2015-06-09  6:33         ` hch-jcswGhMUV9g
2015-06-09 22:27         ` Dan Williams
2015-06-09 22:27           ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 03/21] libnvdimm: control character device and libnvdimm bus sysfs attributes Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 04/21] libnvdimm, nfit: dimm/memory-devices Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 05/21] libnvdimm: control (ioctl) messages for libnvdimm bus and dimm devices Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-09  6:34   ` Christoph Hellwig
2015-06-09  6:34     ` Christoph Hellwig
2015-06-09  6:34     ` Christoph Hellwig
2015-06-09  6:57     ` Dan Williams
2015-06-09  6:57       ` Dan Williams
2015-06-09  6:57       ` Dan Williams
2015-06-10  7:33       ` Christoph Hellwig
2015-06-10  7:33         ` Christoph Hellwig
2015-06-10  7:33         ` Christoph Hellwig
2015-06-02  0:14 ` [PATCH v5 06/21] libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver infrastructure Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 07/21] libnvdimm, nfit: regions (block-data-window, persistent memory, volatile memory) Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 08/21] libnvdimm: support for legacy (non-aliasing) nvdimms Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-02  0:14 ` [PATCH v5 09/21] libnvdimm, nd_pmem: add libnvdimm support to the pmem driver Dan Williams
2015-06-02  0:14   ` Dan Williams
2015-06-03  7:44   ` Christoph Hellwig
     [not found]     ` <20150603074424.GA24949-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2015-06-03 19:31       ` Williams, Dan J
2015-06-03 19:31         ` Williams, Dan J
     [not found]         ` <1433359894.21035.33.camel-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2015-06-09  6:36           ` hch-jcswGhMUV9g
2015-06-09  6:36             ` hch
2015-06-02  0:15 ` [PATCH v5 10/21] pmem: Dynamically allocate partition numbers Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 11/21] libnvdimm, nfit: add interleave-set state-tracking infrastructure Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 12/21] libnvdimm: namespace indices: read and validate Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-09  6:39   ` Christoph Hellwig
2015-06-09  6:39     ` Christoph Hellwig
2015-06-09  6:39     ` Christoph Hellwig
2015-06-10 15:54     ` Dan Williams
2015-06-10 15:54       ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 13/21] libnvdimm: pmem label sets and namespace instantiation Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 14/21] libnvdimm: blk labels " Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 15/21] libnvdimm: write pmem label set Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 16/21] libnvdimm: write blk " Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` Dan Williams [this message]
2015-06-02  0:15   ` [PATCH v5 17/21] libnvdimm: infrastructure for btt devices Dan Williams
2015-06-09  6:42   ` Christoph Hellwig
2015-06-09  6:42     ` Christoph Hellwig
2015-06-10 18:46     ` Matthew Wilcox
2015-06-10 18:46       ` Matthew Wilcox
2015-06-11  7:28       ` Christoph Hellwig
2015-06-11  7:28         ` Christoph Hellwig
     [not found]         ` <20150611072812.GB1905-jcswGhMUV9g@public.gmane.org>
2015-06-17 16:47           ` Jeff Moyer
2015-06-17 16:47             ` Jeff Moyer
     [not found]             ` <x49381qp9ic.fsf-RRHT56Q3PSP4kTEheFKJxxDDeQx5vsVwAInAS/Ez/D0@public.gmane.org>
2015-06-17 16:50               ` Dan Williams
2015-06-17 16:50                 ` Dan Williams
2015-06-17 16:57                 ` Jeff Moyer
2015-06-17 16:57                   ` Jeff Moyer
2015-06-17 17:09                   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 18/21] nd_btt: atomic sector updates Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-09  6:44   ` Christoph Hellwig
2015-06-09  6:44     ` Christoph Hellwig
2015-06-09  6:44     ` Christoph Hellwig
2015-06-09 18:27     ` Vishal Verma
2015-06-09 18:27       ` Vishal Verma
2015-06-10  7:34       ` Christoph Hellwig
2015-06-10  7:34         ` Christoph Hellwig
2015-06-10  7:34         ` Christoph Hellwig
2015-06-10 18:24         ` Vishal Verma
2015-06-10 18:24           ` Vishal Verma
2015-06-02  0:15 ` [PATCH v5 19/21] libnvdimm, nfit, nd_blk: driver for BLK-mode access persistent memory Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15 ` [PATCH v5 20/21] tools/testing/nvdimm: manufactured NFITs for interface development Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-02  0:15   ` Dan Williams
2015-06-09  6:48   ` Christoph Hellwig
2015-06-09  6:48     ` Christoph Hellwig
2015-06-09  6:48     ` Christoph Hellwig
2015-06-11 20:12     ` Dan Williams
2015-06-11 20:12       ` Dan Williams
2015-06-02  0:16 ` [PATCH v5 21/21] libnvdimm: Non-Volatile Devices Dan Williams
2015-06-02  0:16   ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150602001541.4506.90125.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=jmoyer@redhat.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=mingo@kernel.org \
    --cc=neilb@suse.de \
    --cc=rafael@kernel.org \
    --cc=sfr@canb.auug.org.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.