linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Lin Yongting <linyongting@huawei.com>
To: <ross.zwisler@linux.intel.com>, <dan.j.williams@intel.com>,
	<axboe@fb.com>, <akpm@linux-foundation.org>, <richard@nod.at>,
	<linyongting@huawei.com>, <willy@linux.intel.com>,
	<wangxiaozhe@huawei.com>, <mingo@kernel.org>,
	<gregkh@linuxfoundation.org>
Cc: <linux-kernel@vger.kernel.org>
Subject: [PATCH] pramdisk: new block disk driver to perform persistent storage
Date: Fri, 18 Sep 2015 15:46:30 +0800	[thread overview]
Message-ID: <1442562390-91051-1-git-send-email-linyongting@huawei.com> (raw)

In embedded devices, user space applications use reserved memory
(i.e. persistent memory) to store business data; the data is kept
in this memory region across a system reboot or panic.

pramdisk is a block disk driver based on persistent memory. It provides
a file system interface for applications to read/write data in persistent
memory. Applications can use pramdisk to store log files or business data
in persistent memory through file system operations, and avoid operating
on or managing the memory directly.

pramdisk supports multiple persistent memory regions, and each one is a
block device named /dev/pram<N>.

Usage:
modprobe pramdisk.ko pmem=<size1>@<addr1> [ pmem=<size2>@<addr2> ... ]

For example:
  modprobe pramdisk.ko pmem=20M@2050M pmem=20M@2150M
  mkfs.ext3 /dev/pram1
  mkdir /tmp/test1
  mount -t ext3 /dev/pram1 /tmp/test1

Signed-off-by: Lin Yongting <linyongting@huawei.com>
Signed-off-by: Wang xiaozhe <wangxiaozhe@huawei.com>
---
 drivers/block/Kconfig    |   18 +++
 drivers/block/Makefile   |    1 +
 drivers/block/pramdisk.c |  305 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 324 insertions(+)
 create mode 100644 drivers/block/pramdisk.c

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 1b8094d..9bd68a5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -404,6 +404,24 @@ config BLK_DEV_RAM_DAX
 	  and will prevent RAM block device backing store memory from being
 	  allocated from highmem (only a problem for highmem systems).
 
+config BLK_DEV_PRAM_DISK
+	tristate "Persistent RAM Disk device support"
+	---help---
+	  Saying Y here will allow you to use a portion of your persistent
+	  RAM as a disk (i.e. a block device), so that you can make file
+	  systems on it and read and write to it. The RAM region is
+	  persistent and its data will be kept across a system reboot, panic
+	  or NMI.
+
+	  This functionality is very useful in embedded devices, where it can
+	  be used to access log data or business data through a file system.
+
+	  Say M here to compile this driver as a module.
+	  Say N here if you don't need this persistent RAM block device
+	  functionality.
+
+	  Use devices /dev/pram$N.
+
 config CDROM_PKTCDVD
 	tristate "Packet writing on CD/DVD media"
 	depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 02b688d..3cab7e5 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= xen-blkfront.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= xen-blkback/
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
 obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
+obj-$(CONFIG_BLK_DEV_PRAM_DISK)		+= pramdisk.o
 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX)	+= mtip32xx/
 
 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
diff --git a/drivers/block/pramdisk.c b/drivers/block/pramdisk.c
new file mode 100644
index 0000000..36e7bfe
--- /dev/null
+++ b/drivers/block/pramdisk.c
@@ -0,0 +1,305 @@
+/*
+ * Persistent RAM Disk device
+ *
+ * Copyright (C), 2001-2015, Huawei Tech. Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/types.h>
+
+#define SECTOR_SHIFT            9	/* log2 of SECTOR_SIZE */
+#define SECTOR_SIZE		(1 << SECTOR_SHIFT)	/* 512 bytes */
+
+#define PRAM_DEVICE_NAME	"pramdisk"	/* name given to (un)register_blkdev() */
+
+struct pram_dev {	/* one persistent-memory region exposed as a disk */
+	struct request_queue	*queue;		/* from blk_alloc_queue() */
+	struct gendisk		*gendisk;	/* from alloc_disk(1) */
+	struct list_head	dev_list;	/* link in pram_devices */
+
+	unsigned long		capability;	/* NOTE(review): unused in this file */
+	unsigned long		pram_addr;	/* region start; set from a phys_addr_t */
+	unsigned long		pram_size;	/* region bytes; set from a phys_addr_t */
+};
+
+static LIST_HEAD(pram_devices);	/* every pram_dev built from a pmem= value */
+static int pram_major;		/* major returned by register_blkdev() */
+
+static const struct block_device_operations prbd_fops = {
+	.owner		= THIS_MODULE,	/* no other callbacks are set */
+};
+
+/*
+ * Copy @len bytes between @page (at @off) and byte offset @pram_ofs of the
+ * region. The whole span is range-checked and the __iomem mapping is only
+ * touched via memcpy_fromio()/memcpy_toio(). Returns 0 or a negative errno.
+ */
+static int pram_do_bvec(struct pram_dev *dev, struct page *page,
+			unsigned int len, unsigned int off, int rw,
+			unsigned long pram_ofs)
+{
+	void *mem;
+	void __iomem *buf;
+
+	if (pram_ofs >= dev->pram_size || len > dev->pram_size - pram_ofs) {
+		pr_debug("Access address %lx exceeds the disk size %lx.\n",
+			pram_ofs, dev->pram_size);
+		return -ERANGE;
+	}
+
+	buf = ioremap(dev->pram_addr + pram_ofs, len);
+	if (!buf) {
+		pr_debug("ioremap fault\n");
+		return -EADDRNOTAVAIL;
+	}
+
+	mem = kmap_atomic(page);
+	if (rw == READ) {
+		memcpy_fromio(mem + off, buf, len);
+		flush_dcache_page(page);
+	} else {
+		flush_dcache_page(page);
+		memcpy_toio(buf, mem + off, len);
+	}
+	kunmap_atomic(mem);
+	iounmap(buf);
+
+	return 0;
+}
+
+/*
+ * Bio entry point installed by blk_queue_make_request(): copy each segment
+ * of @bio to or from the region, then complete the bio. The bio is failed
+ * if it runs past the disk capacity or if a segment copy fails.
+ */
+static void pram_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct block_device *bdev = bio->bi_bdev;
+	struct pram_dev	*dev = bdev->bd_disk->private_data;
+	sector_t sector = bio->bi_iter.bi_sector;
+	sector_t end = sector + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
+	unsigned long pram_ofs;
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	int rw;
+
+	if (end > get_capacity(bdev->bd_disk)) {
+		/* sector_t width is config dependent: print via a fixed type */
+		pr_debug("sector %llx is out of range.\n",
+			(unsigned long long)end);
+		goto fail;
+	}
+
+	rw = bio_rw(bio);
+	if (rw == READA)
+		rw = READ;
+
+	pram_ofs = sector << SECTOR_SHIFT;
+	bio_for_each_segment(bvec, bio, iter) {
+		if (pram_do_bvec(dev, bvec.bv_page, bvec.bv_len,
+				 bvec.bv_offset, rw, pram_ofs))
+			goto fail;
+		pram_ofs += bvec.bv_len;
+	}
+
+	bio_endio(bio);
+	return;
+fail:
+	bio_io_error(bio);
+}
+
+/*
+ * Set up (but do not register) the disk for the region @addr/@size and
+ * claim the range. Returns the device or ERR_PTR(-ENOMEM / -EBUSY).
+ */
+static struct pram_dev *pram_alloc(phys_addr_t addr, phys_addr_t size)
+{
+	struct pram_dev *dev;
+	struct gendisk *disk;
+	static int minor;
+	int ret = -ENOMEM;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto out;
+
+	dev->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!dev->queue)
+		goto out_free_dev;
+	dev->pram_addr = addr;
+	dev->pram_size = size;
+	blk_queue_make_request(dev->queue, pram_make_request);
+
+	disk = dev->gendisk = alloc_disk(1);
+	if (!disk)
+		goto out_free_queue;
+	disk->first_minor	= ++minor;
+	disk->fops		= &prbd_fops;
+	disk->private_data	= dev;
+	snprintf(disk->disk_name, DISK_NAME_LEN, "pram%d", minor);
+	ret = -EBUSY;
+	if (!request_mem_region(addr, size, disk->disk_name)) {
+		pr_err("request memory region [%llx - %llx] fail.\n",
+			(unsigned long long)addr,
+			(unsigned long long)(addr + size));
+		goto out_free_disk;
+	}
+
+	/* set only on success: put_disk() drops a ref on a set disk->queue */
+	disk->queue = dev->queue;
+	set_capacity(disk, size >> SECTOR_SHIFT);
+	return dev;
+
+out_free_disk:
+	put_disk(dev->gendisk);
+out_free_queue:
+	blk_cleanup_queue(dev->queue);
+out_free_dev:
+	kfree(dev);
+out:
+	return ERR_PTR(ret);
+}
+/* Release everything pram_alloc() acquired for @dev, then @dev itself. */
+static void pram_free(struct pram_dev *dev)
+{
+	release_mem_region(dev->pram_addr, dev->pram_size);
+	put_disk(dev->gendisk);
+	blk_cleanup_queue(dev->queue);
+	kfree(dev);
+}
+
+/* Param "get" hook: write one hex "<size>@<addr>" line per device. */
+static int pmem_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+	struct pram_dev *pram;
+	int used = 0;
+
+	list_for_each_entry(pram, &pram_devices, dev_list)
+		used += scnprintf(&buffer[used], PAGE_SIZE - used,
+				  "%llx@%llx\n",
+				  (unsigned long long)pram->pram_size,
+				  (unsigned long long)pram->pram_addr);
+
+	return used;
+}
+
+/*
+ * Module param format:
+ * pmem=<size1>@<addr1> [pmem=<size2>@<addr2> ... ]
+ *
+ * Each pmem param describes one persistent memory region and sets up a
+ * memory disk for it; on any error all disks set up so far are freed.
+ * <sizeX> and <addrX> can be octal, decimal or hexadecimal. If followed by
+ * "K", "M" or "G", the numbers are interpreted as kilo-, mega- or
+ * gigabytes. Both must be non-zero multiples of SECTOR_SIZE.
+ *
+ * Example:
+ *   pmem=100M@2050M pmem=20M@2150M
+ */
+static int pmem_cmdline_set(const char *val, const struct kernel_param *kp)
+{
+	char *p;
+	phys_addr_t addr, size;
+	struct pram_dev *dev, *next;
+	int ret = -EINVAL;
+
+	size = memparse(val, &p);
+	if (*p != '@')
+		goto fail;
+
+	addr = memparse(p + 1, &p);
+	/* reject trailing junk as well as zero or unaligned values */
+	if ((*p != '\0' && *p != '\n') || !addr || !size ||
+	    (addr % SECTOR_SIZE) || (size % SECTOR_SIZE))
+		goto fail;
+
+	dev = pram_alloc(addr, size);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto fail;
+	}
+
+	list_add_tail(&dev->dev_list, &pram_devices);
+	return 0;
+
+fail:
+	list_for_each_entry_safe(dev, next, &pram_devices, dev_list) {
+		list_del(&dev->dev_list);
+		/* never add_disk()ed: the disk holds no queue reference */
+		dev->gendisk->queue = NULL;
+		pram_free(dev);
+	}
+	return ret;
+}
+
+static const struct kernel_param_ops pmem_cmdline_param_ops = {
+	.set = pmem_cmdline_set,	/* creates one disk per pmem= value */
+	.get = pmem_cmdline_get,	/* lists the configured regions */
+};
+/* Expose the hooks as the read-only (0444) "pmem" parameter. */
+device_param_cb(pmem, &pmem_cmdline_param_ops, NULL, 0444);
+MODULE_PARM_DESC(pmem, "size and start address of contiguous Persistent RAM region.");
+/* Remove one disk: unlink it, del_gendisk() it, then free it. */
+static void pram_del_one(struct pram_dev *dev)
+{
+	list_del(&dev->dev_list);
+	del_gendisk(dev->gendisk);
+	pram_free(dev);
+}
+/* Module init: add every pmem= disk, or free them all on failure. */
+static int __init prbd_init(void)
+{
+	struct pram_dev *dev, *next;
+
+	pram_major = register_blkdev(0, PRAM_DEVICE_NAME);
+	if (pram_major < 0) {
+		list_for_each_entry_safe(dev, next, &pram_devices, dev_list) {
+			list_del(&dev->dev_list);
+			dev->gendisk->queue = NULL;	/* never added */
+			pram_free(dev);
+		}
+		return pram_major;
+	}
+	list_for_each_entry(dev, &pram_devices, dev_list) {
+		dev->gendisk->major = pram_major;
+		add_disk(dev->gendisk);
+	}
+	return 0;
+}
+
+/* Module exit: remove every disk, then give the major number back. */
+static void __exit prbd_exit(void)
+{
+	/* pram_del_one() unlinks its argument, so the list drains to empty */
+	while (!list_empty(&pram_devices))
+		pram_del_one(list_first_entry(&pram_devices,
+					      struct pram_dev, dev_list));
+
+	unregister_blkdev(pram_major, PRAM_DEVICE_NAME);
+}
+
+module_init(prbd_init);		/* prbd_init() is the load-time entry point */
+module_exit(prbd_exit);		/* prbd_exit() is the unload-time entry point */
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Persistent RAM Disk device");
+MODULE_AUTHOR("Lin Yongting <linyongting@huawei.com>");
+MODULE_AUTHOR("Wang xiaozhe <wangxiaozhe@huawei.com>");
-- 
1.7.9.5


             reply	other threads:[~2015-09-18  7:50 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-18  7:46 Lin Yongting [this message]
2015-09-18 16:18 ` [PATCH] pramdisk: new block disk driver to perform persistent storage Ross Zwisler
2015-09-18 16:41   ` Dan Williams
2015-09-21  9:35     ` Lin Yongting
2015-09-21  9:30   ` Lin Yongting

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1442562390-91051-1-git-send-email-linyongting@huawei.com \
    --to=linyongting@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@fb.com \
    --cc=dan.j.williams@intel.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=richard@nod.at \
    --cc=ross.zwisler@linux.intel.com \
    --cc=wangxiaozhe@huawei.com \
    --cc=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).