linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] File backed target for device-mapper
@ 2003-07-22 21:17 Christophe Saout
  2003-07-22 21:39 ` Christophe Saout
  2003-07-22 21:58 ` [dm-devel] " Alasdair G Kergon
  0 siblings, 2 replies; 3+ messages in thread
From: Christophe Saout @ 2003-07-22 21:17 UTC (permalink / raw)
  To: dm-devel; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 543 bytes --]

Hi!

I just wrote a dm target uses a file as backend instead of another block
device.

It's heavily based on the linux 2.5 loop device (so it uses the inode
operation sendfile for read operations and the address space operations
prepare_write and commit_write for write operations).

A dm device can be created with the target options "filename offset"
where offset is given in bytes.

--
Christophe Saout <christophe@saout.de>
Please avoid sending me Word or PowerPoint attachments.
See http://www.fsf.org/philosophy/no-word-attachments.html

[-- Attachment #2: dm-file.c --]
[-- Type: text/x-c, Size: 10905 bytes --]

/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/module.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/bio.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/*
 * FIXME: Shouldn't this better be with the
 * definition of loff_t in posix_types.h
 */
#define OFFSET_FORMAT "%Ld"

/*
 * File: maps a linear range of a file.
 */
struct file_c {
	struct file *file;
	loff_t offset;

	/*
	 * a workqueue would have been nice but unfortunatly
	 * this needs additional memory to be allocated per
	 * request hence mempools again and so on, but since
	 * bios already have concatenation pointers use these
	 * and do the worker thread management ourself
	 */
	spinlock_t lock;
	struct bio *bio_head;
	struct bio *bio_tail;
	struct semaphore bh_mutex;
	struct semaphore sem;
};

/*
 * Fetch next bio from work queue
 */
static struct bio *fileio_next_bio(struct file_c *fc)
{
	struct bio *bio;

	spin_lock_irq(&fc->lock);
	bio = fc->bio_head;
	if (bio) {
		if (bio == fc->bio_tail)
			fc->bio_tail = NULL;
		fc->bio_head = bio->bi_next;
		bio->bi_next = NULL;
	}
	spin_unlock_irq(&fc->lock);

	return bio;
}

/*
 * Append bio to work queue
 */
static void fileio_queue_bio(struct file_c *fc, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&fc->lock, flags);
	if (fc->bio_tail)
		fc->bio_tail->bi_next = bio;
	else
		fc->bio_head = bio;
	fc->bio_tail = bio;
	spin_unlock_irqrestore(&fc->lock, flags);
}

/*
 * Callback for f_op->sendfile
 */
static int fileio_read_actor(read_descriptor_t *desc, struct page *page,
                             unsigned long offset, unsigned long size)
{
	char *src;
	char *dst = (char *)desc->buf;
	unsigned long count = desc->count;

	if (size > count)
		size = count;

	src = kmap(page) + offset;
	if (src != dst)	/* FIXME: is src == dst possible? */
		memcpy(dst, src, size);
	kunmap(page);

	desc->count = count - size;
	desc->written += size;
	(char *)desc->buf += size;

	return size;
}

/*
 * Handles a bio as a file read operation
 */
static int fileio_read(struct file_c *fc, struct bio *bio, loff_t pos)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i) {
		char *data;
		int r;

		/*
		 * No need do use that handy bio_kmap_irq macro since we are always
		 * alled in a non-irq context (from our worker thread actually)
		 */
		data = kmap(bv->bv_page) + bv->bv_offset,
		r = fc->file->f_op->sendfile(fc->file, &pos, bv->bv_len,
		                                 fileio_read_actor, data);
		kunmap(bv->bv_page);

		if (r < 0)
			break;

		pos += bv->bv_len;
	}

	return 0;
}

/*
 * Handles a bio_vec as a file write operation
 */
static int fileio_write_bvec(struct file_c *fc, struct bio_vec *bv,
                             int blocksize, loff_t pos)
{
	struct file *file = fc->file;
	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
	struct address_space_operations *aops = mapping->a_ops;
	struct page *page;
	char *src = kmap(bv->bv_page) + bv->bv_offset;
	char *dst;
	pgoff_t index;
	unsigned int size, offset;
	int len = bv->bv_len;

	index = pos >> PAGE_CACHE_SHIFT;
	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);

	down(&mapping->host->i_sem);
	while(len > 0) {
		size = PAGE_CACHE_SIZE - offset;
		if (size > len)
			size = len;

		page = grab_cache_page(mapping, index);
		if (!page)
			goto fail;

		if (aops->prepare_write(file, page, offset, offset+size))
			goto unlock;

		dst = kmap(page);
		memcpy(dst, src, size);
		flush_dcache_page(page);
		kunmap(page);

		if (aops->commit_write(file, page, offset, offset+size))
			goto unlock;

		src += size;
		len -= size;
		offset = 0;
		index++;
		pos += size;
		unlock_page(page);
		page_cache_release(page);
	}
	up(&mapping->host->i_sem);

	kunmap(bv->bv_page);
	return 0;

      unlock:
	unlock_page(page);
	page_cache_release(page);
      fail:
	up(&mapping->host->i_sem);
	kunmap(bv->bv_page);
	return -EIO;
}

/*
 * Handles a bio as a file write operation
 */
static int fileio_write(struct file_c *fc, struct bio *bio, loff_t pos)
{
	struct bio_vec *bv;
	int blocksize = fc->file->f_dentry->d_inode->i_blksize;
	int i;

	bio_for_each_segment(bv, bio, i) {
		char *data;
		int r;

		/*
		 * No need do use that handy bio_kmap_irq macro since we are always
		 * alled in a non-irq context (from our worker thread actually)
		 */
		data = kmap(bv->bv_page) + bv->bv_offset,
		r = fileio_write_bvec(fc, bv, blocksize, pos);
		kunmap(bv->bv_page);

		if (r < 0)
			break;

		pos += bv->bv_len;
	}

	return 0;
}

/*
 * Worker thread
 */
static int fileio_thread(void *data)
{
	struct file_c *fc = data;
	struct bio *bio;
	loff_t pos;
	int r;

	daemonize("dm-file");

	current->flags |= PF_IOTHREAD;
	set_user_nice(current, -20);

	up(&fc->sem);

	while (1) {
		down_interruptible(&fc->bh_mutex);

		bio = fileio_next_bio(fc);

		/* woken up but no data: termination signal */
		if (!bio)
			 break;

		pos = ((loff_t)bio->bi_sector << SECTOR_SHIFT) + fc->offset;

		if (bio_rw(bio) == WRITE)
			r = fileio_write(fc, bio, pos);
		else
			r = fileio_read(fc, bio, pos);

		bio_endio(bio, bio->bi_size, r);
	}

	up(&fc->sem);
	return 0;
}

/*
 * Returns the minimum that is _not_ zero, unless both are zero.
 */
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))

/*
 * Combine the io_restrictions with the default restrictions
 */
static void combine_default_restrictions(struct io_restrictions *rs)
{
	rs->max_sectors =
		min_not_zero(rs->max_sectors, (unsigned short)MAX_SECTORS);

	rs->max_phys_segments =
		min_not_zero(rs->max_phys_segments, (unsigned short)MAX_PHYS_SEGMENTS);

	rs->max_hw_segments =
		min_not_zero(rs->max_hw_segments, (unsigned short)MAX_HW_SEGMENTS);

	rs->hardsect_size =
		max(rs->hardsect_size, (unsigned short)(1 << SECTOR_SHIFT));

	rs->max_segment_size =
		min_not_zero(rs->max_segment_size, (unsigned int)MAX_SEGMENT_SIZE);

	rs->seg_boundary_mask =
		min_not_zero(rs->seg_boundary_mask, (unsigned long)0xffffffff);
}

/*
 * Construct a file backed mapping: <file_path> <offset>
 */
static int file_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct file_c *fc;
	struct inode *inode;
	loff_t file_size;
	int write_access = (dm_table_get_mode(ti->table) & FMODE_WRITE);
	int fopen_flags;

	if (argc != 2) {
		ti->error = "dm-file: Not enough arguments";
		return -EINVAL;
	}

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	if (fc == NULL) {
		ti->error = "dm-file: Cannot allocate file backed context";
		return -ENOMEM;
	}

	if (sscanf(argv[1], OFFSET_FORMAT, &fc->offset) != 1 || fc->offset < 0) {
		ti->error = "dm-file: Invalid file offset";
		goto bad1;
	}

	fopen_flags = write_access ? 2 : 0;
#if BITS_PER_LONG != 32
	fopen_flags |= O_LARGEFILE;
#endif
	fc->file = filp_open(argv[0], fopen_flags, 0);
	if (IS_ERR(fc->file)) {
		ti->error = "dm-file: File open failed";
		DMWARN("file: fopen failed %d", (int)PTR_ERR(fc->file));
		goto bad1;
	}

	inode = fc->file->f_dentry->d_inode;
	if (!S_ISREG(inode->i_mode)) {
		ti->error = "dm-file: Not a regular file";
		goto bad2;
	}

	if (!inode->i_fop->sendfile) {
		ti->error = "dm-file: Need sendfile support";
		goto bad2;
	}
	if (write_access) {
		struct address_space_operations *aops = inode->i_mapping->a_ops;
		if (!aops->prepare_write || !aops->commit_write) {
			ti->error = "dm-file: Need prepare_write and commit_write support";
			goto bad2;
		}
	}

	file_size = i_size_read(inode->i_mapping->host);
	if (fc->offset > file_size) {
		ti->error = "dm-file: Offset beyond end of file";
		goto bad2;
	}

	file_size -= fc->offset;
	file_size >>= SECTOR_SHIFT;
	if (file_size < (loff_t)ti->len) {
		ti->error = "dm-file: File too small";
		goto bad2;
	}

	/*
	 * FIXME: Since we don't call dm_get_device, we
	 * have to combine the device limits ourself
	 */
	combine_default_restrictions(&ti->limits);

	inode->i_mapping->gfp_mask &= ~(__GFP_IO|__GFP_FS);

	spin_lock_init(&fc->lock);
	fc->bio_head = NULL;
	fc->bio_tail = NULL;
	init_MUTEX_LOCKED(&fc->sem);
	init_MUTEX_LOCKED(&fc->bh_mutex);

	kernel_thread(fileio_thread, fc, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);

	ti->private = fc;
	return 0;

      bad2:
	fput(fc->file);

      bad1:
	kfree(fc);
	return -EINVAL;
}

static void file_dtr(struct dm_target *ti)
{
	struct file_c *fc = (struct file_c *) ti->private;

	/*
	 * device-mapper invokes us when there are no bios left
	 * this is the termination signal for the worker thread
	 */
	up(&fc->bh_mutex);

	/* wait for worker thread to terminate */
	down(&fc->sem);

	fput(fc->file);

	kfree(fc);
}

/*
 * Queue io operation to worker thread
 */
static int file_map(struct dm_target *ti, struct bio *bio,
		      union map_info *map_context)
{
	struct file_c *fc = (struct file_c *) ti->private;

	/* queue bio */
	fileio_queue_bio(fc, bio);

	/* wake up worker thread */
	up(&fc->bh_mutex);

	/* accepted bio, don't make new request */
	return 0;
}

/*
 * FIXME: Should this function be moved to dm-table.c?
 */
static int quote_spaces(char *out, const char *in, int maxlen)
{
	char *end = out + (maxlen - 1);

	while (out < end) {
		if (!*in)
			break;

		if (isspace(*in) || *in == '\\') {
			*out++ = '\\';
			if (out >= end)
				break;
		}

		*out++ = *in++;
	} 

	*out = '\0';

	return (maxlen - 1) - (end - out);
}

static int file_status(struct dm_target *ti, status_type_t type,
                       char *result, unsigned int maxlen)
{
	struct file_c *fc = (struct file_c *) ti->private;
	char *tmp, *path;
	int offset = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		tmp = kmalloc(PATH_MAX, GFP_KERNEL);
		if (tmp) {
			path = d_path(fc->file->f_dentry, fc->file->f_vfsmnt,
			              tmp, PATH_MAX);

			if (IS_ERR(path))
				DMWARN("dm-file: d_path failed %d", (int)PTR_ERR(path));
			else
				offset = quote_spaces(result, path, maxlen);

			kfree(tmp);
		} else
			DMWARN("dm-file: Cannot allocate temporary path buffer");

		if (!offset) {
			*result = '?';
			offset = 1;
		}

		snprintf(result + offset, maxlen - offset, " " OFFSET_FORMAT,
		         fc->offset);
		break;
	}
	return 0;
}

static struct target_type file_target = {
	.name   = "file",
	.module = THIS_MODULE,
	.ctr    = file_ctr,
	.dtr    = file_dtr,
	.map    = file_map,
	.status = file_status,
};

int __init dm_file_init(void)
{
	int r = dm_register_target(&file_target);

	if (r < 0)
		DMERR("file: register failed %d", r);

	return r;
}

void dm_file_exit(void)
{
	int r = dm_unregister_target(&file_target);

	if (r < 0)
		DMERR("file: unregister failed %d", r);
}

/*
 * module hooks
 */
module_init(dm_file_init)
module_exit(dm_file_exit)

MODULE_AUTHOR("Christophe Saout <christophe@saout.de>");
MODULE_DESCRIPTION(DM_NAME " target for file backed mapping");
MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC] File backed target for device-mapper
  2003-07-22 21:17 [RFC] File backed target for device-mapper Christophe Saout
@ 2003-07-22 21:39 ` Christophe Saout
  2003-07-22 21:58 ` [dm-devel] " Alasdair G Kergon
  1 sibling, 0 replies; 3+ messages in thread
From: Christophe Saout @ 2003-07-22 21:39 UTC (permalink / raw)
  To: dm-devel; +Cc: linux-kernel

Am Di, 2003-07-22 um 23.17 schrieb Christophe Saout:

> I just wrote a dm target uses a file as backend instead of another block
> device.
> 
> It's heavily based on the linux 2.5 loop device (so it uses the inode
> operation sendfile for read operations and the address space operations
> prepare_write and commit_write for write operations).

Umm... lalala... (a cleanup and a *fix*) :D

--- dm-file.c.orig      2003-07-22 23:36:57.639746368 +0200
+++ dm-file.c   2003-07-22 23:37:26.203404032 +0200
@@ -87,17 +87,16 @@
 {
        char *src;
        char *dst = (char *)desc->buf;
-       unsigned long count = desc->count;
  
-       if (size > count)
-               size = count;
+       if (size > desc->count)
+               size = desc->count;
  
        src = kmap(page) + offset;
        if (src != dst) /* FIXME: is src == dst possible? */
                memcpy(dst, src, size);
        kunmap(page);
  
-       desc->count = count - size;
+       desc->count -= size;
        desc->written += size;
        (char *)desc->buf += size;
  
@@ -127,8 +126,6 @@
  
                if (r < 0)
                        break;
-
-               pos += bv->bv_len;
        }
  
        return 0;


--
Christophe Saout <christophe@saout.de>
Please avoid sending me Word or PowerPoint attachments.
See http://www.fsf.org/philosophy/no-word-attachments.html


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dm-devel] [RFC] File backed target for device-mapper
  2003-07-22 21:17 [RFC] File backed target for device-mapper Christophe Saout
  2003-07-22 21:39 ` Christophe Saout
@ 2003-07-22 21:58 ` Alasdair G Kergon
  1 sibling, 0 replies; 3+ messages in thread
From: Alasdair G Kergon @ 2003-07-22 21:58 UTC (permalink / raw)
  To: dm-devel; +Cc: linux-kernel

On Tue, Jul 22, 2003 at 11:17:39PM +0200, Christophe Saout wrote:
> I just wrote a dm target uses a file as backend instead of another block
> device.

Another suggestion:

  A target that always returns a block of zeros [or more generally,
  some other repeating pattern e.g. if the read goes beyond the end
  of your file - required to be multiple of sector size - it loops 
  round to the beginning; writes get dropped]

So you can easily create, for example a /dev/zero-like block device of
arbitrary size, which might be useful for replacing a lost disk that
contained a stripe if you want to read off the data from the other
stripes at correct offsets.

Alasdair
-- 
agk@uk.sistina.com

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2003-07-22 21:43 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-07-22 21:17 [RFC] File backed target for device-mapper Christophe Saout
2003-07-22 21:39 ` Christophe Saout
2003-07-22 21:58 ` [dm-devel] " Alasdair G Kergon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).