All of lore.kernel.org
 help / color / mirror / Atom feed
* delta filesystem prototype
@ 2009-02-28 14:42 Miklos Szeredi
  2009-02-28 17:22 ` [fuse-devel] " Goswin von Brederlow
  2009-03-03  8:31 ` hooanon05
  0 siblings, 2 replies; 51+ messages in thread
From: Miklos Szeredi @ 2009-02-28 14:42 UTC (permalink / raw)
  To: linux-fsdevel, fuse-devel

Here is my first try at a "delta" filesystem.  It takes two
directories, one of which is a read-only base, and the other is where
the differences are stored.  It stores data, metadata and directory
modifications without copying up whole files from the read-only
branch.

The layout of the delta store may look similar to the writable branch
of a union fs, but this is basically just coincidence (it was easier
to start out this way).

Currently it's implemented with fuse and it's not optimized at all, so
performance may suck in some cases.  But I think this is a useful
concept and a better model than trying to fit writable branches into
a union filesystem.

Comments and bug reports are welcome.

Thanks,
Miklos


/*
  Delta filesystem
  Copyright (C) 2009  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.

  gcc -Wall `pkg-config fuse --cflags --libs` deltafs.c -o deltafs

  usage: deltafs [opts] baseroot deltaroot mountpoint
*/

#define FUSE_USE_VERSION 26
#define _GNU_SOURCE

#include <fuse.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <dirent.h>
#include <string.h>
#include <assert.h>
#include <err.h>
#include <sys/time.h>

/*
 * Granularity at which file data is tracked as "in the delta" versus
 * "still in the base".  BLOCK_MASK selects the offset-within-block bits.
 * Note that the bitmap helpers shift offsets by 12, which matches a
 * 4096-byte block.
 */
#define BLOCK_SIZE 4096
#define BLOCK_MASK 4095ULL

/*
 * MAGIC_SEQ introduces the "merge" and "redir" suffixes appended to names
 * in the delta store.  REDIR_NULL is the redirect symlink target that
 * makes check_redirect() report an empty base path.
 */
#define MAGIC_SEQ "\\$@"
#define REDIR_NULL "(null)"

/* Root directories of the two branches; filled in by delta_opt_proc(). */
static char *baseroot = NULL;
static char *deltaroot = NULL;

/* Fixed-size buffer type used for every path built in this file. */
typedef char pathstr[4096];

/*
 * The set of backing paths that belong to one path in the mounted tree.
 */
struct deltaheader {
	pathstr dmpath;	/* delta store: "<name>" MAGIC_SEQ "merge" marker */
	pathstr ddpath;	/* delta store: the object itself */
	pathstr drpath;	/* delta store: "<name>" MAGIC_SEQ "redir" symlink */
	pathstr bpath;	/* base branch path; empty when redirected to REDIR_NULL */
};

/*
 * Probe whether a path exists, without following a trailing symlink.
 *
 * Returns 1 when lstat() succeeds, 0 when it fails with ENOENT, and -EIO
 * (after logging a warning) for any other lstat() failure.
 */
static int check_exist(const char *path)
{
	struct stat st;

	if (lstat(path, &st) != -1)
		return 1;

	if (errno == ENOENT)
		return 0;

	warn("lstat %s", path);
	return -EIO;
}

/*
 * Build the delta-store path for the first len bytes of path: deltaroot
 * followed by that prefix.  The result is written to dpath, bounded by
 * the size of a pathstr.
 */
static void get_deltapath(const char *path, size_t len, pathstr dpath)
{
	const int prefix_len = (int) len;

	snprintf(dpath, sizeof(pathstr), "%s%.*s", deltaroot, prefix_len,
		 path);
}

/*
 * Build the path of the merge marker for the first len bytes of path:
 * deltaroot, the prefix, then MAGIC_SEQ "merge".  The result is written
 * to mpath, bounded by the size of a pathstr.
 */
static void get_mergepath(const char *path, size_t len, pathstr mpath)
{
	const int prefix_len = (int) len;

	snprintf(mpath, sizeof(pathstr), "%s%.*s" MAGIC_SEQ "merge",
		 deltaroot, prefix_len, path);
}

/*
 * Build the path of the redirect symlink for the first len bytes of
 * path: deltaroot, the prefix, then MAGIC_SEQ "redir".  The result is
 * written to rpath, bounded by the size of a pathstr.
 */
static void get_redirpath(const char *path, size_t len, pathstr rpath)
{
	const int prefix_len = (int) len;

	snprintf(rpath, sizeof(pathstr), "%s%.*s" MAGIC_SEQ "redir",
		 deltaroot, prefix_len, path);
}

/*
 * Join a directory path and an entry name with a single '/' into
 * newpath, bounded by the size of a pathstr.
 */
static void create_path(pathstr newpath, const char *path, const char *name)
{
	const size_t cap = sizeof(pathstr);

	snprintf(newpath, cap, "%s/%s", path, name);
}

/*
 * Scan backwards from just before s for a '/' character.
 *
 * Returns a pointer to the nearest '/' strictly before s, or beg if the
 * scan reaches beg first (whether or not *beg is a '/').  The caller must
 * pass s > beg.
 */
static const char *last_slash(const char *s, const char *beg)
{
	const char *p = s - 1;

	while (p > beg && *p != '/')
		p--;

	return p;
}

/*
 * Look up the redirect symlink rpath and, if present, derive the base
 * path from it.
 *
 * rpath: path of the redirect symlink in the delta store
 * end:   remainder of the looked-up path to append after the redirect
 * len:   number of bytes of end to append
 * bpath: output buffer for the resulting base path
 *
 * Returns 0 if the symlink does not exist (bpath untouched), 1 if a
 * redirect was found (bpath is set to "" for a REDIR_NULL target, else to
 * baseroot + target-without-'*' + end), and -EIO on a readlink failure
 * or a target that does not start with '*'.
 */
static int check_redirect(const char *rpath, const char *end, size_t len,
			  pathstr bpath)
{
	pathstr target;
	int n = readlink(rpath, target, sizeof(target) - 1);

	if (n == -1) {
		if (errno == ENOENT)
			return 0;

		warn("readlink %s", rpath);
		return -EIO;
	}
	target[n] = '\0';

	if (strcmp(target, REDIR_NULL) == 0) {
		bpath[0] = '\0';
		return 1;
	}

	if (target[0] != '*') {
		warnx("invalid redirect in %s: %s\n", rpath, target);
		return -EIO;
	}

	/* Skip the leading '*' marker of the stored target. */
	snprintf(bpath, sizeof(pathstr), "%s%s%.*s", baseroot, target + 1,
		 (int) len, end);
	return 1;
}

/*
 * Resolve the base-branch path for the first len bytes of path and store
 * it in head->bpath.
 *
 * The redirect symlink of the object itself (head->drpath, which the
 * caller must have filled in) is consulted first.  If there is none, the
 * parent prefixes of path are tried from longest to shortest; the first
 * redirect found determines the base path, with the unredirected tail of
 * the prefix appended.  If no redirect exists at all, the base path is
 * simply baseroot followed by the prefix.
 *
 * The backward walk starts at path + len, not at the end of the whole
 * string, so a call with a prefix length never looks at redirects that
 * belong to components beyond the prefix and never passes a negative tail
 * length to check_redirect().
 *
 * Returns 0 on success or a negative errno value from check_redirect().
 * The caller must pass len >= 1.
 */
static int get_basepath(struct deltaheader *head, const char *path, size_t len)
{
	int res;
	const char *s = path + len;

	res = check_redirect(head->drpath, "", 0, head->bpath);
	if (res < 0)
		return res;

	while (!res) {
		pathstr rpath;

		s = last_slash(s, path);
		if (s == path) {
			/* Reached the root without finding any redirect. */
			snprintf(head->bpath, sizeof(pathstr), "%s%.*s",
				 baseroot, (int) len, path);
			break;
		}

		/* Try the redirect of the prefix ending just before s. */
		get_redirpath(path, s - path, rpath);
		res = check_redirect(rpath, s, len - (s - path), head->bpath);
		if (res < 0)
			return res;
	}

	return 0;
}

/*
 * Fill in all four paths of head for the first len bytes of path: the
 * three delta-store paths directly, then the base path via
 * get_basepath().
 *
 * Returns the result of get_basepath().
 */
static int get_deltaheader_len(struct deltaheader *head, const char *path,
			       unsigned int len)
{
	/* drpath must be set before get_basepath() reads it. */
	get_redirpath(path, len, head->drpath);
	get_mergepath(path, len, head->dmpath);
	get_deltapath(path, len, head->ddpath);

	return get_basepath(head, path, len);
}

/*
 * Fill in head for the whole of path.
 *
 * Returns the result of get_deltaheader_len().
 */
static int get_deltaheader(struct deltaheader *head, const char *path)
{
	const size_t full_len = strlen(path);

	return get_deltaheader_len(head, path, full_len);
}

/*
 * Open path read-only, read up to size bytes at offset into buf, and
 * close the file again.
 *
 * Returns the number of bytes read, or -errno if open() or pread() fails.
 */
static int read_file(char *path, char *buf, loff_t offset, size_t size)
{
	int nread;
	int fd = open(path, O_RDONLY);

	if (fd == -1)
		return -errno;

	nread = pread(fd, buf, size, offset);
	if (nread == -1)
		nread = -errno;

	close(fd);
	return nread;
}

/*
 * Open an existing file write-only, write size bytes from buf at offset,
 * and close the file again.
 *
 * Returns the number of bytes written, or -errno if open() or pwrite()
 * fails.
 */
static int write_file(char *path, const char *buf, loff_t offset, size_t size)
{
	int nwritten;
	int fd = open(path, O_WRONLY);

	if (fd == -1)
		return -errno;

	nwritten = pwrite(fd, buf, size, offset);
	if (nwritten == -1)
		nwritten = -errno;

	close(fd);
	return nwritten;
}

/*
 * Test the bit for the block containing offset in the bitmap stored in
 * the merge file head->dmpath.  One bit is kept per 4096-byte block,
 * eight blocks per byte, lowest bit first.
 *
 * Returns 1 if the bit is set, 0 if it is clear (including when the merge
 * file does not exist or is too short), and -EIO on any other open or
 * read failure.
 */
static int is_bitmap_delta(struct deltaheader *head, off_t offset)
{
	const off_t blkno = offset >> 12;
	const size_t bytepos = blkno >> 3;
	const unsigned char bit = 1 << (blkno & 7);
	unsigned char byte = 0;
	int nread;
	int fd;

	fd = open(head->dmpath, O_RDONLY);
	if (fd == -1) {
		if (errno != ENOENT) {
			warn("open %s read-only", head->dmpath);
			return -EIO;
		}
		return 0;
	}

	/* A read past the end leaves byte at 0, i.e. "not in delta". */
	nread = pread(fd, &byte, 1, bytepos);
	if (nread == -1)
		warn("read from %s pos %zi", head->dmpath, bytepos);
	close(fd);

	if (nread == -1)
		return -EIO;

	return (byte & bit) ? 1 : 0;
}

/*
 * Set the bit for the block containing offset in the bitmap stored in
 * the merge file head->dmpath (read-modify-write of a single byte).
 *
 * Returns 0 on success and -EIO if the merge file cannot be opened, read
 * or written; each failure is logged with warn().
 */
static int set_bitmap_delta(struct deltaheader *head, off_t offset)
{
	const off_t blkno = offset >> 12;
	const size_t bytepos = blkno >> 3;
	const unsigned char bit = 1 << (blkno & 7);
	unsigned char byte = 0;
	int status = -EIO;
	int fd;

	fd = open(head->dmpath, O_RDWR);
	if (fd == -1) {
		warn("open %s read-write", head->dmpath);
		return -EIO;
	}

	if (pread(fd, &byte, 1, bytepos) == -1) {
		warn("read from %s pos %zi", head->dmpath, bytepos);
	} else {
		byte |= bit;
		if (pwrite(fd, &byte, 1, bytepos) == -1)
			warn("write to %s pos %zi", head->dmpath, bytepos);
		else
			status = 0;
	}

	close(fd);
	return status;
}

/*
 * Read at most one block starting at the block-aligned offset, taking
 * the data from the delta file if the bitmap marks the block as delta
 * and from the base file otherwise.
 *
 * size is clamped to BLOCK_SIZE.  Returns the number of bytes read or a
 * negative errno value.  offset must be block aligned (asserted).
 */
static int read_block(struct deltaheader *head, char *buf, off_t offset,
		      size_t size)
{
	int in_delta;

	assert((offset & BLOCK_MASK) == 0);

	in_delta = is_bitmap_delta(head, offset);
	if (in_delta < 0)
		return in_delta;

	if (size > BLOCK_SIZE)
		size = BLOCK_SIZE;

	return read_file(in_delta ? head->ddpath : head->bpath, buf, offset,
			 size);
}

/*
 * Write at most one block to the delta file at the block-aligned offset
 * and then mark that block as delta in the bitmap.
 *
 * size is clamped to BLOCK_SIZE.  Returns the number of bytes written on
 * success, a negative errno value if the write fails, -EIO on a short
 * write, or the error from set_bitmap_delta().  offset must be block
 * aligned (asserted).
 */
static int write_block(struct deltaheader *head, const char *buf, off_t offset,
		       size_t size)
{
	int res;
	int res2;

	assert((offset & BLOCK_MASK) == 0);

	if (size > BLOCK_SIZE)
		size = BLOCK_SIZE;

	res = write_file(head->ddpath, buf, offset, size);
	if (res < 0)
		return res;

	if ((size_t) res != size) {
		/*
		 * Report the file that was actually written; the original
		 * passed an uninitialized pointer here.  warnx() appends the
		 * newline itself.
		 */
		warnx("short write at %lli %zu bytes to %s",
		      (long long) offset, size, head->ddpath);
		return -EIO;
	}

	/* Only flag the block once its data is safely in the delta file. */
	res2 = set_bitmap_delta(head, offset);
	if (res2)
		return res2;

	return res;
}

/*
 * Update the delta state of a merged file that grows from oldsize to
 * size bytes.
 *
 * If oldsize ends inside a block that is still served from the base, the
 * existing part of that block is copied into the delta file (padded with
 * zeros up to the new size or the block end) so the tail no longer
 * depends on the base.  Every following block that starts before size is
 * then marked as delta in the bitmap.
 *
 * Returns 0 on success or a negative errno value.  A failure to update
 * the bitmap is reported to the caller rather than ignored.
 */
static int extend_file(struct deltaheader *head, off_t oldsize, off_t size)
{
	int res;
	off_t offset = oldsize;

	if ((oldsize & BLOCK_MASK) != 0) {
		/* Start of the partially filled last block. */
		offset &= ~BLOCK_MASK;

		res = is_bitmap_delta(head, offset);
		if (res < 0)
			return res;

		if (!res) {
			char buf[BLOCK_SIZE];
			size_t num = oldsize - offset;

			/* Zero fill so the extended part reads back as zeros. */
			memset(buf, 0, sizeof(buf));
			res = read_block(head, buf, offset, num);
			if (res < 0)
				return res;

			/* write_block() clamps num to one block. */
			num = size - offset;
			res = write_block(head, buf, offset, num);
			if (res < 0)
				return res;
		}
		offset += BLOCK_SIZE;
	}

	while (offset < size) {
		res = set_bitmap_delta(head, offset);
		if (res < 0)
			return res;
		offset += BLOCK_SIZE;
	}

	return 0;
}


/*
 * Append a heap-allocated copy of name to the growable string array
 * *arrayp, whose current length is *nump.  Both are updated in place.
 *
 * Terminates the process via err() if either allocation fails.
 */
static void add_name(char ***arrayp, unsigned *nump, const char *name)
{
	unsigned count = *nump;
	char **grown = realloc(*arrayp, (count + 1) * sizeof(char *));
	char *dup = strdup(name);

	if (grown == NULL || dup == NULL)
		err(1, "memory alocation failed");

	grown[count] = dup;
	*arrayp = grown;
	*nump = count + 1;
}

/*
 * Linear search for name among the first num strings of array.
 *
 * Returns 1 if an exactly equal string is present, 0 otherwise.  array
 * is not touched when num is 0.
 */
static int check_name_array(char **array, unsigned num, const char *name)
{
	unsigned i = 0;

	while (i < num) {
		if (!strcmp(array[i], name))
			return 1;
		i++;
	}

	return 0;
}

/*
 * Release a string array built with add_name(): each of the num strings
 * and then the array itself.
 */
static void free_names(char **array, unsigned num)
{
	while (num > 0) {
		num--;
		free(array[num]);
	}

	free(array);
}

/*
 * Apply the owner, group, access time and modification time recorded in
 * stbuf to path, without following a trailing symlink.
 *
 * Returns 0 on success and -EIO (after logging a warning) if either
 * lchown() or utimensat() fails.
 */
static int set_attr(const char *path, struct stat *stbuf)
{
	struct timespec times[2];

	if (lchown(path, stbuf->st_uid, stbuf->st_gid) == -1) {
		warn("chown %s", path);
		return -EIO;
	}

	times[0] = stbuf->st_atim;
	times[1] = stbuf->st_mtim;
	if (utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW) == -1) {
		warn("utimes %s", path);
		return -EIO;
	}

	return 0;
}

/*
 * Create the empty merge marker file head->dmpath as a regular file with
 * mode 0600.
 *
 * Returns 0 on success and -EIO (after logging a warning) if mknod()
 * fails.
 */
static int create_merge(struct deltaheader *head)
{
	if (mknod(head->dmpath, 0600 | S_IFREG, 0) == -1) {
		warn("create %s", head->dmpath);
		return -EIO;
	}

	return 0;
}

/*
 * Make sure every ancestor directory of path exists in the delta store.
 *
 * Phase 1 walks upwards from path's parent until it finds an ancestor
 * that already exists in the delta store (or reaches the root).  Phase 2
 * then walks back down, creating each missing ancestor as a copy of the
 * corresponding base directory (mode, owner, times) together with its
 * merge marker.
 *
 * Returns 0 if the nearest existing delta ancestor has no merge marker
 * (nothing is created in that case), 1 otherwise, and a negative errno
 * value on failure.
 */
static int create_parent(const char *path)
{
	int res;
	struct deltaheader head;
	const char *s = path + strlen(path);
	struct stat stbuf;

	/* Phase 1: find the deepest ancestor already present in the delta. */
	while (1) {
		s = last_slash(s, path);
		if (s == path)
			break;

		get_deltapath(path, s - path, head.ddpath);
		res = lstat(head.ddpath, &stbuf);
		if (res == 0) {
			if (!S_ISDIR(stbuf.st_mode)) {
				warnx("%s not a directory", head.ddpath);
				return -EIO;
			}

			get_mergepath(path, s - path, head.dmpath);
			res = check_exist(head.dmpath);
			if (res < 0)
				return res;
			/* Existing ancestor without a merge marker. */
			if (!res)
				return 0;

			break;
		} else if (errno != ENOENT) {
			warn("stat %s", path);
			return -EIO;
		}
	}

	/*
	 * Phase 2: s points at the '/' ending the deepest existing ancestor
	 * (or at the root).  Create each following component except the
	 * final one, which is left to the caller.
	 */
	while (1) {
		s = strchr(s + 1, '/');
		if (!s)
			break;

		res = get_deltaheader_len(&head, path, s - path);
		if (res < 0)
			return res;

		/* The directory being mirrored must exist in the base. */
		res = lstat(head.bpath, &stbuf);
		if (res == -1) {
			warn("stat %s", head.bpath);
			return -EIO;
		}
		if (!S_ISDIR(stbuf.st_mode)) {
			warnx("%s not a directory", head.bpath);
			return -EIO;
		}
		res = mkdir(head.ddpath, stbuf.st_mode);
		if (res == -1) {
			warn("mkdir %s", head.ddpath);
			return -EIO;
		}

		res = set_attr(head.ddpath, &stbuf);
		if (res < 0)
			return res;

		res = create_merge(&head);
		if (res < 0)
			return res;
	}

	return 1;
}

/*
 * Tell whether a delta-store entry name is one of the bookkeeping names:
 * longer than 8 bytes and carrying MAGIC_SEQ at the start of its final 8
 * bytes (the combined length of MAGIC_SEQ plus a 5-letter tag).
 *
 * Returns 1 for such names and 0 otherwise.
 */
static int is_magic(const char *name, unsigned namelen)
{
	const char *suffix;

	if (namelen <= 8)
		return 0;

	suffix = name + namelen - 8;
	return memcmp(suffix, MAGIC_SEQ, 3) == 0;
}

/*
 * Tell whether a delta-store entry name is a redirect symlink name, i.e.
 * a magic name whose last five bytes are "redir".
 *
 * Returns 1 if so and 0 otherwise.
 */
static int is_redir(const char *name, unsigned namelen)
{
	return is_magic(name, namelen) &&
	       memcmp(name + namelen - 5, "redir", 5) == 0;
}

/*
 * Remove a merged directory from the delta store: unlink every redirect
 * symlink inside head->ddpath, remove the directory, then remove its
 * merge marker.
 *
 * Does nothing and returns 0 when the merge marker head->dmpath does not
 * exist.  Returns 0 on success and a negative errno value on failure
 * (-EIO for any failing filesystem call, after logging a warning).
 */
static int remove_empty_dir(struct deltaheader *head)
{
	DIR *dir;
	struct dirent *ent;
	int rc;

	rc = check_exist(head->dmpath);
	if (rc <= 0)
		return rc;

	dir = opendir(head->ddpath);
	if (dir == NULL) {
		warn("opendir %s", head->ddpath);
		return -EIO;
	}

	for (ent = readdir(dir); ent != NULL; ent = readdir(dir)) {
		pathstr linkpath;

		if (!is_redir(ent->d_name, strlen(ent->d_name)))
			continue;

		create_path(linkpath, head->ddpath, ent->d_name);
		if (unlink(linkpath) == -1) {
			warn("unlink %s", linkpath);
			closedir(dir);
			return -EIO;
		}
	}
	closedir(dir);

	if (rmdir(head->ddpath) == -1) {
		warn("rmdir %s", head->ddpath);
		return -EIO;
	}

	if (unlink(head->dmpath) == -1) {
		warn("unlink %s", head->dmpath);
		return -EIO;
	}

	return 0;
}


/*
 * Hide the base object behind path after its delta counterpart has been
 * removed.
 *
 * Any existing redirect for path is dropped and the base path is
 * recomputed.  If the base object exists, the delta parents are created
 * as needed and, when create_parent() reports 1, a REDIR_NULL redirect
 * is planted at head->drpath.  Finally remove_empty_dir() is run on head.
 *
 * Returns the result of remove_empty_dir() on success, or a negative
 * errno value from an earlier step.
 */
static int remove_base(struct deltaheader *head, const char *path)
{
	int rc;

	if (unlink(head->drpath) == -1 && errno != ENOENT) {
		warn("unlink %s", head->drpath);
		return -EIO;
	}

	rc = get_basepath(head, path, strlen(path));
	if (rc < 0)
		return rc;

	rc = check_exist(head->bpath);
	if (rc < 0)
		return rc;

	if (rc != 0) {
		rc = create_parent(path);
		if (rc < 0)
			return rc;

		if (rc != 0 && symlink(REDIR_NULL, head->drpath) == -1) {
			warn("symlink %s", head->drpath);
			return -EIO;
		}
	}

	return remove_empty_dir(head);
}

/*
 * Check that the directory at path contains nothing but "." and "..".
 *
 * Returns 0 if it is empty, -ENOTEMPTY if any other entry is found, and
 * -errno if the directory cannot be opened.
 */
static int check_empty_dir(const char *path)
{
	struct dirent *ent;
	int status = 0;
	DIR *dir = opendir(path);

	if (dir == NULL)
		return -errno;

	/* Stop reading as soon as the first real entry is seen. */
	while (status == 0 && (ent = readdir(dir)) != NULL) {
		if (strcmp(ent->d_name, ".") == 0 ||
		    strcmp(ent->d_name, "..") == 0)
			continue;

		status = -ENOTEMPTY;
	}
	closedir(dir);

	return status;
}

/*
 * Check whether a merged directory is empty from the user's point of
 * view.
 *
 * Pass 1 scans the delta directory: any ordinary entry, or any redirect
 * that points at real base content, makes the directory non-empty.
 * Redirects with a REDIR_NULL target are collected as names removed from
 * the base.  Pass 2 scans the base directory and requires that every
 * entry other than "." and ".." is one of those removed names.
 *
 * Returns 0 if empty, -ENOTEMPTY if not, -EIO on a delta-side failure,
 * or -errno if the base directory cannot be opened.
 *
 * The readlink() result is kept in its own variable so that the error
 * and -ENOTEMPTY results assigned inside the loop reach the function's
 * return value; a shadowing local previously swallowed them.
 */
static int check_empty_merged(struct deltaheader *head)
{
	DIR *dp;
	struct dirent *de;
	char **narray = NULL;
	unsigned nnum = 0;
	int res = 0;

	dp = opendir(head->ddpath);
	if (dp == NULL) {
		warn("opendir %s", head->ddpath);
		return -EIO;
	}

	while ((de = readdir(dp)) != NULL) {
		unsigned namelen = strlen(de->d_name);

		if (is_redir(de->d_name, namelen)) {
			int linklen;
			pathstr buf;
			pathstr linkpath;

			create_path(linkpath, head->ddpath, de->d_name);
			linklen = readlink(linkpath, buf, sizeof(buf) - 1);
			if (linklen == -1) {
				warn("readlink %s", linkpath);
				res = -EIO;
				break;
			}
			buf[linklen] = '\0';
			/* Strip the 8-byte magic suffix to get the plain name. */
			de->d_name[namelen - 8] = '\0';
			if (strcmp(buf, REDIR_NULL) == 0) {
				add_name(&narray, &nnum, de->d_name);
			} else {
				/* Redirect to live base content: an entry. */
				res = -ENOTEMPTY;
				break;
			}
		} else if (!is_magic(de->d_name, namelen)) {
			if (strcmp(de->d_name, ".") != 0 &&
			    strcmp(de->d_name, "..") != 0) {
				res = -ENOTEMPTY;
				break;
			}
		}
	}
	closedir(dp);
	if (res < 0)
		goto out;

	dp = opendir(head->bpath);
	if (dp == NULL) {
		res = -errno;
		goto out;
	} else {
		while ((de = readdir(dp)) != NULL) {
			if (strcmp(de->d_name, ".") != 0 &&
			    strcmp(de->d_name, "..") != 0 &&
			    !check_name_array(narray, nnum, de->d_name)) {
				res = -ENOTEMPTY;
				break;
			}
		}
		closedir(dp);
	}

out:
	free_names(narray, nnum);

	return res;
}

/*
 * Create the delta-side counterpart of a base object so that changed
 * metadata (and later data) can be stored for it.
 *
 * head:  paths for the object; head->bpath is read for symlinks
 * path:  path inside the mounted tree, used to create missing parents
 * stbuf: attributes the new delta object should get (type, mode, size,
 *        owner, times, device number)
 *
 * Directories are created empty, regular files are created sparse with
 * the size from stbuf, symlinks are copied, and other types are created
 * with mknod().  A merge marker is created for the new object, and the
 * redirect symlink is dropped for objects that have no base content left
 * to merge (non-regular non-directories and zero-length regular files).
 *
 * Returns 0 on success or a negative errno value.  A failure to size the
 * new regular file is reported, and the partly created file removed,
 * instead of being ignored.
 */
static int copy_up(struct deltaheader *head, const char *path,
		   struct stat *stbuf)
{
	int res;

	res = create_parent(path);
	if (res < 0)
		return res;

	if (S_ISDIR(stbuf->st_mode)) {
		res = mkdir(head->ddpath, stbuf->st_mode);
		if (res == -1) {
			warn("mkdir %s", head->ddpath);
			return -EIO;
		}
	} else if (S_ISREG(stbuf->st_mode)) {
		int fd;

		fd = open(head->ddpath, O_WRONLY | O_CREAT | O_EXCL,
			  stbuf->st_mode);
		if (fd == -1) {
			warn("create %s", head->ddpath);
			return -EIO;
		}
		if (stbuf->st_size != 0 &&
		    ftruncate(fd, stbuf->st_size) == -1) {
			warn("truncate %s", head->ddpath);
			close(fd);
			/* Best effort: don't leave a wrongly sized file. */
			unlink(head->ddpath);
			return -EIO;
		}
		close(fd);
	} else if (S_ISLNK(stbuf->st_mode)) {
		pathstr linkbuf;

		res = readlink(head->bpath, linkbuf, sizeof(linkbuf) - 1);
		if (res == -1)
			return -errno;

		linkbuf[res] = '\0';

		res = symlink(linkbuf, head->ddpath);
		if (res == -1) {
			warn("symlink %s", head->ddpath);
			return -EIO;
		}
	} else {
		res = mknod(head->ddpath, stbuf->st_mode, stbuf->st_rdev);
		if (res == -1) {
			warn("mknod %s", head->ddpath);
			return -EIO;
		}
	}

	res = set_attr(head->ddpath, stbuf);
	if (res < 0)
		return res;

	res = create_merge(head);
	if (res < 0)
		return res;

	/* Nothing of the base object remains to be merged in these cases. */
	if ((!S_ISREG(stbuf->st_mode) && !S_ISDIR(stbuf->st_mode)) ||
	    (S_ISREG(stbuf->st_mode) && stbuf->st_size == 0)) {
		res = unlink(head->drpath);
		if (res == -1 && errno != ENOENT) {
			warn("unlink %s", head->drpath);
			return -EIO;
		}
	}
	return 0;
}

/*
 * FUSE getattr handler: stat the object at path.
 *
 * The delta object is used if it exists.  Otherwise the base object is
 * used, unless the base path is empty or the parent directory exists in
 * the delta store without a merge marker; both cases give -ENOENT.
 *
 * Returns 0 on success or a negative errno value.
 */
static int delta_getattr(const char *path, struct stat *stbuf)
{
	int res;
	struct deltaheader head;

	res = get_deltaheader(&head, path);
	if (res < 0)
		return res;

	res = lstat(head.ddpath, stbuf);
	if (res == -1) {
		char *s;
		pathstr parentpath;

		if (errno != ENOENT) {
			warn("stat %s", head.ddpath);
			return -EIO;
		}
		/* Empty base path: nothing in the base for this name. */
		if (!head.bpath[0])
			return -ENOENT;

		s = strrchr(path, '/');
		assert(s != NULL);
		/* Entries directly under the root skip the parent check. */
		if (s == path)
			goto check;

		get_deltapath(path, s - path, parentpath);
		res = check_exist(parentpath);
		if (res < 0)
			return res;

		if (res) {
			pathstr mpath;

			/* Parent exists in the delta: it must be merged. */
			get_mergepath(path, s - path, mpath);
			res = check_exist(mpath);
			if (res < 0)
				return res;

			if (!res)
				return -ENOENT;
		}
check:
		res = lstat(head.bpath, stbuf);
	}
	if (res == -1)
		return -errno;

	return 0;
}

/*
 * FUSE readlink handler: read the target of the symlink at path into buf
 * (at most size - 1 bytes, always NUL terminated).
 *
 * The delta object is tried first; the base object is used only if the
 * delta one does not exist.  Returns 0 on success or a negative errno
 * value.
 */
static int delta_readlink(const char *path, char *buf, size_t size)
{
	struct deltaheader head;
	int n;

	n = get_deltaheader(&head, path);
	if (n < 0)
		return n;

	n = readlink(head.ddpath, buf, size - 1);
	if (n == -1) {
		if (errno != ENOENT) {
			warn("readlink %s", head.ddpath);
			return -EIO;
		}

		n = readlink(head.bpath, buf, size - 1);
		if (n == -1)
			return -errno;
	}

	buf[n] = '\0';
	return 0;
}

/*
 * FUSE readdir handler: list the directory at path.
 *
 * If the directory does not exist in the delta store, the base directory
 * is listed as is.  Otherwise the delta directory is scanned: ordinary
 * names and redirects to live base content are reported, redirects with
 * a REDIR_NULL target are remembered as removed names, and other
 * bookkeeping names are skipped.  If the directory is the root or has a
 * merge marker, the base directory is listed as well, leaving out names
 * already reported and names marked as removed.
 *
 * Returns 0 on success or a negative errno value.
 *
 * The readlink() result and the link target use their own variables so
 * that a failure is returned as -EIO; shadowing locals previously made
 * the function return a stale value instead.
 */
static int delta_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
			 off_t offset, struct fuse_file_info *fi)
{
	int res;
	DIR *dp;
	struct dirent *de;
	struct deltaheader head;
	char **parray = NULL;	/* names to report from the delta side */
	char **narray = NULL;	/* names removed from the base */
	unsigned pnum = 0;
	unsigned nnum = 0;
	unsigned ctr;
	int merge = 0;

	(void) offset;
	(void) fi;

	res = get_deltaheader(&head, path);
	if (res < 0)
		return res;

	dp = opendir(head.ddpath);
	if (dp == NULL) {
		if (errno != ENOENT) {
			warn("opendir %s", head.ddpath);
			return -EIO;
		}
		/* No delta directory: the base listing is the result. */
		dp = opendir(head.bpath);
		if (dp == NULL)
			return -errno;

		while ((de = readdir(dp)) != NULL) {
			if (filler(buf, de->d_name, NULL, 0))
				break;
		}
		closedir(dp);
		return 0;
	}
	if (strcmp(path, "/") == 0) {
		/* The root is always merged with the base. */
		merge = 1;
	} else {
		res = check_exist(head.dmpath);
		if (res < 0) {
			closedir(dp);
			return res;
		}
		if (res)
			merge = 1;
	}

	while ((de = readdir(dp)) != NULL) {
		unsigned namelen = strlen(de->d_name);

		if (!is_magic(de->d_name, namelen)) {
			if (!check_name_array(parray, pnum, de->d_name))
				add_name(&parray, &pnum, de->d_name);
		} else if (is_redir(de->d_name, namelen)) {
			int linklen;
			pathstr target;
			pathstr linkpath;

			create_path(linkpath, head.ddpath, de->d_name);
			linklen = readlink(linkpath, target,
					   sizeof(target) - 1);
			if (linklen == -1) {
				warn("readlink %s", linkpath);
				closedir(dp);
				res = -EIO;
				goto out;
			}
			target[linklen] = '\0';
			/* Strip the 8-byte magic suffix to get the plain name. */
			de->d_name[namelen - 8] = '\0';
			if (strcmp(target, REDIR_NULL) == 0)
				add_name(&narray, &nnum, de->d_name);
			else if (!check_name_array(parray, pnum, de->d_name))
				add_name(&parray, &pnum, de->d_name);
		}
	}
	closedir(dp);

	res = 0;
	for (ctr = 0; ctr < pnum; ctr++) {
		if (filler(buf, parray[ctr], NULL, 0))
			goto out;
	}

	if (!merge)
		goto out;

	dp = opendir(head.bpath);
	if (dp == NULL) {
		if (errno != ENOENT) {
			warn("opendir %s", head.bpath);
			res = -EIO;
			goto out;
		}
	} else {
		while ((de = readdir(dp)) != NULL) {
			if (!check_name_array(parray, pnum, de->d_name) &&
			    !check_name_array(narray, nnum, de->d_name)) {
				if (filler(buf, de->d_name, NULL, 0))
					break;
			}
		}
		closedir(dp);
	}

out:
	free_names(parray, pnum);
	free_names(narray, nnum);

	return res;
}

/*
 * FUSE mknod handler: create a node at path in the delta store.
 *
 * Missing delta parents are created first.  The new node is given the
 * uid/gid of the calling FUSE context, and any redirect symlink recorded
 * for path is removed.  Returns 0 on success or a negative errno value.
 */
static int delta_mknod(const char *path, mode_t mode, dev_t rdev)
{
	struct deltaheader head;
	struct fuse_context *ctx;
	int rc;

	rc = get_deltaheader(&head, path);
	if (rc >= 0)
		rc = create_parent(path);
	if (rc < 0)
		return rc;

	if (mknod(head.ddpath, mode, rdev) == -1)
		return -errno;

	ctx = fuse_get_context();
	if (lchown(head.ddpath, ctx->uid, ctx->gid) == -1) {
		warn("chown %s", head.ddpath);
		return -EIO;
	}

	if (unlink(head.drpath) == -1 && errno != ENOENT) {
		warn("unlink %s", head.drpath);
		return -EIO;
	}

	return 0;
}

/*
 * FUSE mkdir handler: create a directory at path in the delta store.
 *
 * Missing delta parents are created first.  The new directory is given
 * the uid/gid of the calling FUSE context, and any redirect symlink
 * recorded for path is removed.  Returns 0 on success or a negative
 * errno value.
 */
static int delta_mkdir(const char *path, mode_t mode)
{
	struct deltaheader head;
	struct fuse_context *ctx;
	int rc;

	rc = get_deltaheader(&head, path);
	if (rc >= 0)
		rc = create_parent(path);
	if (rc < 0)
		return rc;

	if (mkdir(head.ddpath, mode) == -1)
		return -errno;

	ctx = fuse_get_context();
	if (lchown(head.ddpath, ctx->uid, ctx->gid) == -1) {
		warn("chown %s", head.ddpath);
		return -EIO;
	}

	if (unlink(head.drpath) == -1 && errno != ENOENT) {
		warn("unlink %s", head.drpath);
		return -EIO;
	}

	return 0;
}

/*
 * FUSE symlink handler: create a symlink named to, pointing at from, in
 * the delta store.
 *
 * Missing delta parents are created first.  The new link is given the
 * uid/gid of the calling FUSE context, and any redirect symlink recorded
 * for to is removed.  Returns 0 on success or a negative errno value.
 */
static int delta_symlink(const char *from, const char *to)
{
	struct deltaheader head;
	struct fuse_context *ctx;
	int rc;

	rc = get_deltaheader(&head, to);
	if (rc >= 0)
		rc = create_parent(to);
	if (rc < 0)
		return rc;

	if (symlink(from, head.ddpath) == -1)
		return -errno;

	ctx = fuse_get_context();
	if (lchown(head.ddpath, ctx->uid, ctx->gid) == -1) {
		warn("chown %s", head.ddpath);
		return -EIO;
	}

	if (unlink(head.drpath) == -1 && errno != ENOENT) {
		warn("unlink %s", head.drpath);
		return -EIO;
	}

	return 0;
}

/*
 * FUSE unlink handler: remove the object at path.
 *
 * The delta object is removed if present, together with its merge
 * marker; remove_base() then takes care of hiding any base object.
 * Returns the result of remove_base(), or -EIO if a delta-side unlink
 * fails for a reason other than ENOENT.
 */
static int delta_unlink(const char *path)
{
	struct deltaheader head;
	int rc;

	rc = get_deltaheader(&head, path);
	if (rc < 0)
		return rc;

	if (unlink(head.ddpath) == -1) {
		if (errno != ENOENT) {
			warn("unlink %s", head.ddpath);
			return -EIO;
		}
	} else if (unlink(head.dmpath) == -1 && errno != ENOENT) {
		warn("unlink %s", head.dmpath);
		return -EIO;
	}

	return remove_base(&head, path);
}

/*
 * FUSE rmdir handler: remove the directory at path.
 *
 * The outcome of rmdir() on the delta directory selects the path taken:
 *  - success:   the merge marker is removed too; if there was one, the
 *               base directory must also be empty
 *  - ENOENT:    the directory exists only in the base, which must be
 *               empty
 *  - ENOTEMPTY: only acceptable for a merged directory whose remaining
 *               entries are bookkeeping for removed base names
 * In all accepted cases remove_base() finishes the job.
 *
 * Returns the result of remove_base() or a negative errno value.
 */
static int delta_rmdir(const char *path)
{
	int res;
	struct deltaheader head;

	res = get_deltaheader(&head, path);
	if (res < 0)
		return res;

	res = rmdir(head.ddpath);
	if (res != -1) {
		res = unlink(head.dmpath);
		if (res == -1) {
			if (errno != ENOENT) {
				warn("unlink %s", head.dmpath);
				return -EIO;
			}
		} else {
			/*
			 * NOTE(review): at this point the delta directory and
			 * its merge marker are already gone, so failing here
			 * appears to leave the delta state changed even though
			 * an error is returned - confirm whether the base
			 * check should come first.
			 */
			res = check_empty_dir(head.bpath);
			if (res < 0)
				return res;
		}
	} else if (errno == ENOENT) {
		res = check_empty_dir(head.bpath);
		if (res < 0)
			return res;
	} else if (errno == ENOTEMPTY) {
		res = check_exist(head.dmpath);
		if (res < 0)
			return res;
		/* A non-merged delta directory with entries is not empty. */
		if (!res)
			return -ENOTEMPTY;

		res = check_empty_merged(&head);
		if (res < 0)
			return res;
	} else {
		warn("rmdir %s", head.ddpath);
		return -EIO;
	}

	return remove_base(&head, path);
}

/*
 * FUSE rename handler: move the object at from to the name to.
 *
 * Steps, in order:
 *  1. Check the destination: a destination directory must be empty (in
 *     the base, or in the merged view, in which case its delta part is
 *     removed up front).
 *  2. Create missing delta parents of the destination.
 *  3. Rename the delta object and its merge marker, or, if the source
 *     has no delta object, clear any delta object at the destination.
 *  4. Unless the source was a delta object without a merge marker, move
 *     the source's redirect symlink to the destination or create one
 *     that points at the source's base path.
 *  5. Hide the source's base object via remove_base().
 *
 * Returns the result of remove_base() or a negative errno value.
 */
static int delta_rename(const char *from, const char *to)
{
	int res;
	struct deltaheader hfrom;
	struct deltaheader hto;
	int need_redir = 1;
	struct stat stbuf;

	res = get_deltaheader(&hfrom, from);
	if (res < 0)
		return res;

	res = get_deltaheader(&hto, to);
	if (res < 0)
		return res;

	/* Step 1: validate the destination. */
	res = lstat(hto.ddpath, &stbuf);
	if (res == -1) {
		if (errno != ENOENT) {
			warn("stat %s", hto.ddpath);
			return -EIO;
		}
		res = lstat(hto.bpath, &stbuf);
		if (res == -1) {
			if (errno != ENOENT) {
				warn("stat %s", hto.bpath);
				return -EIO;
			}
		} else {
			if (S_ISDIR(stbuf.st_mode)) {
				res = check_empty_dir(hto.bpath);
				if (res < 0)
					return res;
			}
		}
	} else {
		if (S_ISDIR(stbuf.st_mode)) {
			res = check_exist(hto.dmpath);
			if (res < 0)
				return res;

			if (res) {
				res = check_empty_merged(&hto);
				if (res < 0)
					return res;

				res = remove_empty_dir(&hto);
				if (res < 0)
					return res;
			}
		}
	}

	/* Step 2: make sure the destination's parents exist in the delta. */
	res = create_parent(to);
	if (res < 0)
		return res;

	/* Step 3: move the delta object, if the source has one. */
	res = rename(hfrom.ddpath, hto.ddpath);
	if (res != -1) {
		res = rename(hfrom.dmpath, hto.dmpath);
		if (res == -1) {
			if (errno != ENOENT) {
				warn("rename %s %s", hfrom.dmpath, hto.dmpath);
				return -EIO;
			}
			/* No merge marker on the source: no redirect needed. */
			need_redir = 0;
		}
	} else if (errno == ENOENT) {
		/* Source lives only in the base; clear the destination. */
		res = unlink(hto.ddpath);
		if (res == -1) {
			if (errno == EISDIR) {
				res = rmdir(hto.ddpath);
				if (res == -1) {
					if (errno == ENOTEMPTY)
						return -ENOTEMPTY;
					warn("rmdir %s", hto.ddpath);
					return -EIO;
				}
			} else if (errno != ENOENT) {
				warn("unlink %s", hto.ddpath);
				return -EIO;
			}
		}
	} else if (errno == ENOTEMPTY) {
		return -ENOTEMPTY;
	} else {
		warn("rename %s %s", hfrom.ddpath, hto.ddpath);
		return -EIO;
	}

	/* Step 4: carry the link to the base content over to the new name. */
	if (need_redir) {
		res = rename(hfrom.drpath, hto.drpath);
		if (res == -1) {
			pathstr link;

			if (errno != ENOENT) {
				warn("rename %s %s", hfrom.drpath, hto.drpath);
				return -EIO;
			}

			res = unlink(hto.drpath);
			if (res == -1 && errno != ENOENT) {
				warn("unlink %s", hto.drpath);
				return -EIO;
			}

			/*
			 * Redirect targets are stored as '*' followed by the
			 * base path with the baseroot prefix skipped.
			 */
			snprintf(link, sizeof(pathstr), "*%s",
				 hfrom.bpath + strlen(baseroot));
			res = symlink(link, hto.drpath);
			if (res == -1) {
				warn("symlink %s", hto.drpath);
				return -EIO;
			}
		}
	}

	/* Step 5: hide whatever the base still has under the old name. */
	return remove_base(&hfrom, from);
}

/*
 * FUSE chmod handler: change the permission bits of the object at path.
 *
 * If a delta object exists it is changed directly.  Otherwise, when the
 * requested bits differ from the base object's, the object is copied up
 * with the new mode.  Returns 0 on success or a negative errno value.
 */
static int delta_chmod(const char *path, mode_t mode)
{
	struct deltaheader head;
	struct stat stbuf;
	int rc;

	rc = get_deltaheader(&head, path);
	if (rc < 0)
		return rc;

	if (chmod(head.ddpath, mode) == 0)
		return 0;

	if (errno != ENOENT) {
		warn("chmod %s", head.ddpath);
		return -EIO;
	}

	if (lstat(head.bpath, &stbuf) == -1)
		return -errno;

	/* Nothing to record if the base already has these bits. */
	mode &= 07777;
	if ((stbuf.st_mode & 07777) == mode)
		return 0;

	stbuf.st_mode = (stbuf.st_mode & S_IFMT) | mode;

	return copy_up(&head, path, &stbuf);
}

/*
 * FUSE chown handler: change owner and/or group of the object at path.
 * A uid or gid of -1 means "leave unchanged".
 *
 * If a delta object exists it is changed directly.  Otherwise, when the
 * request actually changes something compared to the base object, the
 * object is copied up with the new ownership.  Returns 0 on success or a
 * negative errno value.
 */
static int delta_chown(const char *path, uid_t uid, gid_t gid)
{
	struct deltaheader head;
	struct stat stbuf;
	int uid_same;
	int gid_same;
	int rc;

	rc = get_deltaheader(&head, path);
	if (rc < 0)
		return rc;

	if (lchown(head.ddpath, uid, gid) == 0)
		return 0;

	if (errno != ENOENT) {
		warn("chown %s", head.ddpath);
		return -EIO;
	}

	if (lstat(head.bpath, &stbuf) == -1)
		return -errno;

	uid_same = (uid == -1 || stbuf.st_uid == uid);
	gid_same = (gid == -1 || stbuf.st_gid == gid);
	if (uid_same && gid_same)
		return 0;

	if (uid != -1)
		stbuf.st_uid = uid;
	if (gid != -1)
		stbuf.st_gid = gid;

	return copy_up(&head, path, &stbuf);
}

/*
 * FUSE truncate handler: set the size of the file at path.
 *
 * If the file exists only in the base it is copied up with the new size
 * (nothing is done when the size is unchanged).  If it already exists in
 * the delta, the delta file is truncated; truncating to zero also drops
 * the merge marker and the redirect.  When the file grows and is (or has
 * just become) merged, extend_file() updates the block bitmap for the
 * newly added range.
 *
 * Returns 0 on success or a negative errno value.
 */
static int delta_truncate(const char *path, off_t size)
{
	int res;
	struct deltaheader head;
	struct stat stbuf;
	off_t oldsize;
	int need_copy = 0;

	res = get_deltaheader(&head, path);
	if (res < 0)
		return res;

	res = lstat(head.ddpath, &stbuf);
	if (res == -1) {
		if (errno != ENOENT) {
			warn("stat %s", head.ddpath);
			return -EIO;
		}
		res = lstat(head.bpath, &stbuf);
		if (res == -1)
			return -errno;

		if (size == stbuf.st_size)
			return 0;

		/* copy_up() creates the delta file with the size in stbuf. */
		oldsize = stbuf.st_size;
		stbuf.st_size = size;
		res = copy_up(&head, path, &stbuf);
		if (res < 0)
			return res;

		need_copy = 1;
	} else {
		oldsize = stbuf.st_size;

		res = truncate(head.ddpath, size);
		if (res == -1) {
			warn("truncate %s", head.ddpath);
			return -EIO;
		}

		if (size == 0) {
			/* No data left that could come from the base. */
			res = unlink(head.dmpath);
			if (res == -1 && errno != ENOENT) {
				warn("unlink %s", head.dmpath);
				return -EIO;
			}
			res = unlink(head.drpath);
			if (res == -1 && errno != ENOENT) {
				warn("unlink %s", head.drpath);
				return -EIO;
			}
		}
	}


	/* Shrinking or same size: the bitmap needs no update. */
	if (size <= oldsize)
		return 0;

	if (!need_copy) {
		/* A delta file without a merge marker has no bitmap. */
		res = check_exist(head.dmpath);
		if (res < 0)
			return res;

		if (!res)
			return 0;
	}

	res = extend_file(&head, oldsize, size);
	if (res < 0)
		return res;

	return 0;
}

/*
 * FUSE utimens handler: set access (ts[0]) and modification (ts[1])
 * times of the object at path.  A tv_nsec of UTIME_OMIT leaves that
 * timestamp unchanged.
 *
 * If a delta object exists it is changed directly.  Otherwise the base
 * object's times are compared with the request, and the object is copied
 * up with the new times unless BOTH timestamps are omitted or already
 * equal.  Testing with "either" instead of "both" dropped requests that
 * changed only one of the two timestamps.
 *
 * Returns 0 on success or a negative errno value.
 */
static int delta_utimens(const char *path, const struct timespec ts[2])
{
	int res;
	int atime_same;
	int mtime_same;
	struct deltaheader head;
	struct stat stbuf;

	res = get_deltaheader(&head, path);
	if (res < 0)
		return res;

	res = utimensat(AT_FDCWD, head.ddpath, ts, AT_SYMLINK_NOFOLLOW);
	if (res == 0)
		return 0;

	if (errno != ENOENT) {
		warn("utimes %s", head.ddpath);
		return -EIO;
	}
	res = lstat(head.bpath, &stbuf);
	if (res == -1)
		return -errno;

	atime_same = ts[0].tv_nsec == UTIME_OMIT ||
		     (ts[0].tv_nsec == stbuf.st_atim.tv_nsec &&
		      ts[0].tv_sec == stbuf.st_atim.tv_sec);
	mtime_same = ts[1].tv_nsec == UTIME_OMIT ||
		     (ts[1].tv_nsec == stbuf.st_mtim.tv_nsec &&
		      ts[1].tv_sec == stbuf.st_mtim.tv_sec);

	/* Only skip the copy-up when neither timestamp would change. */
	if (atime_same && mtime_same)
		return 0;

	if (ts[0].tv_nsec != UTIME_OMIT)
		stbuf.st_atim = ts[0];
	if (ts[1].tv_nsec != UTIME_OMIT)
		stbuf.st_mtim = ts[1];

	return copy_up(&head, path, &stbuf);
}

/*
 * FUSE read handler: read up to size bytes at offset from the file at
 * path into buf.
 *
 * A file without a delta object is read from the base; a delta file
 * without a merge marker is read from the delta alone.  Otherwise the
 * data is assembled block by block through read_block(), stopping at the
 * first short block or error.
 *
 * Returns the number of bytes read, or a negative errno value if nothing
 * could be read.
 */
static int delta_read(const char *path, char *buf, size_t size, off_t offset,
		    struct fuse_file_info *fi)
{
	struct deltaheader head;
	size_t total = 0;
	int rc;

	(void) fi;

	rc = get_deltaheader(&head, path);
	if (rc < 0)
		return rc;

	rc = check_exist(head.ddpath);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return read_file(head.bpath, buf, offset, size);

	rc = check_exist(head.dmpath);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return read_file(head.ddpath, buf, offset, size);

	for (; size != 0; size -= rc) {
		rc = read_block(&head, buf, offset, size);
		if (rc < 0)
			break;

		total += rc;
		offset += rc;
		buf += rc;

		/* A short block means end of data; account for it and stop. */
		if (rc != BLOCK_SIZE) {
			size -= rc;
			break;
		}
	}

	return total ? total : rc;
}

/*
 * FUSE write handler: write size bytes from buf at offset to the file at
 * path.
 *
 * A file that exists only in the base is copied up first.  A delta file
 * without a merge marker is written directly.  For merged files the
 * write must stay within one block (asserted): a full aligned block is
 * written as is; a partial write to a block still served from the base
 * first fetches that block, patches it and stores the result in the
 * delta; a partial write to a block already in the delta goes straight
 * to the delta file.
 *
 * Returns the number of bytes written or a negative errno value.
 */
static int delta_write(const char *path, const char *buf, size_t size,
		     off_t offset, struct fuse_file_info *fi)
{
	int res;
	struct deltaheader head;
	struct stat stbuf;
	off_t blkoffset;

	res = get_deltaheader(&head, path);
	if (res < 0)
		return res;

	res = lstat(head.ddpath, &stbuf);
	if (res == -1) {
		if (errno != ENOENT) {
			warn("stat %s", head.ddpath);
			return -EIO;
		}
		res = lstat(head.bpath, &stbuf);
		if (res == -1)
			return -errno;

		res = copy_up(&head, path, &stbuf);
		if (res < 0)
			return res;
	} else {
		res = check_exist(head.dmpath);
		if (res < 0)
			return res;
		/* Pure delta file: no bitmap to maintain. */
		if (!res)
			return write_file(head.ddpath, buf, offset, size);
	}

	/* Writing past the end: account for the gap in the bitmap first. */
	if (offset > stbuf.st_size) {
		res = extend_file(&head, stbuf.st_size, offset);
		if (res < 0)
			return res;
	}

	/* The code below handles exactly one block per call. */
	assert((offset & BLOCK_MASK) + size <= BLOCK_SIZE);

	if ((offset & BLOCK_MASK) == 0 && size == BLOCK_SIZE)
		return write_block(&head, buf, offset, size);

	blkoffset = offset & ~BLOCK_MASK;
	res = is_bitmap_delta(&head, blkoffset);
	if (res < 0)
		return res;

	if (!res) {
		/* Read-modify-write of a block still coming from the base. */
		char blkbuf[BLOCK_SIZE];
		size_t num = stbuf.st_size - blkoffset;
		off_t newsize;

		res = read_block(&head, blkbuf, blkoffset, num);
		if (res < 0)
			return res;

		memcpy(blkbuf + (offset & BLOCK_MASK), buf, size);

		/* Write back up to the end of the file or of the new data. */
		newsize = stbuf.st_size;
		if (offset + size > newsize)
			newsize = offset + size;
		num = newsize - blkoffset;
		res = write_block(&head, blkbuf, blkoffset, num);
		if (res < 0)
			return res;
	} else {
		res = write_file(head.ddpath, buf, offset, size);
		if (res < 0)
			return res;
	}

	return size;
}

/*
 * FUSE operation table handed to fuse_main(); operations not listed here
 * are left unset.
 */
static struct fuse_operations delta_oper = {
	.getattr	= delta_getattr,
	.readlink	= delta_readlink,
	.readdir	= delta_readdir,
	.mknod		= delta_mknod,
	.mkdir		= delta_mkdir,
	.symlink	= delta_symlink,
	.unlink		= delta_unlink,
	.rmdir		= delta_rmdir,
	.rename		= delta_rename,
	.chmod		= delta_chmod,
	.chown		= delta_chown,
	.truncate	= delta_truncate,
	.utimens	= delta_utimens,
	.read		= delta_read,
	.write		= delta_write,
};

/*
 * Option callback for fuse_opt_parse(): claims the first two non-option
 * arguments as baseroot and deltaroot.
 *
 * Returns 0 for an argument that was consumed here and 1 for everything
 * else, so that it is kept for FUSE (the mountpoint and real options).
 */
static int delta_opt_proc(void *data, const char *arg, int key,
			  struct fuse_args *outargs)
{
	(void) data;
	(void) outargs;

	if (key != FUSE_OPT_KEY_NONOPT)
		return 1;

	if (baseroot == NULL) {
		baseroot = strdup(arg);
		assert(baseroot != NULL);
		return 0;
	}

	if (deltaroot == NULL) {
		deltaroot = strdup(arg);
		assert(deltaroot != NULL);
		return 0;
	}

	return 1;
}

/*
 * Entry point: parse "baseroot deltaroot mountpoint" plus FUSE options,
 * force single-threaded operation with allow_other and
 * default_permissions, and run the FUSE main loop.
 *
 * Exits with status 1 on argument errors; otherwise returns the result
 * of fuse_main().
 */
int main(int argc, char *argv[])
{
	int res;
	struct fuse_args args = FUSE_ARGS_INIT(argc, argv);

	/* Modes passed by callers are applied to the delta store as is. */
	umask(0);
	res = fuse_opt_parse(&args, NULL, NULL, delta_opt_proc);
	if (res == -1)
		exit(1);

	/* Check each added argument; the results used to be discarded. */
	if (fuse_opt_add_arg(&args, "-s") == -1 ||
	    fuse_opt_add_arg(&args, "-oallow_other,default_permissions") == -1)
		exit(1);

	if (baseroot == NULL || deltaroot == NULL) {
		fprintf(stderr,
			"usage: %s [opts] baseroot deltaroot mountpoint\n",
			argv[0]);
		exit(1);
	}

	res = fuse_main(args.argc, args.argv, &delta_oper, NULL);
	fuse_opt_free_args(&args);

	return res;
}

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-02-28 14:42 delta filesystem prototype Miklos Szeredi
@ 2009-02-28 17:22 ` Goswin von Brederlow
  2009-03-01  0:38   ` Bernd Schubert
  2009-03-03  8:31 ` hooanon05
  1 sibling, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-02-28 17:22 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, fuse-devel

Miklos Szeredi <miklos@szeredi.hu> writes:

> Here is my first try at a "delta" filesystem.  It takes two
> directories, one of which is a read-only base, and the other is where
> the differences are stored.  It stores data, metadata and directory
> modifications without copying up whole files from the read-only
> branch.
>
> The layout of the delta store may look similar to the writable branch
> of a union fs, but this is basically just coincidence (it was easier
> to start out this way).
>
> Currently it's implemented with fuse and it's not optimized at all, so
> performance may suck in some cases.  But I think this is a useful
> concept and a better model, than trying to fit writable branches into
> a union filesystem.
>
> Comments, bug reports are welcome.
>
> Thanks,
> Miklos

Wouldn't it make more sense to start with unionfs-fuse and add a delta
feature to it? unionfs-fuse already has all you need except that it
will copy the whole file (if on a read-only branch) on write.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-02-28 17:22 ` [fuse-devel] " Goswin von Brederlow
@ 2009-03-01  0:38   ` Bernd Schubert
  2009-03-01 10:17     ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: Bernd Schubert @ 2009-03-01  0:38 UTC (permalink / raw)
  To: fuse-devel; +Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel

On Saturday 28 February 2009, Goswin von Brederlow wrote:
> Miklos Szeredi <miklos@szeredi.hu> writes:
> > Here is my first try at a "delta" filesystem.  It takes two
> > directories, one of which is a read-only base, and the other is where
> > the differences are stored.  It stores data, metadata and directory
> > modifications without copying up whole files from the read-only
> > branch.
> >
> > The layout of the delta store may look similar to the writable branch
> > of a union fs, but this is basically just coincidence (it was easier
> > to start out this way).
> >
> > Currently it's implemented with fuse and it's not optimized at all, so
> > performance may suck in some cases.  But I think this is a useful
> > concept and a better model, than trying to fit writable branches into
> > a union filesystem.
> >
> > Comments, bug reports are welcome.
> >
> > Thanks,
> > Miklos
>
> Wouldn't it make more sense to start with unionfs-fuse and add a delta
> feature to it? unionfs-fuse already has all you need except that it
> will copy the whole file (if on a read-only branch) on write.

Well yes, but it would need to be configurable by the user. IMMHO, the 'delta' 
ansatz has a big problem - what happens if the admin decides to modify the 
underlying ro-branch, which is a distribution chroot seen by all clients as 
their '/'? Any time files may be modified or even deleted on this branch when 
the admin does an update.
This is also exactly the reason why none of the kernel unionfs
implementations fit my needs and why I started to work on unionfs-fuse. Only,
the delta approach makes it even worse ;)


Cheers,
Bernd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-01  0:38   ` Bernd Schubert
@ 2009-03-01 10:17     ` Goswin von Brederlow
  2009-03-04 11:21       ` Miklos Szeredi
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-01 10:17 UTC (permalink / raw)
  To: Bernd Schubert
  Cc: fuse-devel, Goswin von Brederlow, Miklos Szeredi, linux-fsdevel

Bernd Schubert <bs_lists@aakef.fastmail.fm> writes:

> On Saturday 28 February 2009, Goswin von Brederlow wrote:
>> Miklos Szeredi <miklos@szeredi.hu> writes:
>> > Here is my first try at a "delta" filesystem.  It takes two
>> > directories, one of which is a read-only base, and the other is where
>> > the differences are stored.  It stores data, metadata and directory
>> > modifications without copying up whole files from the read-only
>> > branch.
>> >
>> > The layout of the delta store may look similar to the writable branch
>> > of a union fs, but this is basically just coincidence (it was easier
>> > to start out this way).
>> >
>> > Currently it's implemented with fuse and it's not optimized at all, so
>> > performance may suck in some cases.  But I think this is a useful
>> > concept and a better model, than trying to fit writable branches into
>> > a union filesystem.
>> >
>> > Comments, bug reports are welcome.
>> >
>> > Thanks,
>> > Miklos
>>
>> Wouldn't it make more sense to start with unionfs-fuse and add a delta
>> feature to it? unionfs-fuse already has all you need except that it
>> will copy the whole file (if on a read-only branch) on write.
>
> Well yes, but it would need to be configurable by the user. IMMHO, the 'delta' 
> ansatz has a big problem - what happens if the admin decides to modify the 
> underlying ro-branch, which is a distribution chroot seen by all clients as 
> their '/'? Any time files may be modified or even deleted on this branch when 
> the admin does an update.
> This is also the exactly the problem why none of the kernel unionfs 
> implementation fits my needs and why I started to work on unionfs-fuse. Only, 
> the delta approach makes it even worse ;)
>
>
> Cheers,
> Bernd

Obviously with delta storage the underlying branches really MUST be
read-only. As for configuration I see no problem in classifying a
branch as RO, RW or D. If the user wants a modifiable RO branch then
he can not have any D branch. Easy enough.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: delta filesystem prototype
  2009-02-28 14:42 delta filesystem prototype Miklos Szeredi
  2009-02-28 17:22 ` [fuse-devel] " Goswin von Brederlow
@ 2009-03-03  8:31 ` hooanon05
  2009-03-03 10:59   ` [fuse-devel] " Goswin von Brederlow
  1 sibling, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-03  8:31 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, fuse-devel


Miklos Szeredi:
> Here is my first try at a "delta" filesystem.  It takes two
	:::
> Comments, bug reports are welcome.

As I wrote before, it is unclear how you implement mmap.
I am afraid you need to copy up the entire file to support MAP_SHARED.

Since you don't care about the inum, hardlinks will not work correctly. For
instance,
$ cd /base
$ echo a > a
$ ln a b
$ cd /deltafs
$ echo a >> a
$ cat b

By the way, are you going to review the aufs2-tmp-ro branch which I made
in response to you?


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03  8:31 ` hooanon05
@ 2009-03-03 10:59   ` Goswin von Brederlow
  2009-03-03 13:11     ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-03 10:59 UTC (permalink / raw)
  To: hooanon05; +Cc: Miklos Szeredi, linux-fsdevel, fuse-devel

hooanon05@yahoo.co.jp writes:

> Miklos Szeredi:
>> Here is my first try at a "delta" filesystem.  It takes two
> 	:::
>> Comments, bug reports are welcome.
>
> As I wrote before, it is unclear how do you implment mmap.
> I am afrid you need to copyup the entire file to support MAP_SHARED.

Why? When a page is accessed the filesystem gets a read request and
reads it either from the RO branch or the delta branch. Why should it
need to copy up the full file?

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 10:59   ` [fuse-devel] " Goswin von Brederlow
@ 2009-03-03 13:11     ` hooanon05
  2009-03-03 15:27       ` Dave Kleikamp
  2009-03-04 11:49       ` Goswin von Brederlow
  0 siblings, 2 replies; 51+ messages in thread
From: hooanon05 @ 2009-03-03 13:11 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: Miklos Szeredi, linux-fsdevel, fuse-devel


Goswin von Brederlow:
> Why? When a page is accessed the filesystem gets a read request and
> reads it either from the RO branch or the delta branch. Why should it
> need to copy up the full file?

For example,
- you have two mmap for a single file
- the first mapping is done, it may map the file on the lower rdonly
  layer
- the other mapping modifies the contents
- can the first mapping see the new content?

Of course, it may depend on the implementation.


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 13:11     ` hooanon05
@ 2009-03-03 15:27       ` Dave Kleikamp
  2009-03-03 15:50         ` hooanon05
  2009-03-04 11:49       ` Goswin von Brederlow
  1 sibling, 1 reply; 51+ messages in thread
From: Dave Kleikamp @ 2009-03-03 15:27 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel

On Tue, 2009-03-03 at 22:11 +0900, hooanon05@yahoo.co.jp wrote:
> Goswin von Brederlow:
> > Why? When a page is accessed the filesystem gets a read request and
> > reads it either from the RO branch or the delta branch. Why should it
> > need to copy up the full file?
> 
> For example,
> - you have two mmap for a single file
> - the first mapping is done, it may map the file on the lower rdonly
>   layer
> - the other mapping modifies the contents
> - can the first mapping see the new content?

The lower read-only file would not appear to user-space as the same file
at all.  It would have its own address space.  It clearly would not see
the new content.

There wouldn't be any problem with reading pages from the lower file for
the upper file one at a time as they are faulted.

Shaggy
-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 15:27       ` Dave Kleikamp
@ 2009-03-03 15:50         ` hooanon05
  2009-03-03 15:54           ` Dave Kleikamp
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-03 15:50 UTC (permalink / raw)
  To: Dave Kleikamp
  Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel


Dave Kleikamp:
> The lower read-only file would not appear to user-space as the same file
> at all.  It would have its own address space.  It clearly would not see
> the new content.
> 
> There wouldn't be any problem with reading pages from the lower file for
> the upper file one at a time as they are faulted.

I am afraid that I cannot understand fully what you wrote (due to my
poor English).
Do you mean that the entire file copyup will be necessary?


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 15:50         ` hooanon05
@ 2009-03-03 15:54           ` Dave Kleikamp
  2009-03-03 16:02             ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Dave Kleikamp @ 2009-03-03 15:54 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel

On Wed, 2009-03-04 at 00:50 +0900, hooanon05@yahoo.co.jp wrote:
> Dave Kleikamp:
> > The lower read-only file would not appear to user-space as the same file
> > at all.  It would have its own address space.  It clearly would not see
> > the new content.
> > 
> > There wouldn't be any problem with reading pages from the lower file for
> > the upper file one at a time as they are faulted.
> 
> I am afraid that I cannot understand fully what you wrote (due to my
> poor English).
> Do you mean that the entire file copyup will be necessary?

No.  I was saying the opposite.  Nothing that happens to the upper
address space would be visible to the lower address space.  The upper
file could read from the lower file system on-demand as pages are
faulted.  There is no need to copy everything at once.

Shaggy
-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 15:54           ` Dave Kleikamp
@ 2009-03-03 16:02             ` hooanon05
  2009-03-03 16:14               ` Dave Kleikamp
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-03 16:02 UTC (permalink / raw)
  To: Dave Kleikamp
  Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel


Dave Kleikamp:
> No.  I was saying the opposite.  Nothing that happens to the upper
> address space would be visible to the lower address space.  The upper
> file could read from the lower file system on-demand as pages are
> faulted.  There is no need to copy everything at once.

So you mean,
- you have two mmap for a single file
- the first mapping is done, it may map the file on the lower rdonly
  layer
- the other mapping modifies the contents
- when a page in the first mapping is accessed again, the page is read from
  the upper layer.
Right?


J. R. Okajima


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 16:02             ` hooanon05
@ 2009-03-03 16:14               ` Dave Kleikamp
  2009-03-03 16:19                 ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Dave Kleikamp @ 2009-03-03 16:14 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel

On Wed, 2009-03-04 at 01:02 +0900, hooanon05@yahoo.co.jp wrote:
> Dave Kleikamp:
> > No.  I was saying the opposite.  Nothing that happens to the upper
> > address space would be visible to the lower address space.  The upper
> > file could read from the lower file system on-demand as pages are
> > faulted.  There is no need to copy everything at once.
> 
> So you mean,
> - you have two mmap for a single file

Explain what you mean by a single file.  If there are two mmaps to the
upper file, they will see the same changes.  If one is to the upper
file, and one is to the lower file, they will NOT be mmap'ed to the same
file.

> - the first mapping is done, it may map the file on the lower rdonly
>   layer

This mapping will only ever see the lower contents

> - the other mapping modifies the contents

The upper mapping will contain data pages with modified content.  Only
those pages accessed will be copied (if necessary) from the lower file.

> - when a page in the first mapping accessed again, the page is read from
>   the upper layer.

No.  The first mapping is not even aware of the second mapping.  It
continues to see the read-only data

> Right?
No

Shaggy
-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 16:14               ` Dave Kleikamp
@ 2009-03-03 16:19                 ` hooanon05
  2009-03-03 16:46                   ` Dave Kleikamp
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-03 16:19 UTC (permalink / raw)
  To: Dave Kleikamp
  Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel


Dave Kleikamp:
> > - you have two mmap for a single file
> 
> Explain what you mean by a single file.  If there are two mmaps to the
> upper file, they will see the same changes.  If one is to the upper
> file, and one is to the lower file, they will NOT be mmap'ed to the same
> file.

A regular file on the lower readonly layer. Not a single block/page is
copied-up yet.


> No.  The first mapping is not even aware of the second mapping.  It
> continues to see the read-only data

Should the first mapping see the latest (modified) content?


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 16:19                 ` hooanon05
@ 2009-03-03 16:46                   ` Dave Kleikamp
  2009-03-03 17:13                     ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Dave Kleikamp @ 2009-03-03 16:46 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel

On Wed, 2009-03-04 at 01:19 +0900, hooanon05@yahoo.co.jp wrote:
> Dave Kleikamp:
> > > - you have two mmap for a single file
> > 
> > Explain what you mean by a single file.  If there are two mmaps to the
> > upper file, they will see the same changes.  If one is to the upper
> > file, and one is to the lower file, they will NOT be mmap'ed to the same
> > file.
> 
> A regular file on the lower readonly layer. Not a single block/page is
> copied-up yet.

I understand the file physically resides on the lower layer.  The delta
file system will present a new file on a different path that initially
has the contents of the lower file.  If you are mmapping the file
presented by the delta file system, then both mmaps will see the same
modified data.

Any mmaps to the original path of the read-only file system will not see
any modified data.

dd if=/dev/zero of=/ro/a bs=4096 count=1000
# /ro/a contains 1000 pages of zeros
mount -o remount,ro /ro # make sure it's read-only
mount -t deltafs /ro /rw
 mmap(addr1, "/ro/a", ...);
 mmap(addr2, "/rw/a", ...);
mmap(addr3, "/rw/a", ...);

If you modify the file through addr3, addr2 will see the changes.  The
address space for /rw/a is shared.  However, addr1 will only see the
read-only data.  /ro/a will not be modified.

deltafs (does this filesystem have a name yet?) does not need to copy
all 1000 pages from the lower file.  It can read from the lower file as
needed when a page is accessed.  The modified data will be present in
the upper address space's page cache.  I assume it will eventually be
written as a "delta" in the upper file system.

> > No.  The first mapping is not even aware of the second mapping.  It
> > continues to see the read-only data
> 
> Should the first mapping see the latest (modified) content?

If you're asking about the case of addr2 and addr3 above, then yes.  For
addr1, no.

-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 16:46                   ` Dave Kleikamp
@ 2009-03-03 17:13                     ` hooanon05
  2009-03-04 11:52                       ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-03 17:13 UTC (permalink / raw)
  To: Dave Kleikamp
  Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel


Dave Kleikamp:
> I understand the file physically resides on the lower layer.  The delta
> file system will presents a new file on a different path that initially
> has the contents of the lower file.  If you are mmapping the file
> presented by the delta files system, then both mmaps will see the same
> modified data.

Ok, I will read fuse (instead of deltafs.c) again.
Thank you.


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-01 10:17     ` Goswin von Brederlow
@ 2009-03-04 11:21       ` Miklos Szeredi
  2009-03-04 14:12         ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: Miklos Szeredi @ 2009-03-04 11:21 UTC (permalink / raw)
  To: goswin-v-b; +Cc: bs_lists, fuse-devel, goswin-v-b, miklos, linux-fsdevel

On Sun, 01 Mar 2009, Goswin von Brederlow wrote:
> Obviously with delta storage the underlying branches really MUST be
> read-only. As for configuration I see no problem in clasifying a
> branch as RO, RW or D. If the user wants a modifiable RO branch then
> he can not have any D branch. Easy enough.

Not necessarily.  A delta filesystem could support automatic or manual
merges very well.  This is not really possible with a writable union.

Miklos

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 13:11     ` hooanon05
  2009-03-03 15:27       ` Dave Kleikamp
@ 2009-03-04 11:49       ` Goswin von Brederlow
  1 sibling, 0 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-04 11:49 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> Why? When a page is accessed the filesystem gets a read request and
>> reads it either from the RO branch or the delta branch. Why should it
>> need to copy up the full file?
>
> For example,
> - you have two mmap for a single file

On open you have to create an internal FD structure that keeps track
of the delta information and the read-only and delta file descriptors
(one or both can be -1). If the same file is opened again you have to
use the same internal FD structure so both share the same delta
information.

> - the first mapping is done, it may map the file on the lower rdonly
>   layer
> - the other mapping modifies the contents
> - can the first mapping see the new content?

Both mmaps would be to the internal FD and thus share any change made
by one of them.

> Of course, it may depend on the implementation.

Sure. If you don't create an internal FD structure and share it
between open calls things will break. Not just mmap.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-03 17:13                     ` hooanon05
@ 2009-03-04 11:52                       ` Goswin von Brederlow
  2009-03-04 14:10                         ` Dave Kleikamp
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-04 11:52 UTC (permalink / raw)
  To: hooanon05
  Cc: Dave Kleikamp, Goswin von Brederlow, Miklos Szeredi,
	linux-fsdevel, fuse-devel

hooanon05@yahoo.co.jp writes:

> Dave Kleikamp:
>> I understand the file physically resides on the lower layer.  The delta
>> file system will presents a new file on a different path that initially
>> has the contents of the lower file.  If you are mmapping the file
>> presented by the delta files system, then both mmaps will see the same
>> modified data.
>
> Ok, I will read fuse (instead of deltafs.c) again.
> Thank you.
>
>
> J. R. Okajima

By the way: if you mmap a file in fuse shared twice, wouldn't the
kernel share the physical pages? So the first access calls fuse and
loads the data into memory, the second access would just get the same
physical page mapped. Right?

MfG
        Goswin


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-04 11:52                       ` Goswin von Brederlow
@ 2009-03-04 14:10                         ` Dave Kleikamp
  2009-03-04 16:23                           ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Dave Kleikamp @ 2009-03-04 14:10 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: hooanon05, Miklos Szeredi, linux-fsdevel, fuse-devel

On Wed, 2009-03-04 at 12:52 +0100, Goswin von Brederlow wrote:
> hooanon05@yahoo.co.jp writes:
> 
> > Dave Kleikamp:
> >> I understand the file physically resides on the lower layer.  The delta
> >> file system will presents a new file on a different path that initially
> >> has the contents of the lower file.  If you are mmapping the file
> >> presented by the delta files system, then both mmaps will see the same
> >> modified data.
> >
> > Ok, I will read fuse (instead of deltafs.c) again.
> > Thank you.
> >
> >
> > J. R. Okajima
> 
> By the way. If you mmap a file in fuse shared twice wouldn't the
> kernel share the physical pages. So the first access calls fuse and
> loads the data into memory, the second access would just get the same
> physical page mapped. Right?

Right.  Some of my comments were a result of not being clear on what J.
R. meant when he asked about the first mapping being to the lower file.
Both mmaps to the same file in fuse would access the same physical
pages.

Shaggy
-- 
David Kleikamp
IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-04 11:21       ` Miklos Szeredi
@ 2009-03-04 14:12         ` Goswin von Brederlow
  2009-03-05 13:06           ` Miklos Szeredi
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-04 14:12 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: goswin-v-b, bs_lists, fuse-devel, linux-fsdevel

Miklos Szeredi <miklos@szeredi.hu> writes:

> On Sun, 01 Mar 2009, Goswin von Brederlow wrote:
>> Obviously with delta storage the underlying branches really MUST be
>> read-only. As for configuration I see no problem in clasifying a
>> branch as RO, RW or D. If the user wants a modifiable RO branch then
>> he can not have any D branch. Easy enough.
>
> Not necessarily.  A delta filesystem could support automatic or manual
> merges very well.  This is not really possible with a writable union.
>
> Miklos

Bernd and I meant the following scenario:

/dev/sda1 /union/read-only
tmpfs     /union/read-write

with a delta-fs merging the two. Then running "echo foo >
/union/read-only/path/file" could be disastrous to your data.

The underlying branch must not change on its own. The delta-fs could
change it on command but that wasn't what we were thinking of.

MfG
        Goswin


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-04 14:10                         ` Dave Kleikamp
@ 2009-03-04 16:23                           ` hooanon05
  0 siblings, 0 replies; 51+ messages in thread
From: hooanon05 @ 2009-03-04 16:23 UTC (permalink / raw)
  To: Dave Kleikamp
  Cc: Goswin von Brederlow, Miklos Szeredi, linux-fsdevel, fuse-devel


Dave Kleikamp:
> On Wed, 2009-03-04 at 12:52 +0100, Goswin von Brederlow wrote:
	:::
> > By the way. If you mmap a file in fuse shared twice wouldn't the
> > kernel share the physical pages. So the first access calls fuse and
> > loads the data into memory, the second access would just get the same
> > physical page mapped. Right?
> 
> Right.  Some of my comments were a result of not being clear on what J.
> R. meant when he asked about the first mapping being to the lower file.
> Both mmaps to the same file in fuse would access the same physical
> pages.

I didn't know how fuse implements mmap and sharing pages. Additionally
there is no mmap code in deltafs.c. So I wrote "it is unclear how do you
implment mmap".

Now I am reading fuse (instead of deltafs.c) and beginning to understand
that fuse and deltafs work as you expected. But the implementation seems
a little different to me.
These two mappings are not sharing memory pages but are keeping the latest
contents by re-reading, right?


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-04 14:12         ` Goswin von Brederlow
@ 2009-03-05 13:06           ` Miklos Szeredi
  2009-03-05 19:58             ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: Miklos Szeredi @ 2009-03-05 13:06 UTC (permalink / raw)
  To: goswin-v-b; +Cc: miklos, goswin-v-b, bs_lists, fuse-devel, linux-fsdevel

On Wed, 04 Mar 2009, Goswin von Brederlow wrote:
> Bernd and I ment the following scenario:
> 
> /dev/sda1 /union/read-only
> tmpfs     /union/read-write
> 
> with a delta-fs merging the two. Then running "echo foo >
> /union/read-only/path/file" could be desasterous to your data.

Well, if the writable branch is really meant to be a clone of the
underlying fs, then yes.  But writable unions are _not_ clones either,
very far from that.

> The underlying branch must not change on its own. The delta-fs could
> change it on command but that wasn't what we where thinking of.

Delta-fs could deal with changes in the underlying filesystem, in some
cases much better than unionfs (for example if the file was renamed in
the delta-fs, and it was later modified in the underlying fs).

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-05 13:06           ` Miklos Szeredi
@ 2009-03-05 19:58             ` Goswin von Brederlow
  2009-03-06  4:10               ` hooanon05
  2009-03-06 11:35               ` Miklos Szeredi
  0 siblings, 2 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-05 19:58 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: goswin-v-b, bs_lists, fuse-devel, linux-fsdevel

Miklos Szeredi <miklos@szeredi.hu> writes:

> On Wed, 04 Mar 2009, Goswin von Brederlow wrote:
>> Bernd and I ment the following scenario:
>> 
>> /dev/sda1 /union/read-only
>> tmpfs     /union/read-write
>> 
>> with a delta-fs merging the two. Then running "echo foo >
>> /union/read-only/path/file" could be desasterous to your data.
>
> Well, if the writable branch is really meant to be a clone of the
> underlying fs, then yes.  But writable unions are _not_ clones either,
> very far from that.

The problem is that /delta-fs/path/file would suddenly be a composite
of the new file /union/read-only/path/file and any stored delta
information in /union/read-write/path/file of the old file.

In unionfs-fuse files are currently always completely copied up when
modified. There a change of /union/read-only/path/file will give the
new file if it wasn't modified or the old modified file if it was. But
it never mixes the two.

>> The underlying branch must not change on its own. The delta-fs could
>> change it on command but that wasn't what we where thinking of.
>
> Delta-fs could deal with changes in the underlying filesystem, in some
> cases much better than unionfs (for example if the file was renamed in
> the delta-fs, and it was later modified in the underlying fs).
>
> Thanks,
> Miklos

Ugh, no. The delta-fs has no way of knowing when a file in a branch
will be changed outside of delta-fs. And it would have to know that
before the change so it can copy-up relevant data before they change.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-05 19:58             ` Goswin von Brederlow
@ 2009-03-06  4:10               ` hooanon05
  2009-03-06 12:37                 ` Goswin von Brederlow
  2009-03-06 11:35               ` Miklos Szeredi
  1 sibling, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-06  4:10 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: Miklos Szeredi, bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> The problem is that /delta-fs/path/file would suddenly be a composite
> of the new file /union/read-only/path/file and any stored delta
> information in /union/read-write/path/file of the old file.

Yes.
And I am afraid stat.st_blocks will be incorrect too. But it may be a
smaller problem for those who think there is another, bigger advantage. It
might be just a trade-off.


J. R. Okajima


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-05 19:58             ` Goswin von Brederlow
  2009-03-06  4:10               ` hooanon05
@ 2009-03-06 11:35               ` Miklos Szeredi
  2009-03-06 12:50                 ` Goswin von Brederlow
  2009-03-07  1:19                 ` hooanon05
  1 sibling, 2 replies; 51+ messages in thread
From: Miklos Szeredi @ 2009-03-06 11:35 UTC (permalink / raw)
  To: goswin-v-b; +Cc: miklos, goswin-v-b, bs_lists, fuse-devel, linux-fsdevel

On Thu, 05 Mar 2009, Goswin von Brederlow wrote:
> Miklos Szeredi <miklos@szeredi.hu> writes:
> 
> > On Wed, 04 Mar 2009, Goswin von Brederlow wrote:
> >> Bernd and I ment the following scenario:
> >> 
> >> /dev/sda1 /union/read-only
> >> tmpfs     /union/read-write
> >> 
> >> with a delta-fs merging the two. Then running "echo foo >
> >> /union/read-only/path/file" could be desasterous to your data.
> >
> > Well, if the writable branch is really meant to be a clone of the
> > underlying fs, then yes.  But writable unions are _not_ clones either,
> > very far from that.
> 
> The problem is that /delta-fs/path/file would suddenly be a composite
> of the new file /union/read-only/path/file and any stored delta
> information in /union/read-write/path/file of the old file.

It can detect changes to the underlying file from the modification
time (which it can store together with the delta).

But having data deltas is in fact not even that interesting.  Files
are not often modified without being completely rewritten.  Appending:
that happens, but again that can be handled in an intelligent way by
the delta layer.

The most interesting are the directory and metadata deltas, which do
make a delta-fs like implementation much more effective and nicer than a
dumb union type filesystem.  Mind, unionfs and aufs are rapidly
acquiring non-union traits, like inode number storage, virtual hard
links (not to speak of whiteouts).  Which makes them all the more
hackish, I much prefer a conceptually clean solution.

> In unionfs-fuse files are currently always completly copy-up-ed when
> modified. There a change of /union/read-only/path/file will give the
> new file if it wasn't modified or the old modified file if it was. But
> never mix the two.

So?  Some datasets may reside in multiple files, and inconsistencies
could just as well present themselves in that case.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-06  4:10               ` hooanon05
@ 2009-03-06 12:37                 ` Goswin von Brederlow
  2009-03-07  1:16                   ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-06 12:37 UTC (permalink / raw)
  To: hooanon05; +Cc: bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> The problem is that /delta-fs/path/file would suddenly be a composite
>> of the new file /union/read-only/path/file and any stored delta
>> information in /union/read-write/path/file of the old file.
>
> Yes.
> And I am afraid stat.st_blocks will be incorrect too. But it may be a
> smaller problem for who think there is another bigger advantage. It
> might be just a trade off.
>
>
> J. R. Okajima

I could think of three possible values:

1) the sum of the st_blocks of both branches (total space used)
2) min(size/blocksize, sum of st_blocks) (approx. what the file would use)
3) st_blocks of read-write branch (read-write space used only)

I could think of a use for any one of them. Although the 3rd can be
done by "du /union/read-write" if there is an identity mapping between
filenames.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-06 11:35               ` Miklos Szeredi
@ 2009-03-06 12:50                 ` Goswin von Brederlow
  2009-03-06 13:21                   ` Miklos Szeredi
  2009-03-07  1:19                 ` hooanon05
  1 sibling, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-06 12:50 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: goswin-v-b, bs_lists, fuse-devel, linux-fsdevel

Miklos Szeredi <miklos@szeredi.hu> writes:

> On Thu, 05 Mar 2009, Goswin von Brederlow wrote:
>> Miklos Szeredi <miklos@szeredi.hu> writes:
>> 
>> > On Wed, 04 Mar 2009, Goswin von Brederlow wrote:
>> >> Bernd and I ment the following scenario:
>> >> 
>> >> /dev/sda1 /union/read-only
>> >> tmpfs     /union/read-write
>> >> 
>> >> with a delta-fs merging the two. Then running "echo foo >
>> >> /union/read-only/path/file" could be desasterous to your data.
>> >
>> > Well, if the writable branch is really meant to be a clone of the
>> > underlying fs, then yes.  But writable unions are _not_ clones either,
>> > very far from that.
>> 
>> The problem is that /delta-fs/path/file would suddenly be a composite
>> of the new file /union/read-only/path/file and any stored delta
>> information in /union/read-write/path/file of the old file.
>
> It can detect changes to the underlying file from the modification
> time (which it can store together with the delta).

But then what do you do? You don't know what changed and it is too late
to recover any data that might be lost.

> But having data deltas are in fact not even that interesting.  Files
> are not often modified without being completely rewritten.  Appending:
> that happens, but again that can be handled in an intelligent way by
> the delta layer.
>
> The most interesting is the directory and metadata deltas, which do
> make a delta-fs like implementation much more effective and nicer as a
> dumb union type filesystem.  Mind, unionfs and aufs are rapidly
> acquiring non-union traits, like inode number storage, virtual hard
> links (not to speak of whiteouts).  Which makes them all the more
> hackish, I much prefer a conceptually clean solution.

That is certainly something for unionfs-fuse as far as it isn't done
already. Doesn't even need any delta algorithm, just separate storage
of metadata and normal data. Iirc Bernd already did look into
preserving inodes in unionfs-fuse.

>> In unionfs-fuse files are currently always completly copy-up-ed when
>> modified. There a change of /union/read-only/path/file will give the
>> new file if it wasn't modified or the old modified file if it was. But
>> never mix the two.
>
> So?  Some datasets may reside in multiple files, and inconsistencies
> could just as as well present themselves in that case.
>
> Thanks,
> Miklos

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-06 12:50                 ` Goswin von Brederlow
@ 2009-03-06 13:21                   ` Miklos Szeredi
  2009-03-07  8:56                     ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: Miklos Szeredi @ 2009-03-06 13:21 UTC (permalink / raw)
  To: goswin-v-b; +Cc: miklos, fuse-devel, linux-fsdevel

On Fri, 06 Mar 2009, Goswin von Brederlow wrote:
> > It can detect changes to the underlying file from the modification
> > time (which it can store together with the delta).
> 
> But then what do you do? You don't know what changed and it is to late
> to recover any data that might be lost.

No data has been lost.  Or rather the data has been lost, when the
underlying filesystem was modified.  Oh, you didn't want that data to
be lost?  Why did you overwrite it then?  See?

All the data is there: the changes you made to the file are there in
the delta, and the underlying file is still there too, we just didn't
preserve the old version, but this is not what deltafs is about, and
it's not what unionfs is about either.

What to do about such a situation?  It's a policy decision, it could

 - return the underlying file and ignore the delta
 - ignore the changes made to the underlying file and mechanically
   merge the delta (possibly resulting in inconsistencies).
 - return an error (EIO or ESTALE)

In addition the user might have the option to merge the delta by hand,
this should be much easier than trying to merge two files, where you
know _nothing_ about the changes themselves.

In fact the delta format might not even want to implement data deltas,
or they could be made optional, or configurable per file.  Whatever.

> > The most interesting is the directory and metadata deltas, which do
> > make a delta-fs like implementation much more effective and nicer as a
> > dumb union type filesystem.  Mind, unionfs and aufs are rapidly
> > acquiring non-union traits, like inode number storage, virtual hard
> > links (not to speak of whiteouts).  Which makes them all the more
> > hackish, I much prefer a conceptually clean solution.
> 
> That is certainly something for unionfs-fuse as far as it isn't done
> already. Doesn't even need any delta algorithm, just seperate storage
> of metadata and normal data. Iirc Bernd already did look into
> preserving inodes in unionfs-fuse.

At which point it's less of a union and more of a delta.  A union is
taking two ordinary plain filesystems and merging them according to
some rules.  A delta is having a plain filesystem and merging a
special format one into this.  See where I'm going?

My main question in all this is: does it make sense to keep the union
semantics and tack on various features into separate storage?  Or is
it better to abandon the unioning altogether in favor of a more
efficient delta format?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-06 12:37                 ` Goswin von Brederlow
@ 2009-03-07  1:16                   ` hooanon05
  2009-03-07  9:01                     ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-07  1:16 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> I could think of three possible values:
> 
> 1) the sum of the st_blocks of both branches (total space used)
> 2) min(size/blocksize, sum of st_blocks) (appox. what the file would use)
> 3) st_bocks of read-write branch (read-write space used only)

4) calculate st_blocks by merging based upon bitmap (dmpath)

Some applications may not like st_blksize either since it may be
different per block.

One more difficulty (hard to support issue).
- open a file on deltafs
- unlink it
- rmdir its parent
- write or fchmod to it
- rewind+read or mmap+read from the opened file
- can it be read correctly?


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-06 11:35               ` Miklos Szeredi
  2009-03-06 12:50                 ` Goswin von Brederlow
@ 2009-03-07  1:19                 ` hooanon05
  2009-03-07  9:03                   ` Goswin von Brederlow
  1 sibling, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-07  1:19 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: goswin-v-b, bs_lists, fuse-devel, linux-fsdevel


Miklos Szeredi:
> The most interesting is the directory and metadata deltas, which do
> make a delta-fs like implementation much more effective and nicer as a
> dumb union type filesystem.  Mind, unionfs and aufs are rapidly
> acquiring non-union traits, like inode number storage, virtual hard
> links (not to speak of whiteouts).  Which makes them all the more
> hackish, I much prefer a conceptually clean solution.

I agree that the delta is a good approach. But it is for filedata only.
As I wrote in another mail, how do you support hardlinks on the lower
readonly layer?


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-06 13:21                   ` Miklos Szeredi
@ 2009-03-07  8:56                     ` Goswin von Brederlow
  0 siblings, 0 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-07  8:56 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: goswin-v-b, fuse-devel, linux-fsdevel

Miklos Szeredi <miklos@szeredi.hu> writes:

> On Fri, 06 Mar 2009, Goswin von Brederlow wrote:
>> > The most interesting is the directory and metadata deltas, which do
>> > make a delta-fs like implementation much more effective and nicer as a
>> > dumb union type filesystem.  Mind, unionfs and aufs are rapidly
>> > acquiring non-union traits, like inode number storage, virtual hard
>> > links (not to speak of whiteouts).  Which makes them all the more
>> > hackish, I much prefer a conceptually clean solution.
>> 
>> That is certainly something for unionfs-fuse as far as it isn't done
>> already. Doesn't even need any delta algorithm, just seperate storage
>> of metadata and normal data. Iirc Bernd already did look into
>> preserving inodes in unionfs-fuse.
>
> At which point it's less of a union and more of a delta.  A union is
> taking two ordinary plain filesystems and merging them according to
> some rules.  A delta is having a plain filesystem and merging a
> special format one into this.  See where I'm going?
>
> My main question in all this is: does it make sense to keep the union
> semantics and tack on various features into separate storage?  Or is
> it better to abandon the unioning altogether in favor of a more
> efficient delta format?
>
> Thanks,
> Miklos

The union should be transparent to the user and applications. The
problem is that some applications do look at the inode number,
m/a/ctime and so on. So to be transparent the metadata has to be
handled in a union as well. If you want to call it a delta filesystem
then fine. I would expect a delta filesystem to do deltaing of file
content though, not just be able to track metadata changes separately
from file data. But that is nothing to fight over.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-07  1:16                   ` hooanon05
@ 2009-03-07  9:01                     ` Goswin von Brederlow
  2009-03-07  9:12                       ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-07  9:01 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> I could think of three possible values:
>> 
>> 1) the sum of the st_blocks of both branches (total space used)
>> 2) min(size/blocksize, sum of st_blocks) (appox. what the file would use)
>> 3) st_bocks of read-write branch (read-write space used only)
>
> 4) calculate st_blocks by merging based upon bitmap (dmpath)
>
> Some applications may not like st_blksize either since it may be
> different per block.
>
> One more difficulty (hard to support issue).
> - open a file on deltafs
> - unlink it
> - rmdir its parent
> - write or fchmod to it
> - rewind+read or mmap+read from the opened file
> - cat it be read correctly?
>
>
> J. R. Okajima

For unlink and rmdir you need whiteout files.

For all the rest you just have to keep the file descriptor(s) alive as
long as the file is opened and cache metadata in the internal FD
structure.

It isn't trivial but it isn't really hard.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-07  1:19                 ` hooanon05
@ 2009-03-07  9:03                   ` Goswin von Brederlow
  2009-03-07  9:16                     ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-07  9:03 UTC (permalink / raw)
  To: hooanon05; +Cc: Miklos Szeredi, goswin-v-b, bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Miklos Szeredi:
>> The most interesting is the directory and metadata deltas, which do
>> make a delta-fs like implementation much more effective and nicer as a
>> dumb union type filesystem.  Mind, unionfs and aufs are rapidly
>> acquiring non-union traits, like inode number storage, virtual hard
>> links (not to speak of whiteouts).  Which makes them all the more
>> hackish, I much prefer a conceptually clean solution.
>
> I agree that the delta is a good approach. But it is for filedata only.
> As I wrote in another mail, how do you support hardlinks on the lower
> readonly layer?
>
>
> J. R. Okajima

Use a filename -> inode indirection and delta based on inode
numbers. Although then you also have to consider the device id in case
there are multiple filesystems mounted in your read-only branch. So
filename -> (dev, inode).

MfG
        Goswin



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-07  9:01                     ` Goswin von Brederlow
@ 2009-03-07  9:12                       ` hooanon05
  2009-03-09 12:21                         ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-07  9:12 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> For unlink and rmdir you need whiteout files.
> 
> For all the rest you just have to keep the file descriptor(s) alive as
> long as the file is opened and cache metadat in the internal FD
> structure.
> 
> It isn't trivial but it isn't really hard.

Generally it is true.
Actually deltafs already has something like whiteout. It is a symlink to
"(null)".
When a user writes something to the file after unlink+rmdir, where can
deltafs copyup? At least in the current implementation, there is no
place for it.


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-07  9:03                   ` Goswin von Brederlow
@ 2009-03-07  9:16                     ` hooanon05
  2009-03-09 12:28                       ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-07  9:16 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: Miklos Szeredi, bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> > Miklos Szeredi:
> >> The most interesting is the directory and metadata deltas, which do
> >> make a delta-fs like implementation much more effective and nicer as a
> >> dumb union type filesystem.  Mind, unionfs and aufs are rapidly
> >> acquiring non-union traits, like inode number storage, virtual hard
> >> links (not to speak of whiteouts).  Which makes them all the more
> >> hackish, I much prefer a conceptually clean solution.
	:::
> Use a filename -> inode indirection and delta based on inode
> numbers. Although the you also have to consider the device id in case
> there are multiple filesystem mounted in your read-only branch. So
> filename -> (dev, inode).

Agreed.
While Miklos seems to dislike the inum table, it is necessary I think.


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-07  9:12                       ` hooanon05
@ 2009-03-09 12:21                         ` Goswin von Brederlow
  2009-03-09 13:35                           ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-09 12:21 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> For unlink and rmdir you need whiteout files.
>> 
>> For all the rest you just have to keep the file descriptor(s) alive as
>> long as the file is opened and cache metadat in the internal FD
>> structure.
>> 
>> It isn't trivial but it isn't really hard.
>
> Generally it is true.
> Actually deltafs already has something like whiteout. It is a symlink to
> "(null)".
> When a user writes something to the file after unlink+rmdir, where can
> deltafs copyup? At least in the current implementation, there is no
> place for it.
>
>
> J. R. Okajima

In the delta branch create a meta/ and files/ directory. In the meta/
directory you keep whiteout files and stat updates. In files/ you
store only changes in the file data itself.

So "rm foo/bar/baz" will create meta/foo/bar/baz.whiteout. Then "echo
blafase >>foo/bar/baz" first searches for a file to copy-up, sees the
meta/foo/bar/baz.whiteout and knows the file was deleted. It then
creates a new files/foo/bar/baz. It might have to copy-up foo and
foo/bar for that though.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-07  9:16                     ` hooanon05
@ 2009-03-09 12:28                       ` Goswin von Brederlow
  2009-03-09 13:36                         ` hooanon05
  2009-03-09 14:13                         ` Nikolaus Rath
  0 siblings, 2 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-09 12:28 UTC (permalink / raw)
  To: hooanon05
  Cc: Goswin von Brederlow, Miklos Szeredi, bs_lists, fuse-devel,
	linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> > Miklos Szeredi:
>> >> The most interesting is the directory and metadata deltas, which do
>> >> make a delta-fs like implementation much more effective and nicer as a
>> >> dumb union type filesystem.  Mind, unionfs and aufs are rapidly
>> >> acquiring non-union traits, like inode number storage, virtual hard
>> >> links (not to speak of whiteouts).  Which makes them all the more
>> >> hackish, I much prefer a conceptually clean solution.
> 	:::
>> Use a filename -> inode indirection and delta based on inode
>> numbers. Although the you also have to consider the device id in case
>> there are multiple filesystem mounted in your read-only branch. So
>> filename -> (dev, inode).
>
> Agreed.
> While Miklos seems to dislike the inum table, it is necessary I think.
>
>
> J. R. Okajima

Only if you want to fully support hardlinks. Do you know of anything
that really needs true hardlinks?

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 12:21                         ` Goswin von Brederlow
@ 2009-03-09 13:35                           ` hooanon05
  2009-03-09 14:22                             ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-09 13:35 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> hooanon05@yahoo.co.jp writes:
	::
> > When a user writes something to the file after unlink+rmdir, where can
> > deltafs copyup? At least in the current implementation, there is no
> > place for it.
	:::
> In the delta branch create a meta/ and files/ directory. In the meta/
> directory you keep whiteout files and stat updates. In files/ you
> store only changes in the file data itself.
> 
> So "rm foo/bar/baz" will create meta/foo/bar/baz.whiteout. Then "echo
> blafase >>foo/bar/baz" first searches for a file to copy-up, sees the
> meta/foo/bar/baz.whiteout and knows the file was deleted. It then
> creates a new files/foo/bar/baz. It might have to copy-up foo and
> foo/bar for that though.

??
What I am pointing out is systemcall level operation instead of command
level.
In your example (or implementation approach), can deltafs successfully
operate "write(2) to and read(2) from foo/bar/baz" after "rm -r foo/bar"?

(from my previous mail)
----------------------------------------------------------------------
- open a file on deltafs
- unlink it
- rmdir its parent
- write or fchmod to it
- rewind+read or mmap+read from the opened file
- can it be read correctly?
----------------------------------------------------------------------


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 12:28                       ` Goswin von Brederlow
@ 2009-03-09 13:36                         ` hooanon05
  2009-03-09 14:25                           ` Goswin von Brederlow
  2009-03-12  9:19                           ` Tomas M
  2009-03-09 14:13                         ` Nikolaus Rath
  1 sibling, 2 replies; 51+ messages in thread
From: hooanon05 @ 2009-03-09 13:36 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: Miklos Szeredi, bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> Only if you want to fully support hardlinks. Do you know of anything
> that really needs true hardlinks?

Do you expect me to name an application in the real world?
I don't know. But I believe when fileA and fileB are hardlinked, users
and applications expect they always provide same filedata.
If deltafs never supports hardlinks, then I will say nothing about it.


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: delta filesystem prototype
  2009-03-09 12:28                       ` Goswin von Brederlow
  2009-03-09 13:36                         ` hooanon05
@ 2009-03-09 14:13                         ` Nikolaus Rath
  1 sibling, 0 replies; 51+ messages in thread
From: Nikolaus Rath @ 2009-03-09 14:13 UTC (permalink / raw)
  To: fuse-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
  Cc: linux-fsdevel-u79uwXL29TY76Z2rM5mHXA

Goswin von Brederlow <goswin-v-b@web.de> writes:
> hooanon05@yahoo.co.jp writes:
>
>> Goswin von Brederlow:
>>> > Miklos Szeredi:
>>> >> The most interesting is the directory and metadata deltas, which do
>>> >> make a delta-fs like implementation much more effective and nicer as a
>>> >> dumb union type filesystem.  Mind, unionfs and aufs are rapidly
>>> >> acquiring non-union traits, like inode number storage, virtual hard
>>> >> links (not to speak of whiteouts).  Which makes them all the more
>>> >> hackish, I much prefer a conceptually clean solution.
>> 	:::
>>> Use a filename -> inode indirection and delta based on inode
>>> numbers. Although the you also have to consider the device id in case
>>> there are multiple filesystem mounted in your read-only branch. So
>>> filename -> (dev, inode).
>>
>> Agreed.
>> While Miklos seems to dislike the inum table, it is necessary I think.
>
> Only if you want to fully support hardlinks. Do you know of anything
> that really needs true hardlinks?

storeBackup, for example (but I guess that's nothing you'd run on a
unionfs). 


Best,

   -Nikolaus

-- 
 »Time flies like an arrow, fruit flies like a Banana.«

  PGP fingerprint: 5B93 61F8 4EA2 E279 ABF6  02CF A9AD B7F8 AE4E 425C


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
fuse-devel mailing list
fuse-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/fuse-devel

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 13:35                           ` hooanon05
@ 2009-03-09 14:22                             ` Goswin von Brederlow
  2009-03-09 15:25                               ` hooanon05
  2009-03-09 16:36                               ` Miklos Szeredi
  0 siblings, 2 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-09 14:22 UTC (permalink / raw)
  To: hooanon05; +Cc: bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> hooanon05@yahoo.co.jp writes:
> 	::
>> > When a user writes something to the file after unlink+rmdir, where can
>> > deltafs copyup? At least in the current implementation, there is no
>> > place for it.
> 	:::
>> In the delta branch create a meta/ and files/ directory. In the meta/
>> directory you keep whiteout files and stat updates. In files/ you
>> store only changes in the file data itself.
>> 
>> So "rm foo/bar/baz" will create meta/foo/bar/baz.whiteout. Then "echo
>> blafase >>foo/bar/baz" first searches for a file to copy-up, sees the
>> meta/foo/bar/baz.whiteout and knows the file was deleted. It then
>> creates a new files/foo/bar/baz. It might have to copy-up foo and
>> foo/bar for that though.
>
> ??
> What I am pointing out is systemcall level operation instead of command
> level.
> In your example (or implementation approach), can deltafs successfully
> operate "write(2) to and read(2) from foo/bar/baz" after "rm -r foo/bar"?
>
> (from my previous mail)
> ----------------------------------------------------------------------
> - open a file on deltafs
> - unlink it
> - rmdir its parent
> - write or fchmod to it
> - rewind+read or mmap+read from the opened file
> - cat it be read correctly?
> ----------------------------------------------------------------------
>
>
> J. R. Okajima

As I said, keep the FDs open. In case of the split meta/files dirs you
would have orig_fd, meta_fd and delta_fd. And make read/write operate
on fi->fh only, not the path to the file. That way the delta-fs will
operate like any other userspace program doing read/write after an
unlink.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 13:36                         ` hooanon05
@ 2009-03-09 14:25                           ` Goswin von Brederlow
  2009-03-09 15:20                             ` hooanon05
  2009-03-12  9:19                           ` Tomas M
  1 sibling, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-09 14:25 UTC (permalink / raw)
  To: hooanon05
  Cc: Goswin von Brederlow, Miklos Szeredi, bs_lists, fuse-devel,
	linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> Only if you want to fully support hardlinks. Do you know of anything
>> that really needs true hardlinks?
>
> Do you expect me the name of an application in real world?
> I don't know. But I believe when fileA and fileB are hardlinked, users
> and applications expect they always provide same filedata.
> If deltafs never support hardlink, then I will say nothing about it.
>
>
> J. R. Okajima

Only place I use hardlinks is between binaries in a debian package and
backups. In both cases they are totally read-only and only a space
saver. Not so that changes to one file will also show up in the other.

I can't think of anything that would break if hardlinks would have a
copy-on-write/copy-up semantic in delta-fs.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 14:25                           ` Goswin von Brederlow
@ 2009-03-09 15:20                             ` hooanon05
  2009-03-10  8:06                               ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-09 15:20 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: Miklos Szeredi, bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> I can't think of anything that would break if hardlinks would have a
> copy-on-write/copy-up semantic in delta-fs.

What do you expect from the last "cat b"?
(from my old mail)
----------------------------------------------------------------------
Since you don't care about the inum, the hardlink will not work correctly. For
instance,
$ cd /base
$ echo a > a
$ ln a b
$ cd /deltafs
$ echo a >> a
$ cat b
----------------------------------------------------------------------


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 14:22                             ` Goswin von Brederlow
@ 2009-03-09 15:25                               ` hooanon05
  2009-03-10  8:14                                 ` Goswin von Brederlow
  2009-03-09 16:36                               ` Miklos Szeredi
  1 sibling, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-09 15:25 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> As I said, keep the FDs open. In case of the split meta/files dirs you
> would have orig_fd, meta_fd and delta_fd. And make read/write operate

So your approach is that the files under meta/ and delta/ will be
created and opened in open(2) to deltafs...
While I am afraid it is not effective, I think I could understand your
approach.


J. R. Okajima


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 14:22                             ` Goswin von Brederlow
  2009-03-09 15:25                               ` hooanon05
@ 2009-03-09 16:36                               ` Miklos Szeredi
  1 sibling, 0 replies; 51+ messages in thread
From: Miklos Szeredi @ 2009-03-09 16:36 UTC (permalink / raw)
  To: goswin-v-b; +Cc: hooanon05, fuse-devel, linux-fsdevel

On Mon, 09 Mar 2009, Goswin von Brederlow wrote:
> hooanon05@yahoo.co.jp writes:
> 
> > Goswin von Brederlow:
> > (from my previous mail)
> > ----------------------------------------------------------------------
> > - open a file on deltafs
> > - unlink it
> > - rmdir its parent
> > - write or fchmod to it
> > - rewind+read or mmap+read from the opened file
> > - cat it be read correctly?
> > ----------------------------------------------------------------------
> >
> >
> > J. R. Okajima
> 
> As I said, keep the FDs open. In case of the split meta/files dirs you
> would have orig_fd, meta_fd and delta_fd. And make read/write operate
> on fi->fh only, not the path to the file. That way the delta-fs will
> operate like any other userspace program doing read/write after an
> unlink.

This is just an implementation detail.  In fact the current prototype
*does* support unlinked but still open files, but it does it
differently (with the help of the fuse library's hide-on-unlink
feature).

Miklos

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 15:20                             ` hooanon05
@ 2009-03-10  8:06                               ` Goswin von Brederlow
  2009-03-10  8:44                                 ` hooanon05
  0 siblings, 1 reply; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-10  8:06 UTC (permalink / raw)
  To: hooanon05; +Cc: Miklos Szeredi, bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> I can't think of anything that would break if hardlinks would have a
>> copy-on-write/copy-up semantic in delta-fs.
>
> What do you expect from the last "cat b"?
> (from my old mail)
> ----------------------------------------------------------------------
> Since you don't care about the inum, the hardlink will not work correctly. For
> instance,
> $ cd /base
> $ echo a > a
> $ ln a b
> $ cd /deltafs
> $ echo a >> a
> $ cat b
> ----------------------------------------------------------------------

The wrong thing, just 'a'. But the question remains: What software
breaks? I can live with this shortcoming.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 15:25                               ` hooanon05
@ 2009-03-10  8:14                                 ` Goswin von Brederlow
  0 siblings, 0 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-10  8:14 UTC (permalink / raw)
  To: hooanon05; +Cc: Goswin von Brederlow, bs_lists, fuse-devel, linux-fsdevel

hooanon05@yahoo.co.jp writes:

> Goswin von Brederlow:
>> As I said, keep the FDs open. In case of the split meta/files dirs you
>> would have orig_fd, meta_fd and delta_fd. And make read/write operate
>
> So your approach is that the files under meta/ and delta/ will be
> created and opened in open(2) to deltafs...
> While I am afraid it is not effective, I think I could understand your
> approach.
>
>
> J. R. Okajima

At a minimum I would open the file on the read-only branch unless the
file is already changed. You can create the meta/ and files/ FDs as
needed in any callback that modifies them. If the file is deleted at
that point you can create a dummy file and delete it while keeping the
FD. Just always creating the meta/ and files/ entries when a file is
opened for write seems simpler though and you need them anyway when
the first write call comes. If you track atime then you even need the
meta/ all the time.

But that is really just an implementation detail. It is how I would do it.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-10  8:06                               ` Goswin von Brederlow
@ 2009-03-10  8:44                                 ` hooanon05
  2009-03-12  9:22                                   ` Tomas M
  0 siblings, 1 reply; 51+ messages in thread
From: hooanon05 @ 2009-03-10  8:44 UTC (permalink / raw)
  To: Goswin von Brederlow; +Cc: Miklos Szeredi, bs_lists, fuse-devel, linux-fsdevel


Goswin von Brederlow:
> The wrong thing, just 'a'. But the question remains: What software
> breaks? I can live with this shortcoming.

Then you won't meet the problem.


J. R. Okajima

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-09 13:36                         ` hooanon05
  2009-03-09 14:25                           ` Goswin von Brederlow
@ 2009-03-12  9:19                           ` Tomas M
  1 sibling, 0 replies; 51+ messages in thread
From: Tomas M @ 2009-03-12  9:19 UTC (permalink / raw)
  To: hooanon05
  Cc: Goswin von Brederlow, Miklos Szeredi, bs_lists, fuse-devel,
	linux-fsdevel

> Goswin von Brederlow:
>> Only if you want to fully support hardlinks. Do you know of anything
>> that really needs true hardlinks?

A lot of KDE internals depend on real hardlinks ...


Tomas M


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-10  8:44                                 ` hooanon05
@ 2009-03-12  9:22                                   ` Tomas M
  2009-03-12  9:40                                     ` Goswin von Brederlow
  0 siblings, 1 reply; 51+ messages in thread
From: Tomas M @ 2009-03-12  9:22 UTC (permalink / raw)
  To: hooanon05
  Cc: Goswin von Brederlow, Miklos Szeredi, bs_lists, fuse-devel,
	linux-fsdevel

> Goswin von Brederlow:
>> The wrong thing, just 'a'. But the question remains: What software
>> breaks? I can live with this shortcoming.

In my experience, KDE (3.x) always breaks if the filesystem it runs on doesn't support everything like hardlinks, and so on. It does a lot of tricky things with hardlinks.

Tomas M


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [fuse-devel] delta filesystem prototype
  2009-03-12  9:22                                   ` Tomas M
@ 2009-03-12  9:40                                     ` Goswin von Brederlow
  0 siblings, 0 replies; 51+ messages in thread
From: Goswin von Brederlow @ 2009-03-12  9:40 UTC (permalink / raw)
  To: Tomas M
  Cc: hooanon05, Goswin von Brederlow, Miklos Szeredi, bs_lists,
	fuse-devel, linux-fsdevel

Tomas M <tomas@slax.org> writes:

>> Goswin von Brederlow:
>>> The wrong thing, just 'a'. But the question remains: What software
>>> breaks? I can live with this shortcoming.
>
> In my experience, KDE (3.x) always breaks if the filesystem it runs on doesn't support everything like hardlinks, and so on. It does a lot of tricky things with hardlinks.
>
> Tomas M

Works fine in unionfs-fuse without special hardlink fixing tricks.

MfG
        Goswin

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2009-03-12 10:32 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-28 14:42 delta filesystem prototype Miklos Szeredi
2009-02-28 17:22 ` [fuse-devel] " Goswin von Brederlow
2009-03-01  0:38   ` Bernd Schubert
2009-03-01 10:17     ` Goswin von Brederlow
2009-03-04 11:21       ` Miklos Szeredi
2009-03-04 14:12         ` Goswin von Brederlow
2009-03-05 13:06           ` Miklos Szeredi
2009-03-05 19:58             ` Goswin von Brederlow
2009-03-06  4:10               ` hooanon05
2009-03-06 12:37                 ` Goswin von Brederlow
2009-03-07  1:16                   ` hooanon05
2009-03-07  9:01                     ` Goswin von Brederlow
2009-03-07  9:12                       ` hooanon05
2009-03-09 12:21                         ` Goswin von Brederlow
2009-03-09 13:35                           ` hooanon05
2009-03-09 14:22                             ` Goswin von Brederlow
2009-03-09 15:25                               ` hooanon05
2009-03-10  8:14                                 ` Goswin von Brederlow
2009-03-09 16:36                               ` Miklos Szeredi
2009-03-06 11:35               ` Miklos Szeredi
2009-03-06 12:50                 ` Goswin von Brederlow
2009-03-06 13:21                   ` Miklos Szeredi
2009-03-07  8:56                     ` Goswin von Brederlow
2009-03-07  1:19                 ` hooanon05
2009-03-07  9:03                   ` Goswin von Brederlow
2009-03-07  9:16                     ` hooanon05
2009-03-09 12:28                       ` Goswin von Brederlow
2009-03-09 13:36                         ` hooanon05
2009-03-09 14:25                           ` Goswin von Brederlow
2009-03-09 15:20                             ` hooanon05
2009-03-10  8:06                               ` Goswin von Brederlow
2009-03-10  8:44                                 ` hooanon05
2009-03-12  9:22                                   ` Tomas M
2009-03-12  9:40                                     ` Goswin von Brederlow
2009-03-12  9:19                           ` Tomas M
2009-03-09 14:13                         ` Nikolaus Rath
2009-03-03  8:31 ` hooanon05
2009-03-03 10:59   ` [fuse-devel] " Goswin von Brederlow
2009-03-03 13:11     ` hooanon05
2009-03-03 15:27       ` Dave Kleikamp
2009-03-03 15:50         ` hooanon05
2009-03-03 15:54           ` Dave Kleikamp
2009-03-03 16:02             ` hooanon05
2009-03-03 16:14               ` Dave Kleikamp
2009-03-03 16:19                 ` hooanon05
2009-03-03 16:46                   ` Dave Kleikamp
2009-03-03 17:13                     ` hooanon05
2009-03-04 11:52                       ` Goswin von Brederlow
2009-03-04 14:10                         ` Dave Kleikamp
2009-03-04 16:23                           ` hooanon05
2009-03-04 11:49       ` Goswin von Brederlow

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.