All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. R. Okajima" <hooanon05@yahoo.co.jp>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, "J. R. Okajima" <hooanon05@yahoo.co.jp>
Subject: [RFC Aufs2 #2 19/28] aufs inode
Date: Mon, 16 Mar 2009 16:20:31 +0900	[thread overview]
Message-ID: <1237188040-11404-20-git-send-email-hooanon05@yahoo.co.jp> (raw)
In-Reply-To: <1237188040-11404-1-git-send-email-hooanon05@yahoo.co.jp>

initial commit
inode operations and private data

Signed-off-by: J. R. Okajima <hooanon05@yahoo.co.jp>
---
 fs/aufs/i_op.c     |  857 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/aufs/i_op_add.c |  625 +++++++++++++++++++++++++++++++++++
 fs/aufs/i_op_del.c |  471 ++++++++++++++++++++++++++
 fs/aufs/i_op_ren.c |  929 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/aufs/iinfo.c    |  257 +++++++++++++++
 fs/aufs/inode.c    |  356 ++++++++++++++++++++
 fs/aufs/inode.h    |  471 ++++++++++++++++++++++++++
 7 files changed, 3966 insertions(+), 0 deletions(-)
 create mode 100644 fs/aufs/i_op.c
 create mode 100644 fs/aufs/i_op_add.c
 create mode 100644 fs/aufs/i_op_del.c
 create mode 100644 fs/aufs/i_op_ren.c
 create mode 100644 fs/aufs/iinfo.c
 create mode 100644 fs/aufs/inode.c
 create mode 100644 fs/aufs/inode.h

diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c
new file mode 100644
index 0000000..e7a7279
--- /dev/null
+++ b/fs/aufs/i_op.c
@@ -0,0 +1,857 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode operations (except add/del/rename)
+ */
+
+#include <linux/device_cgroup.h>
+#include <linux/fs_stack.h>
+#include <linux/uaccess.h>
+#include "aufs.h"
+
+static int h_permission(struct inode *h_inode, int mask,
+			struct vfsmount *h_mnt, int brperm)
+{
+	int err;
+	const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+
+	err = -EACCES;
+	if ((write_mask && IS_IMMUTABLE(h_inode))
+	    || ((mask & MAY_EXEC)
+		&& S_ISREG(h_inode->i_mode)
+		&& ((h_mnt->mnt_flags & MNT_NOEXEC)
+		    || !(h_inode->i_mode & S_IXUGO))))
+		goto out;
+
+	/*
+	 * - skip the lower fs test in the case of write to ro branch.
+	 * - nfs dir permission write check is optimized, but a policy for
+	 *   link/rename requires a real check.
+	 */
+	if ((write_mask && !au_br_writable(brperm))
+	    || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
+		&& write_mask && !(mask & MAY_READ))
+	    || !h_inode->i_op->permission) {
+		/* AuLabel(generic_permission); */
+		err = generic_permission(h_inode, mask, NULL);
+	} else {
+		/* AuLabel(h_inode->permission); */
+		err = h_inode->i_op->permission(h_inode, mask);
+		AuTraceErr(err);
+	}
+
+	if (!err)
+		err = devcgroup_inode_permission(h_inode, mask);
+	if (!err)
+		err = security_inode_permission
+			(h_inode, mask & (MAY_READ | MAY_WRITE | MAY_EXEC
+					  | MAY_APPEND));
+
+ out:
+	return err;
+}
+
+static int aufs_permission(struct inode *inode, int mask)
+{
+	int err;
+	aufs_bindex_t bindex, bend;
+	const unsigned char isdir = !!S_ISDIR(inode->i_mode);
+	const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+	struct inode *h_inode;
+	struct super_block *sb;
+	struct au_branch *br;
+
+	sb = inode->i_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	ii_read_lock_child(inode);
+
+	if (!isdir || write_mask) {
+		h_inode = au_h_iptr(inode, au_ibstart(inode));
+		AuDebugOn(!h_inode
+			  || ((h_inode->i_mode & S_IFMT)
+			      != (inode->i_mode & S_IFMT)));
+		err = 0;
+		bindex = au_ibstart(inode);
+		br = au_sbr(sb, bindex);
+		err = h_permission(h_inode, mask, br->br_mnt, br->br_perm);
+
+		if (write_mask && !err) {
+			/* test whether the upper writable branch exists */
+			err = -EROFS;
+			for (; bindex >= 0; bindex--)
+				if (!au_br_rdonly(au_sbr(sb, bindex))) {
+					err = 0;
+					break;
+				}
+		}
+		goto out;
+	}
+
+	/* non-write to dir */
+	err = 0;
+	bend = au_ibend(inode);
+	for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
+		h_inode = au_h_iptr(inode, bindex);
+		if (h_inode) {
+			AuDebugOn(!S_ISDIR(h_inode->i_mode));
+			br = au_sbr(sb, bindex);
+			err = h_permission(h_inode, mask, br->br_mnt,
+					   br->br_perm);
+		}
+	}
+
+ out:
+	ii_read_unlock(inode);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
+				  struct nameidata *nd)
+{
+	struct dentry *ret, *parent;
+	struct inode *inode, *h_inode;
+	struct mutex *mtx;
+	struct super_block *sb;
+	int err, npositive;
+	aufs_bindex_t bstart;
+
+	IMustLock(dir);
+
+	sb = dir->i_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+	err = au_alloc_dinfo(dentry);
+	ret = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_read_lock_parent(parent, AuLock_IR);
+	npositive = au_lkup_dentry(dentry, au_dbstart(parent), /*type*/0, nd);
+	di_read_unlock(parent, AuLock_IR);
+	err = npositive;
+	ret = ERR_PTR(err);
+	if (unlikely(err < 0))
+		goto out_unlock;
+
+	inode = NULL;
+	if (npositive) {
+		bstart = au_dbstart(dentry);
+		h_inode = au_h_dptr(dentry, bstart)->d_inode;
+		if (!S_ISDIR(h_inode->i_mode)) {
+			/*
+			 * stop 'race'-ing between hardlinks under different
+			 * parents.
+			 */
+			mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
+			mutex_lock(mtx);
+			inode = au_new_inode(dentry, /*must_new*/0);
+			mutex_unlock(mtx);
+		} else
+			inode = au_new_inode(dentry, /*must_new*/0);
+		ret = (void *)inode;
+	}
+	if (IS_ERR(inode))
+		goto out_unlock;
+
+	ret = d_splice_alias(inode, dentry);
+	if (unlikely(IS_ERR(ret) && inode))
+		ii_write_unlock(inode);
+	au_store_oflag(nd);
+
+ out_unlock:
+	di_write_unlock(dentry);
+ out:
+	si_read_unlock(sb);
+	return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
+			  const unsigned char add_entry, aufs_bindex_t bcpup,
+			  aufs_bindex_t bstart)
+{
+	int err;
+	struct dentry *h_parent;
+	struct inode *h_dir;
+
+	if (add_entry) {
+		au_update_dbstart(dentry);
+		IMustLock(parent->d_inode);
+	} else
+		di_write_lock_parent(parent);
+
+	err = 0;
+	if (!au_h_dptr(parent, bcpup)) {
+		if (bstart < bcpup)
+			err = au_cpdown_dirs(dentry, bcpup);
+		else
+			err = au_cpup_dirs(dentry, bcpup);
+	}
+	if (!err && add_entry) {
+		h_parent = au_h_dptr(parent, bcpup);
+		h_dir = h_parent->d_inode;
+		mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
+		err = au_lkup_neg(dentry, bcpup);
+		/* todo: no unlock here */
+		mutex_unlock(&h_dir->i_mutex);
+		if (bstart < bcpup && au_dbstart(dentry) < 0) {
+			au_set_dbstart(dentry, 0);
+			au_update_dbrange(dentry, /*do_put_zero*/0);
+		}
+	}
+
+	if (!add_entry)
+		di_write_unlock(parent);
+	if (!err)
+		err = bcpup; /* success */
+
+	return err;
+}
+
+/*
+ * decide the branch and the parent dir where we will create a new entry.
+ * returns new bindex or an error.
+ * copyup the parent dir if needed.
+ */
+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
+	      struct au_wr_dir_args *args)
+{
+	int err;
+	aufs_bindex_t bcpup, bstart, src_bstart;
+	const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
+							 ADD_ENTRY);
+	struct super_block *sb;
+	struct dentry *parent;
+	struct au_sbinfo *sbinfo;
+
+	sb = dentry->d_sb;
+	sbinfo = au_sbi(sb);
+	parent = dget_parent(dentry);
+	bstart = au_dbstart(dentry);
+	bcpup = bstart;
+	if (args->force_btgt < 0) {
+		if (src_dentry) {
+			src_bstart = au_dbstart(src_dentry);
+			if (src_bstart < bstart)
+				bcpup = src_bstart;
+		} else if (add_entry) {
+			err = AuWbrCreate(sbinfo, dentry,
+					  au_ftest_wrdir(args->flags, ISDIR));
+			bcpup = err;
+		}
+
+		if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
+			if (add_entry)
+				err = AuWbrCopyup(sbinfo, dentry);
+			else {
+				if (!IS_ROOT(dentry)) {
+					di_read_lock_parent(parent, !AuLock_IR);
+					err = AuWbrCopyup(sbinfo, dentry);
+					di_read_unlock(parent, !AuLock_IR);
+				} else
+					err = AuWbrCopyup(sbinfo, dentry);
+			}
+			bcpup = err;
+			if (unlikely(err < 0))
+				goto out;
+		}
+	} else {
+		bcpup = args->force_btgt;
+		AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
+	}
+	AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
+	if (bstart < bcpup)
+		au_update_dbrange(dentry, /*do_put_zero*/1);
+
+	err = bcpup;
+	if (bcpup == bstart)
+		goto out; /* success */
+
+	/* copyup the new parent into the branch we process */
+	err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
+
+ out:
+	dput(parent);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct dentry *au_pinned_h_parent(struct au_pin *pin)
+{
+	if (pin && pin->parent)
+		return au_h_dptr(pin->parent, pin->bindex);
+	return NULL;
+}
+
+void au_unpin(struct au_pin *p)
+{
+	if (au_ftest_pin(p->flags, MNT_WRITE))
+		mnt_drop_write(p->h_mnt);
+	if (!p->hdir)
+		return;
+
+	au_hin_imtx_unlock(p->hdir);
+	if (!au_ftest_pin(p->flags, DI_LOCKED))
+		di_read_unlock(p->parent, AuLock_IR);
+	iput(p->hdir->hi_inode);
+	dput(p->parent);
+	p->parent = NULL;
+	p->hdir = NULL;
+	p->h_mnt = NULL;
+}
+
+int au_do_pin(struct au_pin *p)
+{
+	int err;
+	struct super_block *sb;
+	struct dentry *h_dentry, *h_parent;
+	struct au_branch *br;
+	struct inode *h_dir;
+
+	err = 0;
+	sb = p->dentry->d_sb;
+	br = au_sbr(sb, p->bindex);
+	if (IS_ROOT(p->dentry)) {
+		if (au_ftest_pin(p->flags, MNT_WRITE)) {
+			p->h_mnt = br->br_mnt;
+			err = mnt_want_write(p->h_mnt);
+			if (unlikely(err)) {
+				au_fclr_pin(p->flags, MNT_WRITE);
+				goto out_err;
+			}
+		}
+		goto out;
+	}
+
+	h_dentry = NULL;
+	if (p->bindex <= au_dbend(p->dentry))
+		h_dentry = au_h_dptr(p->dentry, p->bindex);
+
+	p->parent = dget_parent(p->dentry);
+	if (!au_ftest_pin(p->flags, DI_LOCKED))
+		di_read_lock(p->parent, AuLock_IR, p->lsc_di);
+
+	h_dir = NULL;
+	h_parent = au_h_dptr(p->parent, p->bindex);
+	p->hdir = au_hi(p->parent->d_inode, p->bindex);
+	if (p->hdir)
+		h_dir = p->hdir->hi_inode;
+
+	/* udba case */
+	if (unlikely(!p->hdir || !h_dir)) {
+		err = -EBUSY;
+		if (!au_ftest_pin(p->flags, DI_LOCKED))
+			di_read_unlock(p->parent, AuLock_IR);
+		dput(p->parent);
+		p->parent = NULL;
+		goto out_err;
+	}
+
+	au_igrab(h_dir);
+	au_hin_imtx_lock_nested(p->hdir, p->lsc_hi);
+
+	if (h_dentry) {
+		err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
+		if (unlikely(err)) {
+			au_fclr_pin(p->flags, MNT_WRITE);
+			goto out_unpin;
+		}
+	}
+
+	if (au_ftest_pin(p->flags, MNT_WRITE)) {
+		p->h_mnt = br->br_mnt;
+		err = mnt_want_write(p->h_mnt);
+		if (unlikely(err)) {
+			au_fclr_pin(p->flags, MNT_WRITE);
+			goto out_unpin;
+		}
+	}
+	goto out; /* success */
+
+ out_unpin:
+	au_unpin(p);
+ out_err:
+	AuErr("err %d\n", err);
+	err = -EBUSY;
+ out:
+	return err;
+}
+
+void au_pin_init(struct au_pin *p, struct dentry *dentry,
+		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
+		 unsigned int udba, unsigned char flags)
+{
+	p->dentry = dentry;
+	p->udba = udba;
+	p->lsc_di = lsc_di;
+	p->lsc_hi = lsc_hi;
+	p->flags = flags;
+	p->bindex = bindex;
+
+	p->parent = NULL;
+	p->hdir = NULL;
+	p->h_mnt = NULL;
+}
+
+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
+	   unsigned int udba, unsigned char flags)
+{
+	au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
+		    udba, flags);
+	return au_do_pin(pin);
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define AuIcpup_DID_CPUP	1
+#define au_ftest_icpup(flags, name)	((flags) & AuIcpup_##name)
+#define au_fset_icpup(flags, name)	{ (flags) |= AuIcpup_##name; }
+#define au_fclr_icpup(flags, name)	{ (flags) &= ~AuIcpup_##name; }
+
+struct au_icpup_args {
+	unsigned char flags;
+	unsigned char pin_flags;
+	aufs_bindex_t btgt;
+	struct au_pin pin;
+	struct path h_path;
+	struct inode *h_inode;
+};
+
+static int au_lock_and_icpup(struct dentry *dentry, struct iattr *ia,
+			     struct au_icpup_args *a)
+{
+	int err;
+	unsigned int udba;
+	loff_t sz;
+	aufs_bindex_t bstart;
+	struct dentry *hi_wh, *parent;
+	struct inode *inode;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= 0
+	};
+
+	di_write_lock_child(dentry);
+	bstart = au_dbstart(dentry);
+	inode = dentry->d_inode;
+	if (S_ISDIR(inode->i_mode))
+		au_fset_wrdir(wr_dir_args.flags, ISDIR);
+	/* plink or hi_wh() case */
+	if (bstart != au_ibstart(inode))
+		wr_dir_args.force_btgt = au_ibstart(inode);
+	err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
+	if (unlikely(err < 0))
+		goto out_dentry;
+	a->btgt = err;
+	if (err != bstart)
+		au_fset_icpup(a->flags, DID_CPUP);
+
+	err = 0;
+	a->pin_flags = AuPin_MNT_WRITE;
+	parent = NULL;
+	if (!IS_ROOT(dentry)) {
+		au_fset_pin(a->pin_flags, DI_LOCKED);
+		parent = dget_parent(dentry);
+		di_write_lock_parent(parent);
+	}
+
+	udba = au_opt_udba(dentry->d_sb);
+	if (d_unhashed(dentry) || (ia->ia_valid & ATTR_FILE))
+		udba = AuOpt_UDBA_NONE;
+	err = au_pin(&a->pin, dentry, a->btgt, udba, a->pin_flags);
+	if (unlikely(err)) {
+		if (parent) {
+			di_write_unlock(parent);
+			dput(parent);
+		}
+		goto out_dentry;
+	}
+	a->h_path.dentry = au_h_dptr(dentry, bstart);
+	a->h_inode = a->h_path.dentry->d_inode;
+	mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
+	sz = -1;
+	if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
+		sz = ia->ia_size;
+
+	hi_wh = NULL;
+	if (au_ftest_icpup(a->flags, DID_CPUP) && d_unhashed(dentry)) {
+		hi_wh = au_hi_wh(inode, a->btgt);
+		if (!hi_wh) {
+			err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
+			if (unlikely(err))
+				goto out_unlock;
+			hi_wh = au_hi_wh(inode, a->btgt);
+			/* todo: revalidate hi_wh? */
+		}
+	}
+
+	if (parent) {
+		au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
+		di_downgrade_lock(parent, AuLock_IR);
+		dput(parent);
+	}
+	if (!au_ftest_icpup(a->flags, DID_CPUP))
+		goto out; /* success */
+
+	if (!d_unhashed(dentry)) {
+		err = au_sio_cpup_simple(dentry, a->btgt, sz, AuCpup_DTIME);
+		if (!err)
+			a->h_path.dentry = au_h_dptr(dentry, a->btgt);
+	} else if (!hi_wh)
+		a->h_path.dentry = au_h_dptr(dentry, a->btgt);
+	else
+		a->h_path.dentry = hi_wh; /* do not dget here */
+
+ out_unlock:
+	mutex_unlock(&a->h_inode->i_mutex);
+	a->h_inode = a->h_path.dentry->d_inode;
+	if (!err) {
+		mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
+		goto out; /* success */
+	}
+
+	au_unpin(&a->pin);
+
+ out_dentry:
+	di_write_unlock(dentry);
+ out:
+	return err;
+}
+
+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int err;
+	struct inode *inode;
+	struct super_block *sb;
+	struct file *file;
+	struct au_icpup_args *a;
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	inode = dentry->d_inode;
+	IMustLock(inode);
+	sb = dentry->d_sb;
+	si_read_lock(sb, AuLock_FLUSH);
+
+	file = NULL;
+	if (ia->ia_valid & ATTR_FILE) {
+		/* currently ftruncate(2) only */
+		file = ia->ia_file;
+		fi_write_lock(file);
+		ia->ia_file = au_h_fptr(file, au_fbstart(file));
+	}
+
+	if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+		ia->ia_valid &= ~ATTR_MODE;
+
+	err = au_lock_and_icpup(dentry, ia, a);
+	if (unlikely(err < 0))
+		goto out_si;
+	if (au_ftest_icpup(a->flags, DID_CPUP)) {
+		ia->ia_file = NULL;
+		ia->ia_valid &= ~ATTR_FILE;
+	}
+
+	a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
+	if (ia->ia_valid & ATTR_SIZE) {
+		struct file *f;
+
+		if (ia->ia_size < i_size_read(inode)) {
+			/* unmap only */
+			err = vmtruncate(inode, ia->ia_size);
+			if (unlikely(err))
+				goto out_unlock;
+		}
+
+		f = NULL;
+		if (ia->ia_valid & ATTR_FILE)
+			f = ia->ia_file;
+		mutex_unlock(&a->h_inode->i_mutex);
+		err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
+		mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
+	} else
+		err = vfsub_notify_change(&a->h_path, ia);
+	if (!err)
+		au_cpup_attr_changeable(inode);
+
+ out_unlock:
+	mutex_unlock(&a->h_inode->i_mutex);
+	au_unpin(&a->pin);
+	di_write_unlock(dentry);
+ out_si:
+	if (file) {
+		fi_write_unlock(file);
+		ia->ia_file = file;
+		ia->ia_valid |= ATTR_FILE;
+	}
+	si_read_unlock(sb);
+	kfree(a);
+ out:
+	return err;
+}
+
+static int au_getattr_lock_reval(struct dentry *dentry, unsigned int sigen)
+{
+	int err;
+	struct inode *inode;
+	struct dentry *parent;
+
+	err = 0;
+	inode = dentry->d_inode;
+	di_write_lock_child(dentry);
+	if (au_digen(dentry) != sigen || au_iigen(inode) != sigen) {
+		parent = dget_parent(dentry);
+		di_read_lock_parent(parent, AuLock_IR);
+		/* returns a number of positive dentries */
+		err = au_refresh_hdentry(dentry, inode->i_mode & S_IFMT);
+		if (err > 0)
+			err = au_refresh_hinode(inode, dentry);
+		di_read_unlock(parent, AuLock_IR);
+		dput(parent);
+		if (unlikely(!err))
+			err = -EIO;
+	}
+	di_downgrade_lock(dentry, AuLock_IR);
+
+	return err;
+}
+
+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
+			     unsigned int nlink)
+{
+	inode->i_mode = st->mode;
+	inode->i_uid = st->uid;
+	inode->i_gid = st->gid;
+	inode->i_atime = st->atime;
+	inode->i_mtime = st->mtime;
+	inode->i_ctime = st->ctime;
+
+	au_cpup_attr_nlink(inode, /*force*/0);
+	if (S_ISDIR(inode->i_mode)) {
+		inode->i_nlink -= nlink;
+		inode->i_nlink += st->nlink;
+	}
+
+	spin_lock(&inode->i_lock);
+	inode->i_blocks = st->blocks;
+	i_size_write(inode, st->size);
+	spin_unlock(&inode->i_lock);
+}
+
+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
+			struct dentry *dentry, struct kstat *st)
+{
+	int err;
+	unsigned int mnt_flags;
+	aufs_bindex_t bindex;
+	unsigned char udba_none, positive, did_lock;
+	struct super_block *sb, *h_sb;
+	struct inode *inode;
+	struct vfsmount *h_mnt;
+	struct dentry *h_dentry;
+
+	err = 0;
+	did_lock = 0;
+	sb = dentry->d_sb;
+	inode = dentry->d_inode;
+	si_read_lock(sb, AuLock_FLUSH);
+	if (IS_ROOT(dentry)) {
+		/* lock free root dinfo */
+		h_dentry = dget(au_di(dentry)->di_hdentry->hd_dentry);
+		h_mnt = au_sbr_mnt(sb, 0);
+		goto getattr;
+	}
+
+	did_lock = 1;
+	mnt_flags = au_mntflags(sb);
+	udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
+
+	/* support fstat(2) */
+	if (!d_unhashed(dentry) && !udba_none) {
+		unsigned int sigen = au_sigen(sb);
+		if (au_digen(dentry) == sigen && au_iigen(inode) == sigen)
+			di_read_lock_child(dentry, AuLock_IR);
+		else {
+			err = au_getattr_lock_reval(dentry, sigen);
+			if (unlikely(err))
+				goto out;
+		}
+	} else
+		di_read_lock_child(dentry, AuLock_IR);
+
+	bindex = au_ibstart(inode);
+	h_mnt = au_sbr_mnt(sb, bindex);
+	h_sb = h_mnt->mnt_sb;
+	if (!au_test_fs_bad_iattr(h_sb) && udba_none)
+		goto out_fill; /* success */
+
+	if (au_dbstart(dentry) == bindex)
+		h_dentry = dget(au_h_dptr(dentry, bindex));
+	else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
+		h_dentry = au_plink_lkup(inode, bindex);
+		if (IS_ERR(h_dentry))
+			goto out_fill; /* pretending success */
+	} else
+		/* illegally overlapped or something */
+		goto out_fill; /* pretending success */
+
+ getattr:
+	positive = !!h_dentry->d_inode;
+	if (positive)
+		err = vfs_getattr(h_mnt, h_dentry, st);
+	dput(h_dentry);
+	if (!err) {
+		if (positive)
+			au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
+		goto out_fill; /* success */
+	}
+	goto out;
+
+ out_fill:
+	generic_fillattr(inode, st);
+ out:
+	if (did_lock)
+		di_read_unlock(dentry, AuLock_IR);
+	si_read_unlock(sb);
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
+		      int bufsiz)
+{
+	int err;
+	struct super_block *sb;
+	struct dentry *h_dentry;
+
+	err = -EINVAL;
+	h_dentry = au_h_dptr(dentry, bindex);
+	if (unlikely(/* !h_dentry
+		     || !h_dentry->d_inode
+		     || !h_dentry->d_inode->i_op
+		     || */ !h_dentry->d_inode->i_op->readlink))
+		goto out;
+
+	err = security_inode_readlink(h_dentry);
+	if (unlikely(err))
+		goto out;
+
+	sb = dentry->d_sb;
+	if (!au_test_ro(sb, bindex, dentry->d_inode)) {
+		vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
+		fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
+	}
+	err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
+
+ out:
+	return err;
+}
+
+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	int err;
+
+	aufs_read_lock(dentry, AuLock_IR);
+	err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
+	aufs_read_unlock(dentry, AuLock_IR);
+
+	return err;
+}
+
+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	int err;
+	char *buf;
+	mm_segment_t old_fs;
+
+	err = -ENOMEM;
+	buf = __getname();
+	if (unlikely(!buf))
+		goto out;
+
+	aufs_read_lock(dentry, AuLock_IR);
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = h_readlink(dentry, au_dbstart(dentry), (char __user *)buf,
+			 PATH_MAX);
+	set_fs(old_fs);
+	aufs_read_unlock(dentry, AuLock_IR);
+
+	if (err >= 0) {
+		buf[err] = 0;
+		/* will be freed by put_link */
+		nd_set_link(nd, buf);
+		return NULL; /* success */
+	}
+	__putname(buf);
+
+ out:
+	path_put(&nd->path);
+	AuTraceErr(err);
+	return ERR_PTR(err);
+}
+
+static void aufs_put_link(struct dentry *dentry __maybe_unused,
+			  struct nameidata *nd, void *cookie __maybe_unused)
+{
+	__putname(nd_get_link(nd));
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void aufs_truncate_range(struct inode *inode __maybe_unused,
+				loff_t start __maybe_unused,
+				loff_t end __maybe_unused)
+{
+	AuUnsupport();
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct inode_operations aufs_symlink_iop = {
+	.permission	= aufs_permission,
+	.setattr	= aufs_setattr,
+	.getattr	= aufs_getattr,
+	.readlink	= aufs_readlink,
+	.follow_link	= aufs_follow_link,
+	.put_link	= aufs_put_link
+};
+
+struct inode_operations aufs_dir_iop = {
+	.create		= aufs_create,
+	.lookup		= aufs_lookup,
+	.link		= aufs_link,
+	.unlink		= aufs_unlink,
+	.symlink	= aufs_symlink,
+	.mkdir		= aufs_mkdir,
+	.rmdir		= aufs_rmdir,
+	.mknod		= aufs_mknod,
+	.rename		= aufs_rename,
+
+	.permission	= aufs_permission,
+	.setattr	= aufs_setattr,
+	.getattr	= aufs_getattr
+};
+
+struct inode_operations aufs_iop = {
+	.permission	= aufs_permission,
+	.setattr	= aufs_setattr,
+	.getattr	= aufs_getattr,
+	.truncate_range	= aufs_truncate_range
+};
diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c
new file mode 100644
index 0000000..516120d
--- /dev/null
+++ b/fs/aufs/i_op_add.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode operations (add entry)
+ */
+
+#include "aufs.h"
+
+/*
+ * final procedure of adding a new entry, except link(2).
+ * remove whiteout, instantiate, copyup the parent dir's times and size
+ * and update version.
+ * if it failed, re-create the removed whiteout.
+ */
+static int epilog(struct inode *dir, aufs_bindex_t bindex,
+		  struct dentry *wh_dentry, struct dentry *dentry)
+{
+	int err, rerr;
+	aufs_bindex_t bwh;
+	struct path h_path;
+	struct inode *inode, *h_dir;
+	struct dentry *wh;
+
+	bwh = -1;
+	if (wh_dentry) {
+		h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
+		IMustLock(h_dir);
+		AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
+		bwh = au_dbwh(dentry);
+		h_path.dentry = wh_dentry;
+		h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
+		err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
+					  dentry);
+		if (unlikely(err))
+			goto out;
+	}
+
+	inode = au_new_inode(dentry, /*must_new*/1);
+	if (!IS_ERR(inode)) {
+		d_instantiate(dentry, inode);
+		dir = dentry->d_parent->d_inode; /* dir inode is locked */
+		IMustLock(dir);
+		if (au_ibstart(dir) == au_dbstart(dentry))
+			au_cpup_attr_timesizes(dir);
+		dir->i_version++;
+		return 0; /* success */
+	}
+
+	err = PTR_ERR(inode);
+	if (!wh_dentry)
+		goto out;
+
+	/* revert */
+	/* dir inode is locked */
+	wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
+	rerr = PTR_ERR(wh);
+	if (IS_ERR(wh)) {
+		AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
+			AuDLNPair(dentry), err, rerr);
+		err = -EIO;
+	} else
+		dput(wh);
+
+ out:
+	return err;
+}
+
+/*
+ * simple tests for the adding inode operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir)
+{
+	int err;
+	umode_t h_mode;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	h_inode = h_dentry->d_inode;
+	if (!dentry->d_inode) {
+		err = -EEXIST;
+		if (unlikely(h_inode))
+			goto out;
+	} else {
+		/* rename(2) case */
+		err = -EIO;
+		if (unlikely(!h_inode || !h_inode->i_nlink))
+			goto out;
+
+		h_mode = h_inode->i_mode;
+		if (!isdir) {
+			err = -EISDIR;
+			if (unlikely(S_ISDIR(h_mode)))
+				goto out;
+		} else if (unlikely(!S_ISDIR(h_mode))) {
+			err = -ENOTDIR;
+			goto out;
+		}
+	}
+
+	err = -EIO;
+	/* expected parent dir is locked */
+	if (unlikely(h_parent != h_dentry->d_parent))
+		goto out;
+	err = 0;
+
+ out:
+	return err;
+}
+
+/*
+ * initial procedure of adding a new entry.
+ * prepare writable branch and the parent dir, lock it,
+ * and lookup whiteout for the new entry.
+ */
+static struct dentry*
+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
+		  struct dentry *src_dentry, struct au_pin *pin,
+		  struct au_wr_dir_args *wr_dir_args)
+{
+	struct dentry *wh_dentry, *h_parent;
+	struct super_block *sb;
+	struct au_branch *br;
+	int err;
+	unsigned int udba;
+	aufs_bindex_t bcpup;
+
+	err = au_wr_dir(dentry, src_dentry, wr_dir_args);
+	bcpup = err;
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	udba = au_opt_udba(sb);
+	err = au_pin(pin, dentry, bcpup, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	h_parent = au_pinned_h_parent(pin);
+	if (udba != AuOpt_UDBA_NONE
+	    && au_dbstart(dentry) == bcpup) {
+		err = au_may_add(dentry, bcpup, h_parent,
+				 au_ftest_wrdir(wr_dir_args->flags, ISDIR));
+		wh_dentry = ERR_PTR(err);
+		if (unlikely(err))
+			goto out_unpin;
+	}
+
+	br = au_sbr(sb, bcpup);
+	if (dt) {
+		struct path tmp = {
+			.dentry	= h_parent,
+			.mnt	= br->br_mnt
+		};
+		au_dtime_store(dt, au_pinned_parent(pin), &tmp);
+	}
+
+	wh_dentry = NULL;
+	if (bcpup != au_dbwh(dentry))
+		goto out; /* success */
+
+	wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
+
+ out_unpin:
+	if (IS_ERR(wh_dentry))
+		au_unpin(pin);
+ out:
+	return wh_dentry;
+}
+
+/* ---------------------------------------------------------------------- */
+
+enum { Mknod, Symlink, Creat };
+struct simple_arg {
+	int type;
+	union {
+		struct {
+			int mode;
+			struct nameidata *nd;
+		} c;
+		struct {
+			const char *symname;
+		} s;
+		struct {
+			int mode;
+			dev_t dev;
+		} m;
+	} u;
+};
+
+static int add_simple(struct inode *dir, struct dentry *dentry,
+		      struct simple_arg *arg)
+{
+	int err;
+	aufs_bindex_t bstart;
+	unsigned char created;
+	struct au_dtime dt;
+	struct au_pin pin;
+	struct path h_path;
+	struct dentry *wh_dentry, *parent;
+	struct inode *h_dir;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	IMustLock(dir);
+
+	parent = dentry->d_parent; /* dir inode is locked */
+	aufs_read_lock(dentry, AuLock_DW);
+	di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
+				      &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out;
+
+	bstart = au_dbstart(dentry);
+	h_path.dentry = au_h_dptr(dentry, bstart);
+	h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
+	h_dir = au_pinned_h_dir(&pin);
+	switch (arg->type) {
+	case Creat:
+		err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
+		break;
+	case Symlink:
+		err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
+		break;
+	case Mknod:
+		err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
+		break;
+	default:
+		BUG();
+	}
+	created = !err;
+	if (!err)
+		err = epilog(dir, bstart, wh_dentry, dentry);
+
+	/* revert */
+	if (unlikely(created && err && h_path.dentry->d_inode)) {
+		int rerr;
+		rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
+		if (rerr) {
+			AuIOErr("%.*s revert failure(%d, %d)\n",
+				AuDLNPair(dentry), err, rerr);
+			err = -EIO;
+		}
+		au_dtime_revert(&dt);
+		d_drop(dentry);
+	}
+
+	au_unpin(&pin);
+	dput(wh_dentry);
+
+ out:
+	if (unlikely(err)) {
+		au_update_dbstart(dentry);
+		d_drop(dentry);
+	}
+	di_write_unlock(parent);
+	aufs_read_unlock(dentry, AuLock_DW);
+	return err;
+}
+
+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+	struct simple_arg arg = {
+		.type = Mknod,
+		.u.m = {
+			.mode	= mode,
+			.dev	= dev
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	struct simple_arg arg = {
+		.type = Symlink,
+		.u.s.symname = symname
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd)
+{
+	struct simple_arg arg = {
+		.type = Creat,
+		.u.c = {
+			.mode	= mode,
+			.nd	= nd
+		}
+	};
+	return add_simple(dir, dentry, &arg);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_link_args {
+	aufs_bindex_t bdst, bsrc;
+	struct au_pin pin;
+	struct path h_path;
+	struct dentry *src_parent, *parent;
+};
+
+static int au_cpup_before_link(struct dentry *src_dentry,
+			       struct au_link_args *a)
+{
+	int err;
+	struct dentry *h_src_dentry;
+	struct mutex *h_mtx;
+
+	di_read_lock_parent(a->src_parent, AuLock_IR);
+	err = au_test_and_cpup_dirs(src_dentry, a->bdst);
+	if (unlikely(err))
+		goto out;
+
+	h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
+	h_mtx = &h_src_dentry->d_inode->i_mutex;
+	err = au_pin(&a->pin, src_dentry, a->bdst,
+		     au_opt_udba(src_dentry->d_sb),
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	if (unlikely(err))
+		goto out;
+	mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+	err = au_sio_cpup_simple(src_dentry, a->bdst, -1,
+				 AuCpup_DTIME /* | AuCpup_KEEPLINO */);
+	mutex_unlock(h_mtx);
+	au_unpin(&a->pin);
+
+ out:
+	di_read_unlock(a->src_parent, AuLock_IR);
+	return err;
+}
+
+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
+{
+	int err;
+	unsigned char plink;
+	struct inode *h_inode, *inode;
+	struct dentry *h_src_dentry;
+	struct super_block *sb;
+
+	plink = 0;
+	h_inode = NULL;
+	sb = src_dentry->d_sb;
+	inode = src_dentry->d_inode;
+	if (au_ibstart(inode) <= a->bdst)
+		h_inode = au_h_iptr(inode, a->bdst);
+	if (!h_inode || !h_inode->i_nlink) {
+		/* copyup src_dentry as the name of dentry. */
+		au_set_dbstart(src_dentry, a->bdst);
+		au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
+		h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
+		mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
+		err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1,
+					 AuCpup_KEEPLINO, a->parent);
+		mutex_unlock(&h_inode->i_mutex);
+		au_set_h_dptr(src_dentry, a->bdst, NULL);
+		au_set_dbstart(src_dentry, a->bsrc);
+	} else {
+		/* the inode of src_dentry already exists on a.bdst branch */
+		h_src_dentry = d_find_alias(h_inode);
+		if (!h_src_dentry && au_plink_test(inode)) {
+			plink = 1;
+			h_src_dentry = au_plink_lkup(inode, a->bdst);
+			err = PTR_ERR(h_src_dentry);
+			if (IS_ERR(h_src_dentry))
+				goto out;
+
+			if (unlikely(!h_src_dentry->d_inode)) {
+				dput(h_src_dentry);
+				h_src_dentry = NULL;
+			}
+
+		}
+		if (h_src_dentry) {
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path);
+			dput(h_src_dentry);
+		} else {
+			AuIOErr("no dentry found for hi%lu on b%d\n",
+				h_inode->i_ino, a->bdst);
+			err = -EIO;
+		}
+	}
+
+	if (!err && !plink)
+		au_plink_append(inode, a->bdst, a->h_path.dentry);
+
+out:
+	return err;
+}
+
+int aufs_link(struct dentry *src_dentry, struct inode *dir,
+	      struct dentry *dentry)
+{
+	int err, rerr;
+	struct au_dtime dt;
+	struct au_link_args *a;
+	struct dentry *wh_dentry, *h_src_dentry;
+	struct inode *inode;
+	struct super_block *sb;
+	struct au_wr_dir_args wr_dir_args = {
+		/* .force_btgt	= -1, */
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	IMustLock(dir);
+	inode = src_dentry->d_inode;
+	IMustLock(inode);
+
+	err = -ENOMEM;
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	a->parent = dentry->d_parent; /* dir inode is locked */
+	aufs_read_and_write_lock2(dentry, src_dentry, /*AuLock_FLUSH*/0);
+	a->src_parent = dget_parent(src_dentry);
+	wr_dir_args.force_btgt = au_dbstart(src_dentry);
+
+	di_write_lock_parent(a->parent);
+	wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
+				      &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_unlock;
+
+	err = 0;
+	sb = dentry->d_sb;
+	a->bdst = au_dbstart(dentry);
+	a->h_path.dentry = au_h_dptr(dentry, a->bdst);
+	a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
+	a->bsrc = au_dbstart(src_dentry);
+	if (au_opt_test(au_mntflags(sb), PLINK)) {
+		if (a->bdst < a->bsrc
+		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
+			err = au_cpup_or_link(src_dentry, a);
+		else {
+			h_src_dentry = au_h_dptr(src_dentry, a->bdst);
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path);
+		}
+	} else {
+		/*
+		 * copyup src_dentry to the branch we process,
+		 * and then link(2) to it.
+		 */
+		if (a->bdst < a->bsrc
+		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
+			au_unpin(&a->pin);
+			di_write_unlock(a->parent);
+			err = au_cpup_before_link(src_dentry, a);
+			if (!err) {
+				di_write_lock_parent(a->parent);
+				err = au_pin(&a->pin, dentry, a->bdst,
+					     au_opt_udba(sb),
+					     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+				if (unlikely(err))
+					goto out_wh;
+			}
+		}
+		if (!err) {
+			h_src_dentry = au_h_dptr(src_dentry, a->bdst);
+			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
+					 &a->h_path);
+		}
+	}
+	if (unlikely(err))
+		goto out_unpin;
+
+	if (wh_dentry) {
+		a->h_path.dentry = wh_dentry;
+		err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
+					  dentry);
+		if (unlikely(err))
+			goto out_revert;
+	}
+
+	dir->i_version++;
+	if (au_ibstart(dir) == au_dbstart(dentry))
+		au_cpup_attr_timesizes(dir);
+	inc_nlink(inode);
+	inode->i_ctime = dir->i_ctime;
+	if (!d_unhashed(a->h_path.dentry))
+		d_instantiate(dentry, au_igrab(inode));
+	else
+		/* some filesystem calls d_drop() */
+		d_drop(dentry);
+	goto out_unpin; /* success */
+
+ out_revert:
+	rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
+	if (!rerr)
+		goto out_dt;
+	AuIOErr("%.*s reverting failed(%d, %d)\n",
+		AuDLNPair(dentry), err, rerr);
+	err = -EIO;
+ out_dt:
+	d_drop(dentry);
+	au_dtime_revert(&dt);
+ out_unpin:
+	au_unpin(&a->pin);
+ out_wh:
+	dput(wh_dentry);
+ out_unlock:
+	if (unlikely(err)) {
+		au_update_dbstart(dentry);
+		d_drop(dentry);
+	}
+	di_write_unlock(a->parent);
+	dput(a->src_parent);
+	aufs_read_and_write_unlock2(dentry, src_dentry);
+	kfree(a);
+ out:
+	return err;
+}
+
+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int err, rerr;
+	aufs_bindex_t bindex;
+	unsigned char diropq;
+	struct au_pin pin;
+	struct path h_path;
+	struct dentry *wh_dentry, *parent, *opq_dentry;
+	struct mutex *h_mtx;
+	struct super_block *sb;
+	struct au_dtime dt;
+	struct au_wr_dir_args wr_dir_args = {
+		.force_btgt	= -1,
+		.flags		= AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
+	};
+
+	IMustLock(dir);
+
+	aufs_read_lock(dentry, AuLock_DW);
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
+				      &wr_dir_args);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out;
+
+	sb = dentry->d_sb;
+	bindex = au_dbstart(dentry);
+	h_path.dentry = au_h_dptr(dentry, bindex);
+	h_path.mnt = au_sbr_mnt(sb, bindex);
+	err = vfsub_mkdir(au_pinned_h_dir(&pin), &h_path, mode);
+	if (unlikely(err))
+		goto out_unlock;
+
+	/* make the dir opaque */
+	diropq = 0;
+	h_mtx = &h_path.dentry->d_inode->i_mutex;
+	if (wh_dentry
+	    || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
+		mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+		opq_dentry = au_diropq_create(dentry, bindex);
+		mutex_unlock(h_mtx);
+		err = PTR_ERR(opq_dentry);
+		if (IS_ERR(opq_dentry))
+			goto out_dir;
+		dput(opq_dentry);
+		diropq = 1;
+	}
+
+	err = epilog(dir, bindex, wh_dentry, dentry);
+	if (!err) {
+		inc_nlink(dir);
+		goto out_unlock; /* success */
+	}
+
+	/* revert */
+	if (diropq) {
+		AuLabel(revert opq);
+		mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+		rerr = au_diropq_remove(dentry, bindex);
+		mutex_unlock(h_mtx);
+		if (rerr) {
+			AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
+				AuDLNPair(dentry), err, rerr);
+			err = -EIO;
+		}
+	}
+
+ out_dir:
+	AuLabel(revert dir);
+	rerr = vfsub_rmdir(au_pinned_h_dir(&pin), &h_path);
+	if (rerr) {
+		AuIOErr("%.*s reverting dir failed(%d, %d)\n",
+			AuDLNPair(dentry), err, rerr);
+		err = -EIO;
+	}
+	d_drop(dentry);
+	au_dtime_revert(&dt);
+ out_unlock:
+	au_unpin(&pin);
+	dput(wh_dentry);
+ out:
+	if (unlikely(err)) {
+		au_update_dbstart(dentry);
+		d_drop(dentry);
+	}
+	di_write_unlock(parent);
+	aufs_read_unlock(dentry, AuLock_DW);
+	return err;
+}
diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c
new file mode 100644
index 0000000..681edb4
--- /dev/null
+++ b/fs/aufs/i_op_del.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode operations (del entry)
+ */
+
+#include "aufs.h"
+
+/*
+ * decide if a new whiteout for @dentry is necessary or not.
+ * when it is necessary, prepare the parent dir for the upper branch whose
+ * branch index is @bcpup for creation. the actual creation of the whiteout will
+ * be done by caller.
+ * return value:
+ * 0: wh is unnecessary
+ * plus: wh is necessary
+ * minus: error
+ */
+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
+{
+	int need_wh, err;
+	aufs_bindex_t bstart;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	bstart = au_dbstart(dentry);
+	if (*bcpup < 0) {
+		*bcpup = bstart;
+		if (au_test_ro(sb, bstart, dentry->d_inode)) {
+			err = AuWbrCopyup(au_sbi(sb), dentry);
+			*bcpup = err;
+			if (unlikely(err < 0))
+				goto out;
+		}
+	} else
+		AuDebugOn(bstart < *bcpup
+			  || au_test_ro(sb, *bcpup, dentry->d_inode));
+	AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
+
+	if (*bcpup != bstart) {
+		err = au_cpup_dirs(dentry, *bcpup);
+		if (unlikely(err))
+			goto out;
+		need_wh = 1;
+	} else {
+		aufs_bindex_t old_bend, new_bend, bdiropq = -1;
+
+		old_bend = au_dbend(dentry);
+		if (isdir) {
+			bdiropq = au_dbdiropq(dentry);
+			au_set_dbdiropq(dentry, -1);
+		}
+		need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
+					 /*nd*/NULL);
+		err = need_wh;
+		if (isdir)
+			au_set_dbdiropq(dentry, bdiropq);
+		if (unlikely(err < 0))
+			goto out;
+		new_bend = au_dbend(dentry);
+		if (!need_wh && old_bend != new_bend) {
+			au_set_h_dptr(dentry, new_bend, NULL);
+			au_set_dbend(dentry, old_bend);
+		}
+	}
+	AuDbg("need_wh %d\n", need_wh);
+	err = need_wh;
+
+ out:
+	return err;
+}
+
+/*
+ * simple tests for the del-entry operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir)
+{
+	int err;
+	umode_t h_mode;
+	struct dentry *h_dentry, *h_latest;
+	struct inode *h_inode;
+
+	h_dentry = au_h_dptr(dentry, bindex);
+	h_inode = h_dentry->d_inode;
+	if (dentry->d_inode) {
+		err = -ENOENT;
+		if (unlikely(!h_inode || !h_inode->i_nlink))
+			goto out;
+
+		h_mode = h_inode->i_mode;
+		if (!isdir) {
+			err = -EISDIR;
+			if (unlikely(S_ISDIR(h_mode)))
+				goto out;
+		} else if (unlikely(!S_ISDIR(h_mode))) {
+			err = -ENOTDIR;
+			goto out;
+		}
+	} else {
+		/* rename(2) case */
+		err = -EIO;
+		if (unlikely(h_inode))
+			goto out;
+	}
+
+	err = -ENOENT;
+	/* expected parent dir is locked */
+	if (unlikely(h_parent != h_dentry->d_parent))
+		goto out;
+	err = 0;
+
+	/*
+	 * rmdir a dir may break the consistency on some filesystem.
+	 * let's try heavy test.
+	 */
+	err = -EACCES;
+	if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
+		goto out;
+
+	h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
+				   au_sbr(dentry->d_sb, bindex));
+	err = -EIO;
+	if (IS_ERR(h_latest))
+		goto out;
+	if (h_latest == h_dentry)
+		err = 0;
+	dput(h_latest);
+
+ out:
+	return err;
+}
+
+/*
+ * decide the branch where we operate for @dentry. the branch index will be set
+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
+ * dir for reverting.
+ * when a new whiteout is necessary, create it.
+ */
+static struct dentry*
+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
+		    struct au_dtime *dt, struct au_pin *pin)
+{
+	struct dentry *wh_dentry;
+	struct super_block *sb;
+	struct path h_path;
+	int err, need_wh;
+	unsigned int udba;
+	aufs_bindex_t bcpup;
+
+	need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
+	wh_dentry = ERR_PTR(need_wh);
+	if (unlikely(need_wh < 0))
+		goto out;
+
+	sb = dentry->d_sb;
+	udba = au_opt_udba(sb);
+	bcpup = *rbcpup;
+	err = au_pin(pin, dentry, bcpup, udba,
+		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+	wh_dentry = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+
+	h_path.dentry = au_pinned_h_parent(pin);
+	if (udba != AuOpt_UDBA_NONE
+	    && au_dbstart(dentry) == bcpup) {
+		err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
+		wh_dentry = ERR_PTR(err);
+		if (unlikely(err))
+			goto out_unpin;
+	}
+
+	h_path.mnt = au_sbr_mnt(sb, bcpup);
+	au_dtime_store(dt, au_pinned_parent(pin), &h_path);
+	wh_dentry = NULL;
+	if (!need_wh)
+		goto out; /* success, no need to create whiteout */
+
+	wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
+	if (!IS_ERR(wh_dentry))
+		goto out; /* success */
+	/* returns with the parent is locked and wh_dentry is dget-ed */
+
+ out_unpin:
+	au_unpin(pin);
+ out:
+	return wh_dentry;
+}
+
+/*
+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
+ * in order to be revertible and save time for removing many child whiteouts
+ * under the dir.
+ * returns 1 when there are too many child whiteout and caller should remove
+ * them asynchronously. returns 0 when the number of children is enough small to
+ * remove now or the branch fs is a remote fs.
+ * otherwise return an error.
+ */
+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
+			   struct au_nhash *whlist, struct inode *dir)
+{
+	int rmdir_later, err, dirwh;
+	struct dentry *h_dentry;
+	struct super_block *sb;
+
+	sb = dentry->d_sb;
+	h_dentry = au_h_dptr(dentry, bindex);
+	err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
+	if (unlikely(err))
+		goto out;
+
+	/* stop monitoring */
+	au_hin_free(au_hi(dentry->d_inode, bindex));
+
+	if (!au_test_fs_remote(h_dentry->d_sb)) {
+		dirwh = au_sbi(sb)->si_dirwh;
+		rmdir_later = (dirwh <= 1);
+		if (!rmdir_later)
+			rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
+							      dirwh);
+		if (rmdir_later)
+			return rmdir_later;
+	}
+
+	err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
+	if (unlikely(err)) {
+		AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
+			AuDLNPair(h_dentry), bindex, err);
+		err = 0;
+	}
+
+ out:
+	return err;
+}
+
+/*
+ * final procedure for deleting a entry.
+ * maintain dentry and iattr.
+ */
+static void epilog(struct inode *dir, struct dentry *dentry,
+		   aufs_bindex_t bindex)
+{
+	struct inode *inode;
+
+	/*
+	 * even if this is not a dir,
+	 * set S_DEAD here since we need to detect the dead inode.
+	 */
+	inode = dentry->d_inode;
+	if (!inode->i_nlink)
+		inode->i_flags |= S_DEAD;
+	d_drop(dentry);
+	inode->i_ctime = dir->i_ctime;
+
+	if (atomic_read(&dentry->d_count) == 1) {
+		au_set_h_dptr(dentry, au_dbstart(dentry), NULL);
+		au_update_dbstart(dentry);
+	}
+	if (au_ibstart(dir) == bindex)
+		au_cpup_attr_timesizes(dir);
+	dir->i_version++;
+}
+
+/*
+ * when an error happened, remove the created whiteout and revert everything.
+ */
+static int do_revert(int err, struct inode *dir, aufs_bindex_t bwh,
+		     struct dentry *wh_dentry, struct dentry *dentry,
+		     struct au_dtime *dt)
+{
+	int rerr;
+	struct path h_path = {
+		.dentry	= wh_dentry,
+		.mnt	= au_sbr_mnt(dir->i_sb, bwh)
+	};
+
+	rerr = au_wh_unlink_dentry(au_h_iptr(dir, bwh), &h_path, dentry);
+	if (!rerr) {
+		au_set_dbwh(dentry, bwh);
+		au_dtime_revert(dt);
+		return 0;
+	}
+
+	AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
+		AuDLNPair(dentry), err, rerr);
+	return -EIO;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int aufs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err;
+	aufs_bindex_t bwh, bindex, bstart;
+	struct au_dtime dt;
+	struct au_pin pin;
+	struct path h_path;
+	struct inode *inode, *h_dir;
+	struct dentry *parent, *wh_dentry;
+
+	IMustLock(dir);
+	inode = dentry->d_inode;
+	if (unlikely(!inode))
+		return -ENOENT; /* possible? */
+	IMustLock(inode);
+
+	aufs_read_lock(dentry, AuLock_DW);
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+
+	bstart = au_dbstart(dentry);
+	bwh = au_dbwh(dentry);
+	bindex = -1;
+	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out;
+
+	h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
+	h_path.dentry = au_h_dptr(dentry, bstart);
+	dget(h_path.dentry);
+	if (bindex == bstart) {
+		h_dir = au_pinned_h_dir(&pin);
+		err = vfsub_unlink(h_dir, &h_path, /*force*/0);
+	} else {
+		/* dir inode is locked */
+		h_dir = wh_dentry->d_parent->d_inode;
+		IMustLock(h_dir);
+		err = 0;
+	}
+
+	if (!err) {
+		drop_nlink(inode);
+		epilog(dir, dentry, bindex);
+
+		/* update target timestamps */
+		if (bindex == bstart) {
+			vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
+			inode->i_ctime = h_path.dentry->d_inode->i_ctime;
+		} else
+			/* todo: this timestamp may be reverted later */
+			inode->i_ctime = h_dir->i_ctime;
+		goto out_unlock; /* success */
+	}
+
+	/* revert */
+	if (wh_dentry) {
+		int rerr;
+
+		rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
+		if (rerr)
+			err = rerr;
+	}
+
+ out_unlock:
+	au_unpin(&pin);
+	dput(wh_dentry);
+	dput(h_path.dentry);
+ out:
+	di_write_unlock(parent);
+	aufs_read_unlock(dentry, AuLock_DW);
+	return err;
+}
+
+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int err, rmdir_later;
+	aufs_bindex_t bwh, bindex, bstart;
+	struct au_dtime dt;
+	struct au_pin pin;
+	struct inode *inode;
+	struct dentry *parent, *wh_dentry, *h_dentry;
+	struct au_whtmp_rmdir_args *args;
+	struct au_nhash *whlist;
+
+	IMustLock(dir);
+	inode = dentry->d_inode;
+	err = -ENOENT; /* possible? */
+	if (unlikely(!inode))
+		goto out;
+	IMustLock(inode);
+
+	whlist = au_nhash_new(GFP_NOFS);
+	err = PTR_ERR(whlist);
+	if (IS_ERR(whlist))
+		goto out;
+
+	err = -ENOMEM;
+	args = kmalloc(sizeof(*args), GFP_NOFS);
+	if (unlikely(!args))
+		goto out_whlist;
+
+	aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH);
+	parent = dentry->d_parent; /* dir inode is locked */
+	di_write_lock_parent(parent);
+	err = au_test_empty(dentry, whlist);
+	if (unlikely(err))
+		goto out_args;
+
+	bstart = au_dbstart(dentry);
+	bwh = au_dbwh(dentry);
+	bindex = -1;
+	wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
+	err = PTR_ERR(wh_dentry);
+	if (IS_ERR(wh_dentry))
+		goto out_args;
+
+	h_dentry = au_h_dptr(dentry, bstart);
+	dget(h_dentry);
+	rmdir_later = 0;
+	if (bindex == bstart) {
+		err = renwh_and_rmdir(dentry, bstart, whlist, dir);
+		if (err > 0) {
+			rmdir_later = err;
+			err = 0;
+		}
+	} else {
+		/* stop monitoring */
+		au_hin_free(au_hi(inode, bstart));
+
+		/* dir inode is locked */
+		IMustLock(wh_dentry->d_parent->d_inode);
+		err = 0;
+	}
+
+	if (!err) {
+		clear_nlink(inode);
+		au_set_dbdiropq(dentry, -1);
+		epilog(dir, dentry, bindex);
+
+		if (rmdir_later) {
+			au_whtmp_kick_rmdir(dir, bstart, h_dentry, whlist,
+					    args);
+			args = NULL;
+		}
+
+		goto out_unlock; /* success */
+	}
+
+	/* revert */
+	AuLabel(revert);
+	if (wh_dentry) {
+		int rerr;
+
+		rerr = do_revert(err, dir, bwh, wh_dentry, dentry, &dt);
+		if (rerr)
+			err = rerr;
+	}
+
+ out_unlock:
+	au_unpin(&pin);
+	dput(wh_dentry);
+	dput(h_dentry);
+ out_args:
+	di_write_unlock(parent);
+	aufs_read_unlock(dentry, AuLock_DW);
+	kfree(args);
+ out_whlist:
+	au_nhash_del(whlist);
+ out:
+	return err;
+}
diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c
new file mode 100644
index 0000000..e0dd048
--- /dev/null
+++ b/fs/aufs/i_op_ren.c
@@ -0,0 +1,929 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode operation (rename entry)
+ * todo: this is crazy monster
+ */
+
+#include "aufs.h"
+
+enum { AuSRC, AuDST, AuSrcDst };
+enum { AuPARENT, AuCHILD, AuParentChild };
+
+#define AuRen_ISDIR	1
+#define AuRen_ISSAMEDIR	(1 << 1)
+#define AuRen_WHSRC	(1 << 2)
+#define AuRen_WHDST	(1 << 3)
+#define AuRen_MNT_WRITE	(1 << 4)
+#define AuRen_DT_DSTDIR	(1 << 5)
+#define AuRen_DIROPQ	(1 << 6)
+#define AuRen_CPUP	(1 << 7)
+#define au_ftest_ren(flags, name)	((flags) & AuRen_##name)
+#define au_fset_ren(flags, name)	{ (flags) |= AuRen_##name; }
+#define au_fclr_ren(flags, name)	{ (flags) &= ~AuRen_##name; }
+
+struct au_ren_args {
+	struct {
+		struct dentry *dentry, *h_dentry, *parent, *h_parent,
+			*wh_dentry;
+		struct inode *dir, *inode;
+		struct au_hinode *hdir;
+		struct au_dtime dt[AuParentChild];
+		aufs_bindex_t bstart;
+	} sd[AuSrcDst];
+
+#define src_dentry	sd[AuSRC].dentry
+#define src_dir		sd[AuSRC].dir
+#define src_inode	sd[AuSRC].inode
+#define src_h_dentry	sd[AuSRC].h_dentry
+#define src_parent	sd[AuSRC].parent
+#define src_h_parent	sd[AuSRC].h_parent
+#define src_wh_dentry	sd[AuSRC].wh_dentry
+#define src_hdir	sd[AuSRC].hdir
+#define src_h_dir	sd[AuSRC].hdir->hi_inode
+#define src_dt		sd[AuSRC].dt
+#define src_bstart	sd[AuSRC].bstart
+
+#define dst_dentry	sd[AuDST].dentry
+#define dst_dir		sd[AuDST].dir
+#define dst_inode	sd[AuDST].inode
+#define dst_h_dentry	sd[AuDST].h_dentry
+#define dst_parent	sd[AuDST].parent
+#define dst_h_parent	sd[AuDST].h_parent
+#define dst_wh_dentry	sd[AuDST].wh_dentry
+#define dst_hdir	sd[AuDST].hdir
+#define dst_h_dir	sd[AuDST].hdir->hi_inode
+#define dst_dt		sd[AuDST].dt
+#define dst_bstart	sd[AuDST].bstart
+
+	struct dentry *h_trap;
+	struct au_branch *br;
+	struct au_hinode *src_hinode;
+	struct path h_path;
+	struct au_nhash whlist;
+	aufs_bindex_t btgt;
+
+	unsigned int flags;
+
+	struct au_whtmp_rmdir_args *thargs;
+	struct dentry *h_dst;
+};
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * functions for reverting.
+ * when an error happened in a single rename systemcall, we should revert
+ * everything as if nothing happend.
+ * we don't need to revert the copied-up/down the parent dir since they are
+ * harmless.
+ */
+
+#define RevertFailure(fmt, args...) do { \
+	AuIOErr("revert failure: " fmt " (%d, %d)\n", \
+		##args, err, rerr); \
+	err = -EIO; \
+} while (0)
+
+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
+	rerr = au_diropq_remove(a->src_dentry, a->btgt);
+	au_hin_imtx_unlock(a->src_hinode);
+	if (rerr)
+		RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
+}
+
+
+static void au_ren_rev_rename(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
+				       a->br, /*nd*/NULL);
+	rerr = PTR_ERR(a->h_path.dentry);
+	if (IS_ERR(a->h_path.dentry)) {
+		RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
+		return;
+	}
+
+	rerr = vfsub_rename(a->dst_h_dir,
+			    au_h_dptr(a->src_dentry, a->btgt),
+			    a->src_h_dir, &a->h_path);
+	d_drop(a->h_path.dentry);
+	dput(a->h_path.dentry);
+	/* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
+	if (rerr)
+		RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
+}
+
+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	a->h_path.dentry = a->dst_h_dentry;
+	rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
+	au_set_h_dptr(a->src_dentry, a->btgt, NULL);
+	au_set_dbstart(a->src_dentry, a->src_bstart);
+	if (rerr)
+		RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
+}
+
+
+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
+				       a->br, /*nd*/NULL);
+	rerr = PTR_ERR(a->h_path.dentry);
+	if (IS_ERR(a->h_path.dentry)) {
+		RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
+		return;
+	}
+	if (a->h_path.dentry->d_inode) {
+		d_drop(a->h_path.dentry);
+		dput(a->h_path.dentry);
+		return;
+	}
+
+	rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
+	d_drop(a->h_path.dentry);
+	dput(a->h_path.dentry);
+	if (!rerr) {
+		au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
+		au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
+	} else
+		RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
+}
+
+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
+{
+	int rerr;
+
+	a->h_path.dentry = a->src_wh_dentry;
+	rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
+	if (rerr)
+		RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
+}
+
+static void au_ren_rev_drop(struct au_ren_args *a)
+{
+	struct dentry *d, *h_d;
+	int i;
+	aufs_bindex_t bend, bindex;
+
+	for (i = 0; i < AuSrcDst; i++) {
+		d = a->sd[i].dentry;
+		d_drop(d);
+		bend = au_dbend(d);
+		for (bindex = au_dbstart(d); bindex <= bend; bindex++) {
+			h_d = au_h_dptr(d, bindex);
+			if (h_d)
+				d_drop(h_d);
+		}
+	}
+
+	au_update_dbstart(a->dst_dentry);
+	if (a->thargs)
+		d_drop(a->h_dst);
+}
+#undef RevertFailure
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * when we have to copyup the renaming entry, do it with the rename-target name
+ * in order to minimize the cost (the later actual rename is unnecessary).
+ * otherwise rename it on the target branch.
+ */
+static int au_ren_or_cpup(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d;
+
+	d = a->src_dentry;
+	if (au_dbstart(d) == a->btgt) {
+		a->h_path.dentry = a->dst_h_dentry;
+		if (au_ftest_ren(a->flags, DIROPQ)
+		    && au_dbdiropq(d) == a->btgt)
+			au_fclr_ren(a->flags, DIROPQ);
+		AuDebugOn(au_dbstart(d) != a->btgt);
+		err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
+				   a->dst_h_dir, &a->h_path);
+	} else {
+		struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
+
+		au_fset_ren(a->flags, CPUP);
+		mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+		au_set_dbstart(d, a->btgt);
+		au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
+		err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
+					 !AuCpup_DTIME, a->dst_parent);
+		if (unlikely(err)) {
+			au_set_h_dptr(d, a->btgt, NULL);
+			au_set_dbstart(d, a->src_bstart);
+		}
+		mutex_unlock(h_mtx);
+	}
+
+	return err;
+}
+
+/* cf. aufs_rmdir() */
+static int au_ren_del_whtmp(struct au_ren_args *a)
+{
+	int err;
+	struct inode *dir;
+
+	dir = a->dst_dir;
+	if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
+				     au_sbi(dir->i_sb)->si_dirwh)
+	    || au_test_fs_remote(a->h_dst->d_sb)) {
+		err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
+		if (unlikely(err))
+			AuWarn("failed removing whtmp dir %.*s (%d), "
+			       "ignored.\n", AuDLNPair(a->h_dst), err);
+	} else {
+		au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, &a->whlist,
+				    a->thargs);
+		dput(a->h_dst);
+		a->thargs = NULL;
+	}
+
+	return 0;
+}
+
+/* make it 'opaque' dir. */
+static int au_ren_diropq(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *diropq;
+
+	err = 0;
+	a->src_hinode = au_hi(a->src_inode, a->btgt);
+	au_hin_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
+	diropq = au_diropq_create(a->src_dentry, a->btgt);
+	au_hin_imtx_unlock(a->src_hinode);
+	if (IS_ERR(diropq))
+		err = PTR_ERR(diropq);
+	dput(diropq);
+
+	return err;
+}
+
+static int do_rename(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d, *h_d;
+
+	/* prepare workqueue args for asynchronous rmdir */
+	h_d = a->dst_h_dentry;
+	if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
+		err = -ENOMEM;
+		a->thargs = kmalloc(sizeof(*a->thargs), GFP_NOFS);
+		if (unlikely(!a->thargs))
+			goto out;
+		a->h_dst = dget(h_d);
+	}
+
+	/* create whiteout for src_dentry */
+	if (au_ftest_ren(a->flags, WHSRC)) {
+		a->src_wh_dentry
+			= au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
+		err = PTR_ERR(a->src_wh_dentry);
+		if (IS_ERR(a->src_wh_dentry))
+			goto out_thargs;
+	}
+
+	/* lookup whiteout for dentry */
+	if (au_ftest_ren(a->flags, WHDST)) {
+		h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
+				 a->br);
+		err = PTR_ERR(h_d);
+		if (IS_ERR(h_d))
+			goto out_whsrc;
+		if (!h_d->d_inode)
+			dput(h_d);
+		else
+			a->dst_wh_dentry = h_d;
+	}
+
+	/* rename dentry to tmpwh */
+	if (a->thargs) {
+		err = au_whtmp_ren(a->dst_h_dentry, a->br);
+		if (unlikely(err))
+			goto out_whdst;
+
+		d = a->dst_dentry;
+		au_set_h_dptr(d, a->btgt, NULL);
+		err = au_lkup_neg(d, a->btgt);
+		if (unlikely(err))
+			goto out_whtmp;
+		a->dst_h_dentry = au_h_dptr(d, a->btgt);
+	}
+
+	/* cpup src */
+	if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
+		struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
+
+		mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+		err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
+					 !AuCpup_DTIME);
+		mutex_unlock(h_mtx);
+		if (unlikely(err))
+			goto out_whtmp;
+	}
+
+	/* rename by vfs_rename or cpup */
+	d = a->dst_dentry;
+	if (au_ftest_ren(a->flags, ISDIR)
+	    && (a->dst_wh_dentry
+		|| au_dbdiropq(d) == a->btgt
+		/* hide the lower to keep xino */
+		|| a->btgt < au_dbend(d)
+		|| au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
+		au_fset_ren(a->flags, DIROPQ);
+	err = au_ren_or_cpup(a);
+	if (unlikely(err))
+		/* leave the copied-up one */
+		goto out_whtmp;
+
+	/* make dir opaque */
+	if (au_ftest_ren(a->flags, DIROPQ)) {
+		err = au_ren_diropq(a);
+		if (unlikely(err))
+			goto out_rename;
+	}
+
+	/* update target timestamps */
+	AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
+	a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
+	vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
+	a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
+
+	/* remove whiteout for dentry */
+	if (a->dst_wh_dentry) {
+		a->h_path.dentry = a->dst_wh_dentry;
+		err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
+					  a->dst_dentry);
+		if (unlikely(err))
+			goto out_diropq;
+	}
+
+	/* remove whtmp */
+	if (a->thargs)
+		au_ren_del_whtmp(a); /* ignore this error */
+
+	err = 0;
+	goto out_success;
+
+ out_diropq:
+	if (au_ftest_ren(a->flags, DIROPQ))
+		au_ren_rev_diropq(err, a);
+ out_rename:
+	if (!au_ftest_ren(a->flags, CPUP))
+		au_ren_rev_rename(err, a);
+	else
+		au_ren_rev_cpup(err, a);
+ out_whtmp:
+	if (a->thargs)
+		au_ren_rev_whtmp(err, a);
+ out_whdst:
+	dput(a->dst_wh_dentry);
+	a->dst_wh_dentry = NULL;
+ out_whsrc:
+	if (a->src_wh_dentry)
+		au_ren_rev_whsrc(err, a);
+	au_ren_rev_drop(a);
+ out_success:
+	dput(a->src_wh_dentry);
+	dput(a->dst_wh_dentry);
+ out_thargs:
+	if (a->thargs) {
+		dput(a->h_dst);
+		kfree(a->thargs);
+	}
+ out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * test if @dentry dir can be rename destination or not.
+ * success means, it is a logically empty dir.
+ */
+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
+{
+	return au_test_empty(dentry, whlist);
+}
+
+/*
+ * test if @dentry dir can be rename source or not.
+ * if it can, return 0 and @children is filled.
+ * success means,
+ * - it is a logically empty dir.
+ * - or, it exists on writable branch and has no children including whiteouts
+ *       on the lower branch.
+ */
+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
+{
+	int err;
+	aufs_bindex_t bstart;
+
+	bstart = au_dbstart(dentry);
+	if (bstart != btgt) {
+		struct au_nhash *whlist;
+
+		whlist = au_nhash_new(GFP_NOFS);
+		err = PTR_ERR(whlist);
+		if (IS_ERR(whlist))
+			goto out;
+		err = au_test_empty(dentry, whlist);
+		au_nhash_del(whlist);
+		goto out;
+	}
+
+	if (bstart == au_dbtaildir(dentry))
+		return 0; /* success */
+
+	err = au_test_empty_lower(dentry);
+
+ out:
+	if (err == -ENOTEMPTY) {
+		AuWarn1("renaming dir who has child(ren) on multiple branches,"
+			" is not supported\n");
+		err = -EXDEV;
+	}
+	return err;
+}
+
+/* side effect: sets whlist and h_dentry */
+static int au_ren_may_dir(struct au_ren_args *a)
+{
+	int err;
+	struct dentry *d;
+
+	err = 0;
+	au_nhash_init(&a->whlist);
+	d = a->dst_dentry;
+	if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
+		au_set_dbstart(d, a->dst_bstart);
+		err = may_rename_dstdir(d, &a->whlist);
+		au_set_dbstart(d, a->btgt);
+	}
+	a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
+	if (unlikely(err))
+		goto out;
+
+	d = a->src_dentry;
+	a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
+	if (au_ftest_ren(a->flags, ISDIR)) {
+		err = may_rename_srcdir(d, a->btgt);
+		if (unlikely(err))
+			au_nhash_fin(&a->whlist);
+	}
+
+ out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * simple tests for rename.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
+static int au_may_ren(struct au_ren_args *a)
+{
+	int err, isdir;
+	struct inode *h_inode;
+
+	if (a->src_bstart == a->btgt) {
+		err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
+				 au_ftest_ren(a->flags, ISDIR));
+		if (unlikely(err))
+			goto out;
+		err = -EINVAL;
+		if (unlikely(a->src_h_dentry == a->h_trap))
+			goto out;
+	}
+
+	err = 0;
+	if (a->dst_bstart != a->btgt)
+		goto out;
+
+	err = -EIO;
+	h_inode = a->dst_h_dentry->d_inode;
+	isdir = !!au_ftest_ren(a->flags, ISDIR);
+	if (!a->dst_dentry->d_inode) {
+		if (unlikely(h_inode))
+			goto out;
+		err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
+				 isdir);
+	} else {
+		if (unlikely(!h_inode || !h_inode->i_nlink))
+			goto out;
+		err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
+				 isdir);
+		if (unlikely(err))
+			goto out;
+		err = -ENOTEMPTY;
+		if (unlikely(a->dst_h_dentry == a->h_trap))
+			goto out;
+		err = 0;
+	}
+
+ out:
+	if (unlikely(err == -ENOENT || err == -EEXIST))
+		err = -EIO;
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * locking order
+ * (VFS)
+ * - src_dir and dir by lock_rename()
+ * - inode if exitsts
+ * (aufs)
+ * - lock all
+ *   + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
+ *     + si_read_lock
+ *     + di_write_lock2_child()
+ *       + di_write_lock_child()
+ *	   + ii_write_lock_child()
+ *       + di_write_lock_child2()
+ *	   + ii_write_lock_child2()
+ *     + src_parent and parent
+ *       + di_write_lock_parent()
+ *	   + ii_write_lock_parent()
+ *       + di_write_lock_parent2()
+ *	   + ii_write_lock_parent2()
+ *   + lower src_dir and dir by vfsub_lock_rename()
+ *   + verify the every relationships between child and parent. if any
+ *     of them failed, unlock all and return -EBUSY.
+ */
+static void au_ren_unlock(struct au_ren_args *a)
+{
+	struct super_block *sb;
+
+	sb = a->dst_dentry->d_sb;
+	if (au_ftest_ren(a->flags, MNT_WRITE))
+		mnt_drop_write(a->br->br_mnt);
+	vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
+			    a->dst_h_parent, a->dst_hdir);
+}
+
+static int au_ren_lock(struct au_ren_args *a)
+{
+	int err;
+	unsigned int udba;
+
+	err = 0;
+	a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
+	a->src_hdir = au_hi(a->src_dir, a->btgt);
+	a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
+	a->dst_hdir = au_hi(a->dst_dir, a->btgt);
+	a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
+				      a->dst_h_parent, a->dst_hdir);
+	udba = au_opt_udba(a->src_dentry->d_sb);
+	if (au_dbstart(a->src_dentry) == a->btgt)
+		err = au_h_verify(a->src_h_dentry, udba,
+				  a->src_h_parent->d_inode, a->src_h_parent,
+				  a->br);
+	if (!err && au_dbstart(a->dst_dentry) == a->btgt)
+		err = au_h_verify(a->dst_h_dentry, udba,
+				  a->dst_h_parent->d_inode, a->dst_h_parent,
+				  a->br);
+	if (!err) {
+		err = mnt_want_write(a->br->br_mnt);
+		if (unlikely(err))
+			goto out_unlock;
+		au_fset_ren(a->flags, MNT_WRITE);
+		goto out; /* success */
+	}
+
+	err = -EBUSY;
+
+ out_unlock:
+	au_ren_unlock(a);
+ out:
+	return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void au_ren_refresh_dir(struct au_ren_args *a)
+{
+	struct inode *dir;
+
+	dir = a->dst_dir;
+	dir->i_version++;
+	if (au_ftest_ren(a->flags, ISDIR)) {
+		/* is this updating defined in POSIX? */
+		au_cpup_attr_timesizes(a->src_inode);
+		au_cpup_attr_nlink(dir, /*force*/1);
+		if (a->dst_inode) {
+			clear_nlink(a->dst_inode);
+			au_cpup_attr_timesizes(a->dst_inode);
+		}
+	}
+	if (au_ibstart(dir) == a->btgt)
+		au_cpup_attr_timesizes(dir);
+
+	if (au_ftest_ren(a->flags, ISSAMEDIR))
+		return;
+
+	dir = a->src_dir;
+	dir->i_version++;
+	if (au_ftest_ren(a->flags, ISDIR))
+		au_cpup_attr_nlink(dir, /*force*/1);
+	if (au_ibstart(dir) == a->btgt)
+		au_cpup_attr_timesizes(dir);
+}
+
+static void au_ren_refresh(struct au_ren_args *a)
+{
+	aufs_bindex_t bend, bindex;
+	struct dentry *d, *h_d;
+	struct inode *i, *h_i;
+	struct super_block *sb;
+
+	d = a->src_dentry;
+	au_set_dbwh(d, -1);
+	bend = au_dbend(d);
+	for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
+		h_d = au_h_dptr(d, bindex);
+		if (h_d)
+			au_set_h_dptr(d, bindex, NULL);
+	}
+	au_set_dbend(d, a->btgt);
+
+	sb = d->d_sb;
+	i = a->src_inode;
+	if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
+		return; /* success */
+
+	bend = au_ibend(i);
+	for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
+		h_i = au_h_iptr(i, bindex);
+		if (h_i) {
+			au_xino_write0(sb, bindex, h_i->i_ino, 0);
+			/* ignore this error */
+			au_set_h_iptr(i, bindex, NULL, 0);
+		}
+	}
+	au_set_ibend(i, a->btgt);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* mainly for link(2) and rename(2) */
+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
+{
+	aufs_bindex_t bdiropq, bwh;
+	struct dentry *parent;
+	struct au_branch *br;
+
+	parent = dentry->d_parent;
+	IMustLock(parent->d_inode); /* dir is locked */
+
+	bdiropq = au_dbdiropq(parent);
+	bwh = au_dbwh(dentry);
+	br = au_sbr(dentry->d_sb, btgt);
+	if (au_br_rdonly(br)
+	    || (0 <= bdiropq && bdiropq < btgt)
+	    || (0 <= bwh && bwh < btgt))
+		btgt = -1;
+
+	AuDbg("btgt %d\n", btgt);
+	return btgt;
+}
+
+/* sets src_bstart, dst_bstart and btgt */
+static int au_ren_wbr(struct au_ren_args *a)
+{
+	int err;
+	struct au_wr_dir_args wr_dir_args = {
+		/* .force_btgt	= -1, */
+		.flags		= AuWrDir_ADD_ENTRY
+	};
+
+	a->src_bstart = au_dbstart(a->src_dentry);
+	a->dst_bstart = au_dbstart(a->dst_dentry);
+	if (au_ftest_ren(a->flags, ISDIR))
+		au_fset_wrdir(wr_dir_args.flags, ISDIR);
+	wr_dir_args.force_btgt = a->src_bstart;
+	if (a->dst_inode && a->dst_bstart < a->src_bstart)
+		wr_dir_args.force_btgt = a->dst_bstart;
+	wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
+	err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
+	a->btgt = err;
+
+	return err;
+}
+
+static void au_ren_dt(struct au_ren_args *a)
+{
+	a->h_path.dentry = a->src_h_parent;
+	au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
+	if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
+		a->h_path.dentry = a->dst_h_parent;
+		au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
+	}
+
+	au_fclr_ren(a->flags, DT_DSTDIR);
+	if (!au_ftest_ren(a->flags, ISDIR))
+		return;
+
+	a->h_path.dentry = a->src_h_dentry;
+	au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
+	if (a->dst_h_dentry->d_inode) {
+		au_fset_ren(a->flags, DT_DSTDIR);
+		a->h_path.dentry = a->dst_h_dentry;
+		au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
+	}
+}
+
+static void au_ren_rev_dt(int err, struct au_ren_args *a)
+{
+	struct dentry *h_d;
+	struct mutex *h_mtx;
+
+	au_dtime_revert(a->src_dt + AuPARENT);
+	if (!au_ftest_ren(a->flags, ISSAMEDIR))
+		au_dtime_revert(a->dst_dt + AuPARENT);
+
+	if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
+		h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
+		h_mtx = &h_d->d_inode->i_mutex;
+		mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+		au_dtime_revert(a->src_dt + AuCHILD);
+		mutex_unlock(h_mtx);
+
+		if (au_ftest_ren(a->flags, DT_DSTDIR)) {
+			h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
+			h_mtx = &h_d->d_inode->i_mutex;
+			mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
+			au_dtime_revert(a->dst_dt + AuCHILD);
+			mutex_unlock(h_mtx);
+		}
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
+		struct inode *_dst_dir, struct dentry *_dst_dentry)
+{
+	int err;
+	/* reduce stack space */
+	struct au_ren_args *a;
+
+	IMustLock(_src_dir);
+	IMustLock(_dst_dir);
+
+	err = -ENOMEM;
+	BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
+	a = kzalloc(sizeof(*a), GFP_NOFS);
+	if (unlikely(!a))
+		goto out;
+
+	a->src_dir = _src_dir;
+	a->src_dentry = _src_dentry;
+	a->src_inode = a->src_dentry->d_inode;
+	a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
+	a->dst_dir = _dst_dir;
+	a->dst_dentry = _dst_dentry;
+	a->dst_inode = a->dst_dentry->d_inode;
+	a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
+	if (a->dst_inode) {
+		IMustLock(a->dst_inode);
+		au_igrab(a->dst_inode);
+	}
+
+	err = -ENOTDIR;
+	if (S_ISDIR(a->src_inode->i_mode)) {
+		au_fset_ren(a->flags, ISDIR);
+		if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
+			goto out_free;
+		aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
+					  AuLock_DIR | AuLock_FLUSH);
+	} else
+		aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
+					  AuLock_FLUSH);
+
+	au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
+	di_write_lock_parent(a->dst_parent);
+
+	/* which branch we process */
+	err = au_ren_wbr(a);
+	if (unlikely(err < 0))
+		goto out_unlock;
+	a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
+	a->h_path.mnt = a->br->br_mnt;
+
+	/* are they available to be renamed */
+	err = au_ren_may_dir(a);
+	if (unlikely(err))
+		goto out_unlock;
+
+	/* prepare the writable parent dir on the same branch */
+	if (a->dst_bstart == a->btgt) {
+		au_fset_ren(a->flags, WHDST);
+	} else {
+		err = au_cpup_dirs(a->dst_dentry, a->btgt);
+		if (unlikely(err))
+			goto out_children;
+	}
+
+	if (a->src_dir != a->dst_dir) {
+		/*
+		 * this temporary unlock is safe,
+		 * because both dir->i_mutex are locked.
+		 */
+		di_write_unlock(a->dst_parent);
+		di_write_lock_parent(a->src_parent);
+		err = au_wr_dir_need_wh(a->src_dentry,
+					au_ftest_ren(a->flags, ISDIR),
+					&a->btgt);
+		di_write_unlock(a->src_parent);
+		di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
+		au_fclr_ren(a->flags, ISSAMEDIR);
+	} else
+		err = au_wr_dir_need_wh(a->src_dentry,
+					au_ftest_ren(a->flags, ISDIR),
+					&a->btgt);
+	if (unlikely(err < 0))
+		goto out_children;
+	if (err)
+		au_fset_ren(a->flags, WHSRC);
+
+	/* lock them all */
+	err = au_ren_lock(a);
+	if (unlikely(err))
+		goto out_children;
+
+	if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) {
+		err = au_may_ren(a);
+		if (unlikely(err))
+			goto out_hdir;
+	}
+
+	/* store timestamps to be revertible */
+	au_ren_dt(a);
+
+	/* here we go */
+	err = do_rename(a);
+	if (unlikely(err))
+		goto out_dt;
+
+	/* update dir attributes */
+	au_ren_refresh_dir(a);
+
+	/* dput/iput all lower dentries */
+	au_ren_refresh(a);
+
+	goto out_hdir; /* success */
+
+ out_dt:
+	au_ren_rev_dt(err, a);
+ out_hdir:
+	au_ren_unlock(a);
+ out_children:
+	au_nhash_fin(&a->whlist);
+ out_unlock:
+	if (unlikely(err && au_ftest_ren(a->flags, ISDIR))) {
+		au_update_dbstart(a->dst_dentry);
+		d_drop(a->dst_dentry);
+	}
+	if (!err) {
+		d_move(a->src_dentry, a->dst_dentry);
+		if (a->dst_inode
+		    && (a->dst_inode->i_nlink <= 1
+			|| au_ftest_ren(a->flags, ISDIR)))
+			a->dst_inode->i_flags |= S_DEAD;
+	}
+	if (au_ftest_ren(a->flags, ISSAMEDIR))
+		di_write_unlock(a->dst_parent);
+	else
+		di_write_unlock2(a->src_parent, a->dst_parent);
+	aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
+ out_free:
+	iput(a->dst_inode);
+	kfree(a);
+ out:
+	return err;
+}
diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c
new file mode 100644
index 0000000..ed9c55c
--- /dev/null
+++ b/fs/aufs/iinfo.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode private data
+ */
+
+#include "aufs.h"
+
+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
+{
+	struct inode *h_inode;
+
+	h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
+	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+	return h_inode;
+}
+
+/* todo: hard/soft set? */
+void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
+{
+	struct au_iinfo *iinfo = au_ii(inode);
+	struct inode *h_inode;
+
+	iinfo->ii_bstart = bindex;
+	h_inode = iinfo->ii_hinode[bindex + 0].hi_inode;
+	if (h_inode)
+		au_cpup_igen(inode, h_inode);
+}
+
+void au_hiput(struct au_hinode *hinode)
+{
+	au_hin_free(hinode);
+	dput(hinode->hi_whdentry);
+	iput(hinode->hi_inode);
+}
+
+unsigned int au_hi_flags(struct inode *inode, int isdir)
+{
+	unsigned int flags;
+	const unsigned int mnt_flags = au_mntflags(inode->i_sb);
+
+	flags = 0;
+	if (au_opt_test(mnt_flags, XINO))
+		au_fset_hi(flags, XINO);
+	if (isdir && au_opt_test(mnt_flags, UDBA_HINOTIFY))
+		au_fset_hi(flags, HINOTIFY);
+	return flags;
+}
+
+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
+		   struct inode *h_inode, unsigned int flags)
+{
+	struct au_hinode *hinode;
+	struct inode *hi;
+	struct au_iinfo *iinfo = au_ii(inode);
+
+	hinode = iinfo->ii_hinode + bindex;
+	hi = hinode->hi_inode;
+	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+	AuDebugOn(h_inode && hi);
+
+	if (hi)
+		au_hiput(hinode);
+	hinode->hi_inode = h_inode;
+	if (h_inode) {
+		int err;
+		struct super_block *sb = inode->i_sb;
+		struct au_branch *br;
+
+		if (bindex == iinfo->ii_bstart)
+			au_cpup_igen(inode, h_inode);
+		br = au_sbr(sb, bindex);
+		hinode->hi_id = br->br_id;
+		if (au_ftest_hi(flags, XINO)) {
+			err = au_xino_write(sb, bindex, h_inode->i_ino,
+					    inode->i_ino);
+			if (unlikely(err))
+				AuIOErr1("failed au_xino_write() %d\n", err);
+		}
+
+		if (au_ftest_hi(flags, HINOTIFY)
+		    && au_br_hinotifyable(br->br_perm)) {
+			err = au_hin_alloc(hinode, inode, h_inode);
+			if (unlikely(err))
+				AuIOErr1("au_hin_alloc() %d\n", err);
+		}
+	}
+}
+
+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
+		  struct dentry *h_wh)
+{
+	struct au_hinode *hinode;
+
+	hinode = au_ii(inode)->ii_hinode + bindex;
+	AuDebugOn(hinode->hi_whdentry);
+	hinode->hi_whdentry = h_wh;
+}
+
+void au_update_iigen(struct inode *inode)
+{
+	atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
+	/* smp_mb(); */ /* atomic_set */
+}
+
+/* it may be called at remount time, too */
+void au_update_brange(struct inode *inode, int do_put_zero)
+{
+	struct au_iinfo *iinfo;
+
+	iinfo = au_ii(inode);
+	if (!iinfo || iinfo->ii_bstart < 0)
+		return;
+
+	if (do_put_zero) {
+		aufs_bindex_t bindex;
+
+		for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
+		     bindex++) {
+			struct inode *h_i;
+
+			h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
+			if (h_i && !h_i->i_nlink)
+				au_set_h_iptr(inode, bindex, NULL, 0);
+		}
+	}
+
+	iinfo->ii_bstart = -1;
+	while (++iinfo->ii_bstart <= iinfo->ii_bend)
+		if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode)
+			break;
+	if (iinfo->ii_bstart > iinfo->ii_bend) {
+		iinfo->ii_bstart = -1;
+		iinfo->ii_bend = -1;
+		return;
+	}
+
+	iinfo->ii_bend++;
+	while (0 <= --iinfo->ii_bend)
+		if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode)
+			break;
+	AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend || iinfo->ii_bend < 0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_iinfo_init(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+	struct super_block *sb;
+	int nbr, i;
+
+	sb = inode->i_sb;
+	iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
+	nbr = au_sbend(sb) + 1;
+	if (unlikely(nbr <= 0))
+		nbr = 1;
+	iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
+	if (iinfo->ii_hinode) {
+		for (i = 0; i < nbr; i++)
+			iinfo->ii_hinode[i].hi_id = -1;
+
+		atomic_set(&iinfo->ii_generation, au_sigen(sb));
+		/* smp_mb(); */ /* atomic_set */
+		init_rwsem(&iinfo->ii_rwsem);
+		iinfo->ii_bstart = -1;
+		iinfo->ii_bend = -1;
+		iinfo->ii_vdir = NULL;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
+{
+	int err, sz;
+	struct au_hinode *hip;
+
+	err = -ENOMEM;
+	sz = sizeof(*hip) * (iinfo->ii_bend + 1);
+	if (!sz)
+		sz = sizeof(*hip);
+	hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
+	if (hip) {
+		iinfo->ii_hinode = hip;
+		err = 0;
+	}
+
+	return err;
+}
+
+static int au_iinfo_write0(struct super_block *sb, struct au_hinode *hinode,
+			   ino_t ino)
+{
+	int err;
+	aufs_bindex_t bindex;
+	unsigned char locked;
+
+	err = 0;
+	locked = !!si_noflush_read_trylock(sb);
+	bindex = au_br_index(sb, hinode->hi_id);
+	if (bindex >= 0)
+		err = au_xino_write0(sb, bindex, hinode->hi_inode->i_ino, ino);
+	/* error action? */
+	if (locked)
+		si_read_unlock(sb);
+	return err;
+}
+
+void au_iinfo_fin(struct inode *inode)
+{
+	ino_t ino;
+	aufs_bindex_t bend;
+	unsigned char unlinked;
+	struct au_iinfo *iinfo;
+	struct au_hinode *hi;
+	struct super_block *sb;
+
+	iinfo = au_ii(inode);
+	/* bad_inode case */
+	if (!iinfo)
+		return;
+
+	if (iinfo->ii_vdir)
+		au_vdir_free(iinfo->ii_vdir);
+
+	if (iinfo->ii_bstart >= 0) {
+		sb = inode->i_sb;
+		unlinked = !inode->i_nlink;
+		ino = 0;
+		if (unlinked)
+			ino = inode->i_ino;
+		hi = iinfo->ii_hinode + iinfo->ii_bstart;
+		bend = iinfo->ii_bend;
+		while (iinfo->ii_bstart++ <= bend) {
+			if (hi->hi_inode) {
+				if (unlinked || !hi->hi_inode->i_nlink) {
+					au_iinfo_write0(sb, hi, ino);
+					/* ignore this error */
+					ino = 0;
+				}
+				au_hiput(hi);
+			}
+			hi++;
+		}
+	}
+
+	kfree(iinfo->ii_hinode);
+	au_rwsem_destroy(&iinfo->ii_rwsem);
+}
diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c
new file mode 100644
index 0000000..c4a962b
--- /dev/null
+++ b/fs/aufs/inode.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode functions
+ */
+
+#include "aufs.h"
+
+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
+{
+	au_cpup_attr_all(inode, /*force*/0);
+	au_update_iigen(inode);
+	if (do_version)
+		inode->i_version++;
+}
+
+int au_refresh_hinode_self(struct inode *inode, int do_attr)
+{
+	int err;
+	aufs_bindex_t bindex, new_bindex;
+	unsigned char update;
+	struct inode *first;
+	struct au_hinode *p, *q, tmp;
+	struct super_block *sb;
+	struct au_iinfo *iinfo;
+
+	update = 0;
+	sb = inode->i_sb;
+	iinfo = au_ii(inode);
+	err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
+	if (unlikely(err))
+		goto out;
+
+	p = iinfo->ii_hinode + iinfo->ii_bstart;
+	first = p->hi_inode;
+	err = 0;
+	for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
+	     bindex++, p++) {
+		if (!p->hi_inode)
+			continue;
+
+		new_bindex = au_br_index(sb, p->hi_id);
+		if (new_bindex == bindex)
+			continue;
+
+		if (new_bindex < 0) {
+			update++;
+			au_hiput(p);
+			p->hi_inode = NULL;
+			continue;
+		}
+
+		if (new_bindex < iinfo->ii_bstart)
+			iinfo->ii_bstart = new_bindex;
+		if (iinfo->ii_bend < new_bindex)
+			iinfo->ii_bend = new_bindex;
+		/* swap two lower inode, and loop again */
+		q = iinfo->ii_hinode + new_bindex;
+		tmp = *q;
+		*q = *p;
+		*p = tmp;
+		if (tmp.hi_inode) {
+			bindex--;
+			p--;
+		}
+	}
+	au_update_brange(inode, /*do_put_zero*/0);
+	if (do_attr)
+		au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
+
+ out:
+	return err;
+}
+
+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
+{
+	int err, update;
+	unsigned int flags;
+	aufs_bindex_t bindex, bend;
+	unsigned char isdir;
+	struct inode *first;
+	struct au_hinode *p;
+	struct au_iinfo *iinfo;
+
+	err = au_refresh_hinode_self(inode, /*do_attr*/0);
+	if (unlikely(err))
+		goto out;
+
+	update = 0;
+	iinfo = au_ii(inode);
+	p = iinfo->ii_hinode + iinfo->ii_bstart;
+	first = p->hi_inode;
+	isdir = S_ISDIR(inode->i_mode);
+	flags = au_hi_flags(inode, isdir);
+	bend = au_dbend(dentry);
+	for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
+		struct inode *h_i;
+		struct dentry *h_d;
+
+		h_d = au_h_dptr(dentry, bindex);
+		if (!h_d || !h_d->d_inode)
+			continue;
+
+		if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
+			h_i = au_h_iptr(inode, bindex);
+			if (h_i) {
+				if (h_i == h_d->d_inode)
+					continue;
+				err = -EIO;
+				break;
+			}
+		}
+		if (bindex < iinfo->ii_bstart)
+			iinfo->ii_bstart = bindex;
+		if (iinfo->ii_bend < bindex)
+			iinfo->ii_bend = bindex;
+		au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
+		update = 1;
+	}
+	au_update_brange(inode, /*do_put_zero*/0);
+
+	if (unlikely(err))
+		goto out;
+
+	au_refresh_hinode_attr(inode, update && isdir);
+
+ out:
+	return err;
+}
+
+static int set_inode(struct inode *inode, struct dentry *dentry)
+{
+	int err;
+	unsigned int flags;
+	umode_t mode;
+	aufs_bindex_t bindex, bstart, btail;
+	unsigned char isdir;
+	struct dentry *h_dentry;
+	struct inode *h_inode;
+	struct au_iinfo *iinfo;
+
+	err = 0;
+	isdir = 0;
+	bstart = au_dbstart(dentry);
+	h_inode = au_h_dptr(dentry, bstart)->d_inode;
+	mode = h_inode->i_mode;
+	switch (mode & S_IFMT) {
+	case S_IFREG:
+		btail = au_dbtail(dentry);
+		inode->i_op = &aufs_iop;
+		inode->i_fop = &aufs_file_fop;
+		inode->i_mapping->a_ops = &aufs_aop;
+		break;
+	case S_IFDIR:
+		isdir = 1;
+		btail = au_dbtaildir(dentry);
+		inode->i_op = &aufs_dir_iop;
+		inode->i_fop = &aufs_dir_fop;
+		break;
+	case S_IFLNK:
+		btail = au_dbtail(dentry);
+		inode->i_op = &aufs_symlink_iop;
+		break;
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFIFO:
+	case S_IFSOCK:
+		btail = au_dbtail(dentry);
+		inode->i_op = &aufs_iop;
+		init_special_inode(inode, mode, h_inode->i_rdev);
+		break;
+	default:
+		AuIOErr("Unknown file type 0%o\n", mode);
+		err = -EIO;
+		goto out;
+	}
+
+	flags = au_hi_flags(inode, isdir);
+	iinfo = au_ii(inode);
+	iinfo->ii_bstart = bstart;
+	iinfo->ii_bend = btail;
+	for (bindex = bstart; bindex <= btail; bindex++) {
+		h_dentry = au_h_dptr(dentry, bindex);
+		if (h_dentry)
+			au_set_h_iptr(inode, bindex,
+				      au_igrab(h_dentry->d_inode), flags);
+	}
+	au_cpup_attr_all(inode, /*force*/1);
+
+ out:
+	return err;
+}
+
+/* successful returns with iinfo write_locked */
+static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched)
+{
+	int err;
+	aufs_bindex_t bindex, bend;
+	struct inode *h_inode, *h_dinode;
+
+	*matched = 0;
+
+	/*
+	 * before this function, if aufs got any iinfo lock, it must be only
+	 * one, the parent dir.
+	 * it can happen by UDBA and the obsoleted inode number.
+	 */
+	err = -EIO;
+	if (unlikely(inode->i_ino == parent_ino(dentry)))
+		goto out;
+
+	ii_write_lock_new_child(inode);
+	if (unlikely(IS_DEADDIR(inode)))
+		goto out_unlock;
+
+	err = 0;
+	h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
+	bend = au_ibend(inode);
+	for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
+		h_inode = au_h_iptr(inode, bindex);
+		if (h_inode && h_inode == h_dinode) {
+			*matched = 1;
+			err = 0;
+			if (au_iigen(inode) != au_digen(dentry))
+				err = au_refresh_hinode(inode, dentry);
+			break;
+		}
+	}
+
+ out_unlock:
+	if (unlikely(err))
+		ii_write_unlock(inode);
+ out:
+	return err;
+}
+
+/* successful returns with iinfo write_locked */
+/* todo: return with unlocked? */
+struct inode *au_new_inode(struct dentry *dentry, int must_new)
+{
+	struct inode *inode;
+	struct dentry *h_dentry;
+	struct super_block *sb;
+	ino_t h_ino, ino;
+	int err, match;
+	aufs_bindex_t bstart;
+
+	sb = dentry->d_sb;
+	bstart = au_dbstart(dentry);
+	h_dentry = au_h_dptr(dentry, bstart);
+	h_ino = h_dentry->d_inode->i_ino;
+	err = au_xino_read(sb, bstart, h_ino, &ino);
+	inode = ERR_PTR(err);
+	if (unlikely(err))
+		goto out;
+ new_ino:
+	if (!ino) {
+		ino = au_xino_new_ino(sb);
+		if (unlikely(!ino)) {
+			inode = ERR_PTR(-EIO);
+			goto out;
+		}
+	}
+
+	AuDbg("i%lu\n", (unsigned long)ino);
+	inode = au_iget_locked(sb, ino);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out;
+
+	AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
+	if (inode->i_state & I_NEW) {
+		ii_write_lock_new_child(inode);
+		err = set_inode(inode, dentry);
+		unlock_new_inode(inode);
+		if (!err)
+			goto out; /* success */
+
+		iget_failed(inode);
+		ii_write_unlock(inode);
+		goto out_iput;
+	} else if (!must_new) {
+		err = reval_inode(inode, dentry, &match);
+		if (!err)
+			goto out; /* success */
+		else if (match)
+			goto out_iput;
+	}
+
+	if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
+		AuWarn1("Un-notified UDBA or repeatedly renamed dir,"
+			" b%d, %s, %.*s, hi%lu, i%lu.\n",
+			bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
+			(unsigned long)h_ino, (unsigned long)ino);
+	ino = 0;
+	err = au_xino_write0(sb, bstart, h_ino, 0);
+	if (!err) {
+		iput(inode);
+		goto new_ino;
+	}
+
+ out_iput:
+	iput(inode);
+	inode = ERR_PTR(err);
+ out:
+	return inode;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
+	       struct inode *inode)
+{
+	int err;
+
+	err = au_br_rdonly(au_sbr(sb, bindex));
+
+	/* pseudo-link after flushed may happen out of bounds */
+	if (!err
+	    && inode
+	    && au_ibstart(inode) <= bindex
+	    && bindex <= au_ibend(inode)) {
+		/*
+		 * permission check is unnecessary since vfsub routine
+		 * will be called later
+		 */
+		struct inode *hi = au_h_iptr(inode, bindex);
+		if (hi)
+			err = IS_IMMUTABLE(hi) ? -EROFS : 0;
+	}
+
+	return err;
+}
+
+int au_test_h_perm(struct inode *h_inode, int mask)
+{
+	if (!current_fsuid())
+		return 0;
+	return inode_permission(h_inode, mask);
+}
+
+int au_test_h_perm_sio(struct inode *h_inode, int mask)
+{
+	if (au_test_nfs(h_inode->i_sb)
+	    && (mask & MAY_WRITE)
+	    && S_ISDIR(h_inode->i_mode))
+		mask |= MAY_READ; /* force permission check */
+	return au_test_h_perm(h_inode, mask);
+}
diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h
new file mode 100644
index 0000000..ffda951
--- /dev/null
+++ b/fs/aufs/inode.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2005-2009 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * inode operations
+ */
+
+#ifndef __AUFS_INODE_H__
+#define __AUFS_INODE_H__
+
+#ifdef __KERNEL__
+
+#include <linux/fs.h>
+#include <linux/inotify.h>
+#include <linux/namei.h>
+#include <linux/aufs_type.h>
+#include "rwsem.h"
+
+struct au_hinotify {
+#ifdef CONFIG_AUFS_HINOTIFY
+	struct inotify_watch	hin_watch;
+	struct inode		*hin_aufs_inode;	/* no get/put */
+#endif
+};
+
+struct au_hinode {
+	struct inode		*hi_inode;
+	aufs_bindex_t		hi_id;
+#ifdef CONFIG_AUFS_HINOTIFY
+	struct au_hinotify	*hi_notify;
+#endif
+
+	/* reference to the copied-up whiteout with get/put */
+	struct dentry		*hi_whdentry;
+};
+
+struct au_vdir;
+struct au_iinfo {
+	atomic_t		ii_generation;
+	struct super_block	*ii_hsb1;	/* no get/put */
+
+	struct rw_semaphore	ii_rwsem;
+	aufs_bindex_t		ii_bstart, ii_bend;
+	__u32			ii_higen;
+	struct au_hinode	*ii_hinode;
+	struct au_vdir		*ii_vdir;
+};
+
+struct au_icntnr {
+	struct au_iinfo iinfo;
+	struct inode vfs_inode;
+};
+
+/* au_pin flags */
+#define AuPin_DI_LOCKED		1
+#define AuPin_MNT_WRITE		(1 << 1)
+#define au_ftest_pin(flags, name)	((flags) & AuPin_##name)
+#define au_fset_pin(flags, name)	{ (flags) |= AuPin_##name; }
+#define au_fclr_pin(flags, name)	{ (flags) &= ~AuPin_##name; }
+
+struct au_pin {
+	/* input */
+	struct dentry *dentry;
+	unsigned int udba;
+	unsigned char lsc_di, lsc_hi, flags;
+	aufs_bindex_t bindex;
+
+	/* output */
+	struct dentry *parent;
+	struct au_hinode *hdir;
+	struct vfsmount *h_mnt;
+};
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct au_iinfo *au_ii(struct inode *inode)
+{
+	struct au_iinfo *iinfo;
+
+	iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
+	if (iinfo->ii_hinode)
+		return iinfo;
+	return NULL; /* debugging bad_inode case */
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* inode.c */
+int au_refresh_hinode_self(struct inode *inode, int do_attr);
+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
+struct inode *au_new_inode(struct dentry *dentry, int must_new);
+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
+	       struct inode *inode);
+int au_test_h_perm(struct inode *h_inode, int mask);
+int au_test_h_perm_sio(struct inode *h_inode, int mask);
+
+/* i_op.c */
+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
+
+/* au_wr_dir flags */
+#define AuWrDir_ADD_ENTRY	1
+#define AuWrDir_ISDIR		(1 << 1)
+#define au_ftest_wrdir(flags, name)	((flags) & AuWrDir_##name)
+#define au_fset_wrdir(flags, name)	{ (flags) |= AuWrDir_##name; }
+#define au_fclr_wrdir(flags, name)	{ (flags) &= ~AuWrDir_##name; }
+
+struct au_wr_dir_args {
+	aufs_bindex_t force_btgt;
+	unsigned char flags;
+};
+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
+	      struct au_wr_dir_args *args);
+
+struct dentry *au_pinned_h_parent(struct au_pin *pin);
+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
+		 aufs_bindex_t bindex, int lsc_di, int lsc_hi,
+		 unsigned int udba, unsigned char flags);
+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
+	   unsigned int udba, unsigned char flags) __must_check;
+int au_do_pin(struct au_pin *pin) __must_check;
+void au_unpin(struct au_pin *pin);
+
+/* i_op_add.c */
+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir);
+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
+		struct nameidata *nd);
+int aufs_link(struct dentry *src_dentry, struct inode *dir,
+	      struct dentry *dentry);
+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+
+/* i_op_del.c */
+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+	       struct dentry *h_parent, int isdir);
+int aufs_unlink(struct inode *dir, struct dentry *dentry);
+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
+
+/* i_op_ren.c */
+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
+		struct inode *dir, struct dentry *dentry);
+
+/* iinfo.c */
+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
+void au_hiput(struct au_hinode *hinode);
+void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex);
+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
+		  struct dentry *h_wh);
+unsigned int au_hi_flags(struct inode *inode, int isdir);
+
+/* hinode flags */
+#define AuHi_XINO	1
+#define AuHi_HINOTIFY	(1 << 1)
+#define au_ftest_hi(flags, name)	((flags) & AuHi_##name)
+#define au_fset_hi(flags, name)		{ (flags) |= AuHi_##name; }
+#define au_fclr_hi(flags, name)		{ (flags) &= ~AuHi_##name; }
+
+#ifndef CONFIG_AUFS_HINOTIFY
+#undef AuHi_HINOTIFY
+#define AuHi_HINOTIFY	0
+#endif
+
+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
+		   struct inode *h_inode, unsigned int flags);
+
+void au_update_iigen(struct inode *inode);
+void au_update_brange(struct inode *inode, int do_put_zero);
+
+int au_iinfo_init(struct inode *inode);
+void au_iinfo_fin(struct inode *inode);
+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
+
+/* plink.c */
+void au_plink_block_maintain(struct super_block *sb);
+#ifdef CONFIG_AUFS_DEBUG
+void au_plink_list(struct super_block *sb);
+#else
+static inline void au_plink_list(struct super_block *sb)
+{
+	/* nothing */
+}
+#endif
+int au_plink_test(struct inode *inode);
+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
+		     struct dentry *h_dentry);
+void au_plink_put(struct super_block *sb);
+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
+
+/* ---------------------------------------------------------------------- */
+
+/* lock subclass for iinfo */
+enum {
+	AuLsc_II_CHILD,		/* child first */
+	AuLsc_II_CHILD2,	/* rename(2), link(2), and cpup at hinotify */
+	AuLsc_II_CHILD3,	/* copyup dirs */
+	AuLsc_II_PARENT,	/* see AuLsc_I_PARENT in vfsub.h */
+	AuLsc_II_PARENT2,
+	AuLsc_II_PARENT3,	/* copyup dirs */
+	AuLsc_II_NEW_CHILD
+};
+
+/*
+ * ii_read_lock_child, ii_write_lock_child,
+ * ii_read_lock_child2, ii_write_lock_child2,
+ * ii_read_lock_child3, ii_write_lock_child3,
+ * ii_read_lock_parent, ii_write_lock_parent,
+ * ii_read_lock_parent2, ii_write_lock_parent2,
+ * ii_read_lock_parent3, ii_write_lock_parent3,
+ * ii_read_lock_new_child, ii_write_lock_new_child,
+ */
+#define AuReadLockFunc(name, lsc) \
+static inline void ii_read_lock_##name(struct inode *i) \
+{ \
+	down_read_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
+#define AuWriteLockFunc(name, lsc) \
+static inline void ii_write_lock_##name(struct inode *i) \
+{ \
+	down_write_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
+#define AuRWLockFuncs(name, lsc) \
+	AuReadLockFunc(name, lsc) \
+	AuWriteLockFunc(name, lsc)
+
+AuRWLockFuncs(child, CHILD);
+AuRWLockFuncs(child2, CHILD2);
+AuRWLockFuncs(child3, CHILD3);
+AuRWLockFuncs(parent, PARENT);
+AuRWLockFuncs(parent2, PARENT2);
+AuRWLockFuncs(parent3, PARENT3);
+AuRWLockFuncs(new_child, NEW_CHILD);
+
+#undef AuReadLockFunc
+#undef AuWriteLockFunc
+#undef AuRWLockFuncs
+
+/*
+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
+ */
+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
+
+#define IiMustNoWaiters(i)	AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
+static inline unsigned int au_iigen(struct inode *inode)
+{
+	return atomic_read(&au_ii(inode)->ii_generation);
+}
+
+/* tiny test for inode number */
+/* tmpfs generation is too rough */
+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
+{
+	struct au_iinfo *iinfo;
+
+	iinfo = au_ii(inode);
+	return !(iinfo->ii_hsb1 == h_inode->i_sb
+		 && iinfo->ii_higen == h_inode->i_generation);
+}
+
+static inline struct inode *au_igrab(struct inode *inode)
+{
+	if (inode) {
+		AuDebugOn(!atomic_read(&inode->i_count));
+		atomic_inc(&inode->i_count);
+	}
+	return inode;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
+					aufs_bindex_t bindex)
+{
+	return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
+}
+
+static inline aufs_bindex_t au_ibstart(struct inode *inode)
+{
+	return au_ii(inode)->ii_bstart;
+}
+
+static inline aufs_bindex_t au_ibend(struct inode *inode)
+{
+	return au_ii(inode)->ii_bend;
+}
+
+static inline struct au_vdir *au_ivdir(struct inode *inode)
+{
+	return au_ii(inode)->ii_vdir;
+}
+
+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
+{
+	return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
+}
+
+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
+{
+	au_ii(inode)->ii_bend = bindex;
+}
+
+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
+{
+	au_ii(inode)->ii_vdir = vdir;
+}
+
+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
+{
+	return au_ii(inode)->ii_hinode + bindex;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
+{
+	if (pin)
+		return pin->parent;
+	return NULL;
+}
+
+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
+{
+	if (pin && pin->hdir)
+		return pin->hdir->hi_inode;
+	return NULL;
+}
+
+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
+{
+	if (pin)
+		return pin->hdir;
+	return NULL;
+}
+
+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
+{
+	if (pin)
+		pin->dentry = dentry;
+}
+
+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
+					   unsigned char lflag)
+{
+	if (pin) {
+		/* dirty macros require brackets */
+		if (lflag) {
+			au_fset_pin(pin->flags, DI_LOCKED);
+		} else {
+			au_fclr_pin(pin->flags, DI_LOCKED);
+		}
+	}
+}
+
+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
+{
+	if (pin) {
+		dput(pin->parent);
+		pin->parent = dget(parent);
+	}
+}
+
+/* ---------------------------------------------------------------------- */
+
+#ifdef CONFIG_AUFS_HINOTIFY
+/* hinotify.c */
+int au_hin_alloc(struct au_hinode *hinode, struct inode *inode,
+		 struct inode *h_inode);
+void au_hin_free(struct au_hinode *hinode);
+void au_hin_ctl(struct au_hinode *hinode, int do_set);
+void au_reset_hinotify(struct inode *inode, unsigned int flags);
+
+int __init au_hinotify_init(void);
+void au_hinotify_fin(void);
+
+static inline
+void au_hin_init(struct au_hinode *hinode, struct au_hinotify *val)
+{
+	hinode->hi_notify = val;
+}
+
+static inline void au_iigen_dec(struct inode *inode)
+{
+	atomic_dec(&au_ii(inode)->ii_generation);
+}
+
+#else
+static inline
+int au_hin_alloc(struct au_hinode *hinode __maybe_unused,
+		 struct inode *inode __maybe_unused,
+		 struct inode *h_inode __maybe_unused)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void au_hin_free(struct au_hinode *hinode __maybe_unused)
+{
+	/* nothing */
+}
+
+static inline void au_hin_ctl(struct au_hinode *hinode __maybe_unused,
+			      int do_set __maybe_unused)
+{
+	/* nothing */
+}
+
+static inline void au_reset_hinotify(struct inode *inode __maybe_unused,
+				     unsigned int flags __maybe_unused)
+{
+	/* nothing */
+}
+
+static inline int au_hinotify_init(void)
+{
+	return 0;
+}
+
+#define au_hinotify_fin()	do {} while (0)
+
+static inline
+void au_hin_init(struct au_hinode *hinode __maybe_unused,
+		 struct au_hinotify *val __maybe_unused)
+{
+	/* empty */
+}
+#endif /* CONFIG_AUFS_HINOTIFY */
+
+static inline void au_hin_suspend(struct au_hinode *hdir)
+{
+	au_hin_ctl(hdir, /*do_set*/0);
+}
+
+static inline void au_hin_resume(struct au_hinode *hdir)
+{
+	au_hin_ctl(hdir, /*do_set*/1);
+}
+
+static inline void au_hin_imtx_lock(struct au_hinode *hdir)
+{
+	mutex_lock(&hdir->hi_inode->i_mutex);
+	au_hin_suspend(hdir);
+}
+
+static inline void au_hin_imtx_lock_nested(struct au_hinode *hdir,
+					   unsigned int sc __maybe_unused)
+{
+	mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
+	au_hin_suspend(hdir);
+}
+
+static inline void au_hin_imtx_unlock(struct au_hinode *hdir)
+{
+	au_hin_resume(hdir);
+	mutex_unlock(&hdir->hi_inode->i_mutex);
+}
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_INODE_H__ */
-- 
1.6.1.284.g5dc13


  parent reply	other threads:[~2009-03-16  7:32 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-16  7:20 [RFC Aufs2 #2 00/28] source files J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 01/28] aufs documents J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 02/28] aufs public header file J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 03/28] aufs module global J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 04/28] aufs super_block J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 05/28] aufs branch directory/filesystem J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 06/28] aufs xino J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 07/28] aufs object lifetime management via sysfs J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 08/28] aufs mount options/flags J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 09/28] aufs workqueue J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 10/28] aufs sub-VFS J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 11/28] aufs sub-dcache J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 12/28] aufs copy-up J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 13/28] aufs whiteout J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 14/28] aufs pseudo-link J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 15/28] aufs policies to select one among multiple writable branches J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 16/28] aufs dentry and lookup J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 17/28] aufs file J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 18/28] aufs direcotry J. R. Okajima
2009-03-16  7:20 ` J. R. Okajima [this message]
2009-03-16  7:20 ` [RFC Aufs2 #2 20/28] aufs ioctl J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 21/28] aufs sysfs entries J. R. Okajima
2009-03-16 18:05   ` Greg KH
2009-03-17  3:26     ` hooanon05
2009-03-17  3:37       ` Greg KH
2009-03-17  4:05         ` hooanon05
2009-03-17  4:22           ` Greg KH
2009-03-17  4:50             ` hooanon05
2009-03-17 10:39           ` Evgeniy Polyakov
2009-03-17 12:56             ` hooanon05
2009-03-19  8:04         ` hooanon05
2009-03-16  7:20 ` [RFC Aufs2 #2 22/28] aufs branch for loopback block device J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 23/28] aufs internal inotify J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 24/28] aufs test for fstype J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 25/28] aufs debug J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 26/28] export splice functions J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 27/28] export lookup functions J. R. Okajima
2009-03-16  7:20 ` [RFC Aufs2 #2 28/28] kbuild aufs J. R. Okajima

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1237188040-11404-20-git-send-email-hooanon05@yahoo.co.jp \
    --to=hooanon05@yahoo.co.jp \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.