From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757384AbXLJCw3 (ORCPT ); Sun, 9 Dec 2007 21:52:29 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1753091AbXLJCn6 (ORCPT ); Sun, 9 Dec 2007 21:43:58 -0500 Received: from filer.fsl.cs.sunysb.edu ([130.245.126.2]:49825 "EHLO filer.fsl.cs.sunysb.edu" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754952AbXLJCnh (ORCPT ); Sun, 9 Dec 2007 21:43:37 -0500 From: Erez Zadok To: hch@infradead.org, viro@ftp.linux.org.uk, akpm@linux-foundation.org Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Erez Zadok Subject: [PATCH 25/42] Unionfs: super_block operations Date: Sun, 9 Dec 2007 21:41:58 -0500 Message-Id: <11972545501069-git-send-email-ezk@cs.sunysb.edu> X-Mailer: git-send-email 1.5.2.2 X-MailKey: Erez_Zadok In-Reply-To: <11972545353262-git-send-email-ezk@cs.sunysb.edu> References: <11972545353262-git-send-email-ezk@cs.sunysb.edu> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Includes read_inode, delete_inode, put_super, statfs, remount_fs (which supports branch-management ops), clear_inode, alloc_inode, destroy_inode, write_inode, umount_begin, and show_options. Signed-off-by: Erez Zadok --- fs/unionfs/super.c | 1020 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 1020 insertions(+), 0 deletions(-) create mode 100644 fs/unionfs/super.c diff --git a/fs/unionfs/super.c b/fs/unionfs/super.c new file mode 100644 index 0000000..d9cf2a7 --- /dev/null +++ b/fs/unionfs/super.c @@ -0,0 +1,1020 @@ +/* + * Copyright (c) 2003-2007 Erez Zadok + * Copyright (c) 2003-2006 Charles P. Wright + * Copyright (c) 2005-2007 Josef 'Jeff' Sipek + * Copyright (c) 2005-2006 Junjiro Okajima + * Copyright (c) 2005 Arun M. Krishnakumar + * Copyright (c) 2004-2006 David P. Quigley + * Copyright (c) 2003-2004 Mohammad Nayyer Zubair + * Copyright (c) 2003 Puja Gupta + * Copyright (c) 2003 Harikesavan Krishnan + * Copyright (c) 2003-2007 Stony Brook University + * Copyright (c) 2003-2007 The Research Foundation of SUNY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "union.h" + +/* + * The inode cache is used with alloc_inode for both our inode info and the + * vfs inode. + */ +static struct kmem_cache *unionfs_inode_cachep; + +static void unionfs_read_inode(struct inode *inode) +{ + int size; + struct unionfs_inode_info *info = UNIONFS_I(inode); + + unionfs_read_lock(inode->i_sb); + + memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); + info->bstart = -1; + info->bend = -1; + atomic_set(&info->generation, + atomic_read(&UNIONFS_SB(inode->i_sb)->generation)); + spin_lock_init(&info->rdlock); + info->rdcount = 1; + info->hashsize = -1; + INIT_LIST_HEAD(&info->readdircache); + + size = sbmax(inode->i_sb) * sizeof(struct inode *); + info->lower_inodes = kzalloc(size, GFP_KERNEL); + if (unlikely(!info->lower_inodes)) { + printk(KERN_CRIT "unionfs: no kernel memory when allocating " + "lower-pointer array!\n"); + BUG(); + } + + inode->i_version++; + inode->i_op = &unionfs_main_iops; + inode->i_fop = &unionfs_main_fops; + + inode->i_mapping->a_ops = &unionfs_aops; + + unionfs_read_unlock(inode->i_sb); +} + +/* + * we now define delete_inode, because there are two VFS paths that may + * destroy an inode: one of them calls clear inode before doing everything + * else that's needed, and the other is fine. This way we truncate the inode + * size (and its pages) and then clear our own inode, which will do an iput + * on our and the lower inode. + * + * No need to lock sb info's rwsem. + */ +static void unionfs_delete_inode(struct inode *inode) +{ + i_size_write(inode, 0); /* every f/s seems to do that */ + + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + + clear_inode(inode); +} + +/* + * final actions when unmounting a file system + * + * No need to lock rwsem. + */ +static void unionfs_put_super(struct super_block *sb) +{ + int bindex, bstart, bend; + struct unionfs_sb_info *spd; + int leaks = 0; + + spd = UNIONFS_SB(sb); + if (!spd) + return; + + bstart = sbstart(sb); + bend = sbend(sb); + + /* Make sure we have no leaks of branchget/branchput. */ + for (bindex = bstart; bindex <= bend; bindex++) + if (unlikely(branch_count(sb, bindex) != 0)) { + printk(KERN_CRIT + "unionfs: branch %d has %d references left!\n", + bindex, branch_count(sb, bindex)); + leaks = 1; + } + BUG_ON(leaks != 0); + + kfree(spd->data); + kfree(spd); + sb->s_fs_info = NULL; +} + +/* + * Since people use this to answer the "How big of a file can I write?" + * question, we report the size of the highest priority branch as the size of + * the union. + */ +static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err = 0; + struct super_block *sb; + struct dentry *lower_dentry; + + sb = dentry->d_sb; + + unionfs_read_lock(sb); + unionfs_lock_dentry(dentry); + + if (unlikely(!__unionfs_d_revalidate_chain(dentry, NULL, false))) { + err = -ESTALE; + goto out; + } + unionfs_check_dentry(dentry); + + lower_dentry = unionfs_lower_dentry(sb->s_root); + err = vfs_statfs(lower_dentry, buf); + + /* set return buf to our f/s to avoid confusing user-level utils */ + buf->f_type = UNIONFS_SUPER_MAGIC; + /* + * Our maximum file name can is shorter by a few bytes because every + * file name could potentially be whited-out. + * + * XXX: this restriction goes away with ODF. + */ + buf->f_namelen -= UNIONFS_WHLEN; + + /* + * reset two fields to avoid confusing user-land. + * XXX: is this still necessary? + */ + memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t)); + memset(&buf->f_spare, 0, sizeof(buf->f_spare)); + +out: + unionfs_check_dentry(dentry); + unionfs_unlock_dentry(dentry); + unionfs_read_unlock(sb); + return err; +} + +/* handle mode changing during remount */ +static noinline int do_remount_mode_option(char *optarg, int cur_branches, + struct unionfs_data *new_data, + struct path *new_lower_paths) +{ + int err = -EINVAL; + int perms, idx; + char *modename = strchr(optarg, '='); + struct nameidata nd; + + /* by now, optarg contains the branch name */ + if (!*optarg) { + printk(KERN_ERR + "unionfs: no branch specified for mode change\n"); + goto out; + } + if (!modename) { + printk(KERN_ERR "unionfs: branch \"%s\" requires a mode\n", + optarg); + goto out; + } + *modename++ = '\0'; + err = parse_branch_mode(modename, &perms); + if (err) { + printk(KERN_ERR "unionfs: invalid mode \"%s\" for \"%s\"\n", + modename, optarg); + goto out; + } + + /* + * Find matching branch index. For now, this assumes that nothing + * has been mounted on top of this Unionfs stack. Once we have /odf + * and cache-coherency resolved, we'll address the branch-path + * uniqueness. + */ + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_ERR "unionfs: error accessing " + "lower directory \"%s\" (error %d)\n", + optarg, err); + goto out; + } + for (idx = 0; idx < cur_branches; idx++) + if (nd.mnt == new_lower_paths[idx].mnt && + nd.dentry == new_lower_paths[idx].dentry) + break; + path_release(&nd); /* no longer needed */ + if (idx == cur_branches) { + err = -ENOENT; /* err may have been reset above */ + printk(KERN_ERR "unionfs: branch \"%s\" " + "not found\n", optarg); + goto out; + } + /* check/change mode for existing branch */ + /* we don't warn if perms==branchperms */ + new_data[idx].branchperms = perms; + err = 0; +out: + return err; +} + +/* handle branch deletion during remount */ +static noinline int do_remount_del_option(char *optarg, int cur_branches, + struct unionfs_data *new_data, + struct path *new_lower_paths) +{ + int err = -EINVAL; + int idx; + struct nameidata nd; + + /* optarg contains the branch name to delete */ + + /* + * Find matching branch index. For now, this assumes that nothing + * has been mounted on top of this Unionfs stack. Once we have /odf + * and cache-coherency resolved, we'll address the branch-path + * uniqueness. + */ + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_ERR "unionfs: error accessing " + "lower directory \"%s\" (error %d)\n", + optarg, err); + goto out; + } + for (idx = 0; idx < cur_branches; idx++) + if (nd.mnt == new_lower_paths[idx].mnt && + nd.dentry == new_lower_paths[idx].dentry) + break; + path_release(&nd); /* no longer needed */ + if (idx == cur_branches) { + printk(KERN_ERR "unionfs: branch \"%s\" " + "not found\n", optarg); + err = -ENOENT; + goto out; + } + /* check if there are any open files on the branch to be deleted */ + if (atomic_read(&new_data[idx].open_files) > 0) { + err = -EBUSY; + goto out; + } + + /* + * Now we have to delete the branch. First, release any handles it + * has. Then, move the remaining array indexes past "idx" in + * new_data and new_lower_paths one to the left. Finally, adjust + * cur_branches. + */ + pathput(&new_lower_paths[idx]); + + if (idx < cur_branches - 1) { + /* if idx==cur_branches-1, we delete last branch: easy */ + memmove(&new_data[idx], &new_data[idx+1], + (cur_branches - 1 - idx) * + sizeof(struct unionfs_data)); + memmove(&new_lower_paths[idx], &new_lower_paths[idx+1], + (cur_branches - 1 - idx) * sizeof(struct path)); + } + + err = 0; +out: + return err; +} + +/* handle branch insertion during remount */ +static noinline int do_remount_add_option(char *optarg, int cur_branches, + struct unionfs_data *new_data, + struct path *new_lower_paths, + int *high_branch_id) +{ + int err = -EINVAL; + int perms; + int idx = 0; /* default: insert at beginning */ + char *new_branch , *modename = NULL; + struct nameidata nd; + + /* + * optarg can be of several forms: + * + * /bar:/foo insert /foo before /bar + * /bar:/foo=ro insert /foo in ro mode before /bar + * /foo insert /foo in the beginning (prepend) + * :/foo insert /foo at the end (append) + */ + if (*optarg == ':') { /* append? */ + new_branch = optarg + 1; /* skip ':' */ + idx = cur_branches; + goto found_insertion_point; + } + new_branch = strchr(optarg, ':'); + if (!new_branch) { /* prepend? */ + new_branch = optarg; + goto found_insertion_point; + } + *new_branch++ = '\0'; /* holds path+mode of new branch */ + + /* + * Find matching branch index. For now, this assumes that nothing + * has been mounted on top of this Unionfs stack. Once we have /odf + * and cache-coherency resolved, we'll address the branch-path + * uniqueness. + */ + err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_ERR "unionfs: error accessing " + "lower directory \"%s\" (error %d)\n", + optarg, err); + goto out; + } + for (idx = 0; idx < cur_branches; idx++) + if (nd.mnt == new_lower_paths[idx].mnt && + nd.dentry == new_lower_paths[idx].dentry) + break; + path_release(&nd); /* no longer needed */ + if (idx == cur_branches) { + printk(KERN_ERR "unionfs: branch \"%s\" " + "not found\n", optarg); + err = -ENOENT; + goto out; + } + + /* + * At this point idx will hold the index where the new branch should + * be inserted before. + */ +found_insertion_point: + /* find the mode for the new branch */ + if (new_branch) + modename = strchr(new_branch, '='); + if (modename) + *modename++ = '\0'; + if (!new_branch || !*new_branch) { + printk(KERN_ERR "unionfs: null new branch\n"); + err = -EINVAL; + goto out; + } + err = parse_branch_mode(modename, &perms); + if (err) { + printk(KERN_ERR "unionfs: invalid mode \"%s\" for " + "branch \"%s\"\n", modename, new_branch); + goto out; + } + err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd); + if (err) { + printk(KERN_ERR "unionfs: error accessing " + "lower directory \"%s\" (error %d)\n", + new_branch, err); + goto out; + } + /* + * It's probably safe to check_mode the new branch to insert. Note: + * we don't allow inserting branches which are unionfs's by + * themselves (check_branch returns EINVAL in that case). This is + * because this code base doesn't support stacking unionfs: the ODF + * code base supports that correctly. + */ + err = check_branch(&nd); + if (err) { + printk(KERN_ERR "unionfs: lower directory " + "\"%s\" is not a valid branch\n", optarg); + path_release(&nd); + goto out; + } + + /* + * Now we have to insert the new branch. But first, move the bits + * to make space for the new branch, if needed. Finally, adjust + * cur_branches. + * We don't release nd here; it's kept until umount/remount. + */ + if (idx < cur_branches) { + /* if idx==cur_branches, we append: easy */ + memmove(&new_data[idx+1], &new_data[idx], + (cur_branches - idx) * sizeof(struct unionfs_data)); + memmove(&new_lower_paths[idx+1], &new_lower_paths[idx], + (cur_branches - idx) * sizeof(struct path)); + } + new_lower_paths[idx].dentry = nd.dentry; + new_lower_paths[idx].mnt = nd.mnt; + + new_data[idx].sb = nd.dentry->d_sb; + atomic_set(&new_data[idx].open_files, 0); + new_data[idx].branchperms = perms; + new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */ + + err = 0; +out: + return err; +} + + +/* + * Support branch management options on remount. + * + * See Documentation/filesystems/unionfs/ for details. + * + * @flags: numeric mount options + * @options: mount options string + * + * This function can rearrange a mounted union dynamically, adding and + * removing branches, including changing branch modes. Clearly this has to + * be done safely and atomically. Luckily, the VFS already calls this + * function with lock_super(sb) and lock_kernel() held, preventing + * concurrent mixing of new mounts, remounts, and unmounts. Moreover, + * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb) + * to purge dentries/inodes from our superblock, and also called + * fsync_super(sb) to purge any dirty pages. So we're good. + * + * XXX: however, our remount code may also need to invalidate mapped pages + * so as to force them to be re-gotten from the (newly reconfigured) lower + * branches. This has to wait for proper mmap and cache coherency support + * in the VFS. + * + */ +static int unionfs_remount_fs(struct super_block *sb, int *flags, + char *options) +{ + int err = 0; + int i; + char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */ + char *optname; + int cur_branches = 0; /* no. of current branches */ + int new_branches = 0; /* no. of branches actually left in the end */ + int add_branches; /* est. no. of branches to add */ + int del_branches; /* est. no. of branches to del */ + int max_branches; /* max possible no. of branches */ + struct unionfs_data *new_data = NULL, *tmp_data = NULL; + struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL; + struct inode **new_lower_inodes = NULL; + int new_high_branch_id; /* new high branch ID */ + int size; /* memory allocation size, temp var */ + int old_ibstart, old_ibend; + + unionfs_write_lock(sb); + + /* + * The VFS will take care of "ro" and "rw" flags, and we can safely + * ignore MS_SILENT, but anything else left over is an error. So we + * need to check if any other flags may have been passed (none are + * allowed/supported as of now). + */ + if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) { + printk(KERN_ERR + "unionfs: remount flags 0x%x unsupported\n", *flags); + err = -EINVAL; + goto out_error; + } + + /* + * If 'options' is NULL, it's probably because the user just changed + * the union to a "ro" or "rw" and the VFS took care of it. So + * nothing to do and we're done. + */ + if (!options || options[0] == '\0') + goto out_error; + + /* + * Find out how many branches we will have in the end, counting + * "add" and "del" commands. Copy the "options" string because + * strsep modifies the string and we need it later. + */ + tmp_to_free = kstrdup(options, GFP_KERNEL); + optionstmp = tmp_to_free; + if (unlikely(!optionstmp)) { + err = -ENOMEM; + goto out_free; + } + cur_branches = sbmax(sb); /* current no. branches */ + new_branches = sbmax(sb); + del_branches = 0; + add_branches = 0; + new_high_branch_id = sbhbid(sb); /* save current high_branch_id */ + while ((optname = strsep(&optionstmp, ",")) != NULL) { + char *optarg; + + if (!optname || !*optname) + continue; + + optarg = strchr(optname, '='); + if (optarg) + *optarg++ = '\0'; + + if (!strcmp("add", optname)) + add_branches++; + else if (!strcmp("del", optname)) + del_branches++; + } + kfree(tmp_to_free); + /* after all changes, will we have at least one branch left? */ + if ((new_branches + add_branches - del_branches) < 1) { + printk(KERN_ERR + "unionfs: no branches left after remount\n"); + err = -EINVAL; + goto out_free; + } + + /* + * Since we haven't actually parsed all the add/del options, nor + * have we checked them for errors, we don't know for sure how many + * branches we will have after all changes have taken place. In + * fact, the total number of branches left could be less than what + * we have now. So we need to allocate space for a temporary + * placeholder that is at least as large as the maximum number of + * branches we *could* have, which is the current number plus all + * the additions. Once we're done with these temp placeholders, we + * may have to re-allocate the final size, copy over from the temp, + * and then free the temps (done near the end of this function). + */ + max_branches = cur_branches + add_branches; + /* allocate space for new pointers to lower dentry */ + tmp_data = kcalloc(max_branches, + sizeof(struct unionfs_data), GFP_KERNEL); + if (unlikely(!tmp_data)) { + err = -ENOMEM; + goto out_free; + } + /* allocate space for new pointers to lower paths */ + tmp_lower_paths = kcalloc(max_branches, + sizeof(struct path), GFP_KERNEL); + if (unlikely(!tmp_lower_paths)) { + err = -ENOMEM; + goto out_free; + } + /* copy current info into new placeholders, incrementing refcnts */ + memcpy(tmp_data, UNIONFS_SB(sb)->data, + cur_branches * sizeof(struct unionfs_data)); + memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths, + cur_branches * sizeof(struct path)); + for (i = 0; i < cur_branches; i++) + pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */ + + /******************************************************************* + * For each branch command, do path_lookup on the requested branch, + * and apply the change to a temp branch list. To handle errors, we + * already dup'ed the old arrays (above), and increased the refcnts + * on various f/s objects. So now we can do all the path_lookups + * and branch-management commands on the new arrays. If it fail mid + * way, we free the tmp arrays and *put all objects. If we succeed, + * then we free old arrays and *put its objects, and then replace + * the arrays with the new tmp list (we may have to re-allocate the + * memory because the temp lists could have been larger than what we + * actually needed). + *******************************************************************/ + + while ((optname = strsep(&options, ",")) != NULL) { + char *optarg; + + if (!optname || !*optname) + continue; + /* + * At this stage optname holds a comma-delimited option, but + * without the commas. Next, we need to break the string on + * the '=' symbol to separate CMD=ARG, where ARG itself can + * be KEY=VAL. For example, in mode=/foo=rw, CMD is "mode", + * KEY is "/foo", and VAL is "rw". + */ + optarg = strchr(optname, '='); + if (optarg) + *optarg++ = '\0'; + /* incgen remount option (instead of old ioctl) */ + if (!strcmp("incgen", optname)) { + err = 0; + goto out_no_change; + } + + /* + * All of our options take an argument now. (Insert ones + * that don't above this check.) So at this stage optname + * contains the CMD part and optarg contains the ARG part. + */ + if (!optarg || !*optarg) { + printk(KERN_ERR "unionfs: all remount options require " + "an argument (%s)\n", optname); + err = -EINVAL; + goto out_release; + } + + if (!strcmp("add", optname)) { + err = do_remount_add_option(optarg, new_branches, + tmp_data, + tmp_lower_paths, + &new_high_branch_id); + if (err) + goto out_release; + new_branches++; + if (new_branches > UNIONFS_MAX_BRANCHES) { + printk(KERN_ERR "unionfs: command exceeds " + "%d branches\n", UNIONFS_MAX_BRANCHES); + err = -E2BIG; + goto out_release; + } + continue; + } + if (!strcmp("del", optname)) { + err = do_remount_del_option(optarg, new_branches, + tmp_data, + tmp_lower_paths); + if (err) + goto out_release; + new_branches--; + continue; + } + if (!strcmp("mode", optname)) { + err = do_remount_mode_option(optarg, new_branches, + tmp_data, + tmp_lower_paths); + if (err) + goto out_release; + continue; + } + + /* + * When you use "mount -o remount,ro", mount(8) will + * reportedly pass the original dirs= string from + * /proc/mounts. So for now, we have to ignore dirs= and + * not consider it an error, unless we want to allow users + * to pass dirs= in remount. Note that to allow the VFS to + * actually process the ro/rw remount options, we have to + * return 0 from this function. + */ + if (!strcmp("dirs", optname)) { + printk(KERN_WARNING + "unionfs: remount ignoring option \"%s\"\n", + optname); + continue; + } + + err = -EINVAL; + printk(KERN_ERR + "unionfs: unrecognized option \"%s\"\n", optname); + goto out_release; + } + +out_no_change: + + /****************************************************************** + * WE'RE ALMOST DONE: check if leftmost branch might be read-only, + * see if we need to allocate a small-sized new vector, copy the + * vectors to their correct place, release the refcnt of the older + * ones, and return. Also handle invalidating any pages that will + * have to be re-read. + *******************************************************************/ + + if (!(tmp_data[0].branchperms & MAY_WRITE)) { + printk(KERN_ERR "unionfs: leftmost branch cannot be read-only " + "(use \"remount,ro\" to create a read-only union)\n"); + err = -EINVAL; + goto out_release; + } + + /* (re)allocate space for new pointers to lower dentry */ + size = new_branches * sizeof(struct unionfs_data); + new_data = krealloc(tmp_data, size, GFP_KERNEL); + if (unlikely(!new_data)) { + err = -ENOMEM; + goto out_release; + } + + /* allocate space for new pointers to lower paths */ + size = new_branches * sizeof(struct path); + new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL); + if (unlikely(!new_lower_paths)) { + err = -ENOMEM; + goto out_release; + } + + /* allocate space for new pointers to lower inodes */ + new_lower_inodes = kcalloc(new_branches, + sizeof(struct inode *), GFP_KERNEL); + if (unlikely(!new_lower_inodes)) { + err = -ENOMEM; + goto out_release; + } + + /* + * OK, just before we actually put the new set of branches in place, + * we need to ensure that our own f/s has no dirty objects left. + * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and + * fsync_super(sb), taking care of dentries, inodes, and dirty + * pages. So all that's left is for us to invalidate any leftover + * (non-dirty) pages to ensure that they will be re-read from the + * new lower branches (and to support mmap). + */ + + /* + * Now we call drop_pagecache_sb() to invalidate all pages in this + * super. This function calls invalidate_inode_pages(mapping), + * which calls invalidate_mapping_pages(): the latter, however, will + * not invalidate pages which are dirty, locked, under writeback, or + * mapped into page tables. We shouldn't have to worry about dirty + * or under-writeback pages, because do_remount_sb() called + * fsync_super() which would not have returned until all dirty pages + * were flushed. + * + * But do we have to worry about locked pages? Is there any chance + * that in here we'll get locked pages? + * + * XXX: what about pages mapped into pagetables? Are these pages + * which user processes may have mmap(2)'ed? If so, then we need to + * invalidate those too, no? Maybe we'll have to write our own + * version of invalidate_mapping_pages() which also handled mapped + * pages. + * + * XXX: Alternatively, maybe we should call truncate_inode_pages(), + * which use two passes over the pages list, and will truncate all + * pages. + */ + drop_pagecache_sb(sb); + + /* copy new vectors into their correct place */ + tmp_data = UNIONFS_SB(sb)->data; + UNIONFS_SB(sb)->data = new_data; + new_data = NULL; /* so don't free good pointers below */ + tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths; + UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths; + new_lower_paths = NULL; /* so don't free good pointers below */ + + /* update our unionfs_sb_info and root dentry index of last branch */ + i = sbmax(sb); /* save no. of branches to release at end */ + sbend(sb) = new_branches - 1; + set_dbend(sb->s_root, new_branches - 1); + old_ibstart = ibstart(sb->s_root->d_inode); + old_ibend = ibend(sb->s_root->d_inode); + ibend(sb->s_root->d_inode) = new_branches - 1; + UNIONFS_D(sb->s_root)->bcount = new_branches; + new_branches = i; /* no. of branches to release below */ + + /* + * Update lower inodes: 3 steps + * 1. grab ref on all new lower inodes + */ + for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) { + struct dentry *lower_dentry = + unionfs_lower_dentry_idx(sb->s_root, i); + igrab(lower_dentry->d_inode); + new_lower_inodes[i] = lower_dentry->d_inode; + } + /* 2. release reference on all older lower inodes */ + for (i = old_ibstart; i <= old_ibend; i++) { + iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i)); + unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL); + } + kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes); + /* 3. update root dentry's inode to new lower_inodes array */ + UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes; + new_lower_inodes = NULL; + + /* maxbytes may have changed */ + sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; + /* update high branch ID */ + sbhbid(sb) = new_high_branch_id; + + /* update our sb->generation for revalidating objects */ + i = atomic_inc_return(&UNIONFS_SB(sb)->generation); + atomic_set(&UNIONFS_D(sb->s_root)->generation, i); + atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i); + if (!(*flags & MS_SILENT)) + pr_info("unionfs: new generation number %d\n", i); + /* finally, update the root dentry's times */ + unionfs_copy_attr_times(sb->s_root->d_inode); + err = 0; /* reset to success */ + + /* + * The code above falls through to the next label, and releases the + * refcnts of the older ones (stored in tmp_*): if we fell through + * here, it means success. However, if we jump directly to this + * label from any error above, then an error occurred after we + * grabbed various refcnts, and so we have to release the + * temporarily constructed structures. + */ +out_release: + /* no need to cleanup/release anything in tmp_data */ + if (tmp_lower_paths) + for (i = 0; i < new_branches; i++) + pathput(&tmp_lower_paths[i]); +out_free: + kfree(tmp_lower_paths); + kfree(tmp_data); + kfree(new_lower_paths); + kfree(new_data); + kfree(new_lower_inodes); +out_error: + unionfs_check_dentry(sb->s_root); + unionfs_write_unlock(sb); + return err; +} + +/* + * Called by iput() when the inode reference count reached zero + * and the inode is not hashed anywhere. Used to clear anything + * that needs to be, before the inode is completely destroyed and put + * on the inode free list. + * + * No need to lock sb info's rwsem. + */ +static void unionfs_clear_inode(struct inode *inode) +{ + int bindex, bstart, bend; + struct inode *lower_inode; + struct list_head *pos, *n; + struct unionfs_dir_state *rdstate; + + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { + rdstate = list_entry(pos, struct unionfs_dir_state, cache); + list_del(&rdstate->cache); + free_rdstate(rdstate); + } + + /* + * Decrement a reference to a lower_inode, which was incremented + * by our read_inode when it was created initially. + */ + bstart = ibstart(inode); + bend = ibend(inode); + if (bstart >= 0) { + for (bindex = bstart; bindex <= bend; bindex++) { + lower_inode = unionfs_lower_inode_idx(inode, bindex); + if (!lower_inode) + continue; + iput(lower_inode); + } + } + + kfree(UNIONFS_I(inode)->lower_inodes); + UNIONFS_I(inode)->lower_inodes = NULL; +} + +static struct inode *unionfs_alloc_inode(struct super_block *sb) +{ + struct unionfs_inode_info *i; + + i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL); + if (unlikely(!i)) + return NULL; + + /* memset everything up to the inode to 0 */ + memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode)); + + i->vfs_inode.i_version = 1; + return &i->vfs_inode; +} + +static void unionfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode)); +} + +/* unionfs inode cache constructor */ +static void init_once(struct kmem_cache *cachep, void *obj) +{ + struct unionfs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +int unionfs_init_inode_cache(void) +{ + int err = 0; + + unionfs_inode_cachep = + kmem_cache_create("unionfs_inode_cache", + sizeof(struct unionfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + if (unlikely(!unionfs_inode_cachep)) + err = -ENOMEM; + return err; +} + +/* unionfs inode cache destructor */ +void unionfs_destroy_inode_cache(void) +{ + if (unionfs_inode_cachep) + kmem_cache_destroy(unionfs_inode_cachep); +} + +/* + * Called when we have a dirty inode, right here we only throw out + * parts of our readdir list that are too old. + * + * No need to grab sb info's rwsem. + */ +static int unionfs_write_inode(struct inode *inode, int sync) +{ + struct list_head *pos, *n; + struct unionfs_dir_state *rdstate; + + spin_lock(&UNIONFS_I(inode)->rdlock); + list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { + rdstate = list_entry(pos, struct unionfs_dir_state, cache); + /* We keep this list in LRU order. */ + if ((rdstate->access + RDCACHE_JIFFIES) > jiffies) + break; + UNIONFS_I(inode)->rdcount--; + list_del(&rdstate->cache); + free_rdstate(rdstate); + } + spin_unlock(&UNIONFS_I(inode)->rdlock); + + return 0; +} + +/* + * Used only in nfs, to kill any pending RPC tasks, so that subsequent + * code can actually succeed and won't leave tasks that need handling. + */ +static void unionfs_umount_begin(struct vfsmount *mnt, int flags) +{ + struct super_block *sb, *lower_sb; + struct vfsmount *lower_mnt; + int bindex, bstart, bend; + + if (!(flags & MNT_FORCE)) + /* + * we are not being MNT_FORCE'd, therefore we should emulate + * old behavior + */ + return; + + sb = mnt->mnt_sb; + + unionfs_read_lock(sb); + + bstart = sbstart(sb); + bend = sbend(sb); + for (bindex = bstart; bindex <= bend; bindex++) { + lower_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex); + lower_sb = unionfs_lower_super_idx(sb, bindex); + + if (lower_mnt && lower_sb && lower_sb->s_op && + lower_sb->s_op->umount_begin) + lower_sb->s_op->umount_begin(lower_mnt, flags); + } + + unionfs_read_unlock(sb); +} + +static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct super_block *sb = mnt->mnt_sb; + int ret = 0; + char *tmp_page; + char *path; + int bindex, bstart, bend; + int perms; + + unionfs_read_lock(sb); + + unionfs_lock_dentry(sb->s_root); + + tmp_page = (char *) __get_free_page(GFP_KERNEL); + if (unlikely(!tmp_page)) { + ret = -ENOMEM; + goto out; + } + + bstart = sbstart(sb); + bend = sbend(sb); + + seq_printf(m, ",dirs="); + for (bindex = bstart; bindex <= bend; bindex++) { + path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex), + unionfs_lower_mnt_idx(sb->s_root, bindex), + tmp_page, PAGE_SIZE); + if (IS_ERR(path)) { + ret = PTR_ERR(path); + goto out; + } + + perms = branchperms(sb, bindex); + + seq_printf(m, "%s=%s", path, + perms & MAY_WRITE ? "rw" : "ro"); + if (bindex != bend) + seq_printf(m, ":"); + } + +out: + free_page((unsigned long) tmp_page); + + unionfs_unlock_dentry(sb->s_root); + + unionfs_read_unlock(sb); + + return ret; +} + +struct super_operations unionfs_sops = { + .read_inode = unionfs_read_inode, + .delete_inode = unionfs_delete_inode, + .put_super = unionfs_put_super, + .statfs = unionfs_statfs, + .remount_fs = unionfs_remount_fs, + .clear_inode = unionfs_clear_inode, + .umount_begin = unionfs_umount_begin, + .show_options = unionfs_show_options, + .write_inode = unionfs_write_inode, + .alloc_inode = unionfs_alloc_inode, + .destroy_inode = unionfs_destroy_inode, +}; -- 1.5.2.2