All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/4] Orangefs: wiring and misc
@ 2014-12-31 20:53 hubcap
  2015-01-01 17:45 ` Randy Dunlap
  2015-01-05 19:23 ` [PATCH] Orangefs: Don't compile orangefs by default Mike Marshall
  0 siblings, 2 replies; 3+ messages in thread
From: hubcap @ 2014-12-31 20:53 UTC (permalink / raw)
  To: viro; +Cc: Mike Marshall, linux-fsdevel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 83705 bytes --]

From: Mike Marshall <hubcap@omnibond.com>

The C files logically belong in some of the previous patches in
this sequence. Having them here, instead, keeps all four patches
under the size limit for linux-fsdevel.

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/Kconfig                 |   1 +
 fs/Makefile                |   1 +
 fs/orangefs/Kconfig        |   7 +
 fs/orangefs/Makefile       |   9 +
 fs/orangefs/devpvfs2-req.c | 903 +++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/file.c         | 990 +++++++++++++++++++++++++++++++++++++++++++++
 fs/orangefs/pvfs2-utils.c  | 914 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 2825 insertions(+)
 create mode 100644 fs/orangefs/Kconfig
 create mode 100644 fs/orangefs/Makefile
 create mode 100644 fs/orangefs/devpvfs2-req.c
 create mode 100644 fs/orangefs/file.c
 create mode 100644 fs/orangefs/pvfs2-utils.c

diff --git a/fs/Kconfig b/fs/Kconfig
index 664991a..3010139 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -183,6 +183,7 @@ menuconfig MISC_FILESYSTEMS

 if MISC_FILESYSTEMS

+source "fs/orangefs/Kconfig"
 source "fs/adfs/Kconfig"
 source "fs/affs/Kconfig"
 source "fs/ecryptfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index bedff48..3dee8ba 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -105,6 +105,7 @@ obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
 obj-$(CONFIG_OVERLAY_FS)	+= overlayfs/
+obj-$(CONFIG_ORANGEFS_FS)	+= orangefs/
 obj-$(CONFIG_UDF_FS)		+= udf/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_OMFS_FS)		+= omfs/
diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig
new file mode 100644
index 0000000..b600e08
--- /dev/null
+++ b/fs/orangefs/Kconfig
@@ -0,0 +1,7 @@
+config ORANGEFS_FS
+	tristate "ORANGEFS (Powered by PVFS) support"
+	select FS_POSIX_ACL
+	default n
+	help
+	   Orange is a parallel file system designed for use on high end
+	   computing (HEC) systems.
diff --git a/fs/orangefs/Makefile b/fs/orangefs/Makefile
new file mode 100644
index 0000000..f4136f2
--- /dev/null
+++ b/fs/orangefs/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the ORANGEFS filesystem.
+#
+
+obj-$(CONFIG_ORANGEFS_FS) += orangefs.o
+
+orangefs-objs := acl.o file.o pvfs2-cache.o pvfs2-utils.o xattr.o dcache.o \
+		 inode.o pvfs2-mod.o super.o devpvfs2-req.o namei.o symlink.o\
+		 dir.o pvfs2-bufmap.o pvfs2-proc.o waitqueue.o
diff --git a/fs/orangefs/devpvfs2-req.c b/fs/orangefs/devpvfs2-req.c
new file mode 100644
index 0000000..0d17f33
--- /dev/null
+++ b/fs/orangefs/devpvfs2-req.c
@@ -0,0 +1,903 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add protocol version to kernel
+ * communication, Copyright © Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-dev-proto.h"
+#include "pvfs2-bufmap.h"
+
+/* this file implements the /dev/pvfs2-req device node */
+
+static int open_access_count;
+
+#define DUMP_DEVICE_ERROR()                                                   \
+do {                                                                          \
+	gossip_err("*****************************************************\n");\
+	gossip_err("PVFS2 Device Error:  You cannot open the device file ");  \
+	gossip_err("\n/dev/%s more than once.  Please make sure that\nthere " \
+		   "are no ", PVFS2_REQDEVICE_NAME);                          \
+	gossip_err("instances of a program using this device\ncurrently "     \
+		   "running. (You must verify this!)\n");                     \
+	gossip_err("For example, you can use the lsof program as follows:\n");\
+	gossip_err("'lsof | grep %s' (run this as root)\n",                   \
+		   PVFS2_REQDEVICE_NAME);                                     \
+	gossip_err("  open_access_count = %d\n", open_access_count);          \
+	gossip_err("*****************************************************\n");\
+} while (0)
+/* tag mod table_size; do_div() avoids a 64-bit '%' on 32-bit builds */
+static int hash_func(uint64_t tag, int table_size)
+{
+	return do_div(tag, (unsigned int)table_size);
+}
+
+static void pvfs2_devreq_add_op(struct pvfs2_kernel_op *op)
+{
+	int index = hash_func(op->tag, hash_table_size);
+
+	spin_lock(&htable_ops_in_progress_lock);
+	list_add_tail(&op->list, &htable_ops_in_progress[index]);
+	spin_unlock(&htable_ops_in_progress_lock);
+}
+
+/*
+ * Unlink and return the in-progress op whose tag equals @tag, or NULL
+ * when no op on that tag's hash chain matches.
+ */
+static struct pvfs2_kernel_op *pvfs2_devreq_remove_op(uint64_t tag)
+{
+	struct pvfs2_kernel_op *cur, *tmp, *found = NULL;
+	const int bucket = hash_func(tag, hash_table_size);
+
+	spin_lock(&htable_ops_in_progress_lock);
+	list_for_each_entry_safe(cur, tmp, &htable_ops_in_progress[bucket],
+				 list) {
+		if (cur->tag != tag)
+			continue;
+		/* first match wins; stop walking once it is unlinked */
+		list_del(&cur->list);
+		found = cur;
+		break;
+	}
+	spin_unlock(&htable_ops_in_progress_lock);
+	return found;
+}
+
+/* Open the request device: O_NONBLOCK only, and one opener at a time. */
+static int pvfs2_devreq_open(struct inode *inode, struct file *file)
+{
+	int ret = -EACCES;
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		gossip_err("pvfs2: device cannot be opened in blocking mode\n");
+		return -EINVAL;
+	}
+
+	gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: opening device\n");
+	mutex_lock(&devreq_mutex);
+	if (open_access_count != 0) {
+		DUMP_DEVICE_ERROR();
+	} else {
+		ret = generic_file_open(inode, file);
+		if (ret == 0)
+			open_access_count++;
+	}
+	mutex_unlock(&devreq_mutex);
+
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "pvfs2-client-core: open device complete (ret = %d)\n",
+		     ret);
+	return ret;
+}
+
+static ssize_t pvfs2_devreq_read(struct file *file,
+				 char __user *buf,
+				 size_t count, loff_t *offset)
+{
+	int ret = 0;
+	ssize_t len = 0;
+	struct pvfs2_kernel_op *cur_op = NULL;
+	static int32_t magic = PVFS2_DEVREQ_MAGIC;
+	int32_t proto_ver = PVFS_KERNEL_PROTO_VERSION;
+
+	if (!(file->f_flags & O_NONBLOCK)) {
+		/* We do not support blocking reads/opens any more */
+		gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n");
+		return -EINVAL;
+	} else {
+		struct pvfs2_kernel_op *op = NULL, *temp = NULL;
+		/* get next op (if any) from top of list */
+		spin_lock(&pvfs2_request_list_lock);
+		list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) {
+			int32_t fsid = fsid_of_op(op);
+			/*
+			 * Check if this op's fsid is known and needs
+			 * remounting
+			 */
+			if (fsid != PVFS_FS_ID_NULL &&
+			    fs_mount_pending(fsid) == 1) {
+				gossip_debug(GOSSIP_DEV_DEBUG,
+					     "Skipping op tag %llu %s\n",
+					     llu(op->tag),
+					     get_opname_string(op));
+				continue;
+			} else {
+				/*
+				 * op does not belong to any particular fsid
+				 * or already mounted.. let it through
+				 */
+				cur_op = op;
+				spin_lock(&cur_op->lock);
+				list_del(&cur_op->list);
+				cur_op->op_linger_tmp--;
+				/*
+				 * if there is a trailer, re-add it to
+				 * the request list.
+				 */
+				if (cur_op->op_linger == 2 &&
+				    cur_op->op_linger_tmp == 1) {
+					if (cur_op->upcall.trailer_size <= 0 ||
+					    cur_op->upcall.trailer_buf == NULL)
+						gossip_err("BUG:trailer_size is %ld and trailer buf is %p\n", (long)cur_op->upcall.trailer_size, cur_op->upcall.trailer_buf);
+					/* re-add it to the head of the list */
+					list_add(&cur_op->list,
+						 &pvfs2_request_list);
+				}
+				spin_unlock(&cur_op->lock);
+				break;
+			}
+		}
+		spin_unlock(&pvfs2_request_list_lock);
+	}
+
+	if (cur_op) {
+		spin_lock(&cur_op->lock);
+
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "client-core: reading op tag %llu %s\n",
+			     llu(cur_op->tag), get_opname_string(cur_op));
+		if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
+			if (cur_op->op_linger == 1)
+				gossip_err("WARNING: Current op already queued...skipping\n");
+		} else if (cur_op->op_linger == 1 ||
+			   (cur_op->op_linger == 2 &&
+			    cur_op->op_linger_tmp == 0)) {
+			/*
+			 * atomically move the operation to the
+			 * htable_ops_in_progress
+			 */
+			set_op_state_inprogress(cur_op);
+			pvfs2_devreq_add_op(cur_op);
+		}
+
+		spin_unlock(&cur_op->lock);
+
+		/* 2 cases
+		 * a) OPs with no trailers
+		 * b) OPs with trailers, Stage 1
+		 * Either way push the upcall out
+		 */
+		if (cur_op->op_linger == 1 ||
+		   (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 1)) {
+			len = MAX_ALIGNED_DEV_REQ_UPSIZE;
+			if ((size_t) len <= count) {
+			    ret = copy_to_user(buf,
+					       &proto_ver,
+					       sizeof(int32_t));
+			    if (ret == 0) {
+				ret = copy_to_user(buf + sizeof(int32_t),
+						   &magic,
+						   sizeof(int32_t));
+				if (ret == 0) {
+				    ret = copy_to_user(buf+2 * sizeof(int32_t),
+						       &cur_op->tag,
+						       sizeof(uint64_t));
+				    if (ret == 0) {
+					ret = copy_to_user(
+						buf +
+						  2 *
+						  sizeof(int32_t) +
+						  sizeof(uint64_t),
+						&cur_op->upcall,
+						sizeof(struct pvfs2_upcall_s));
+				    }
+				}
+			    }
+
+			    if (ret) {
+				gossip_err("Failed to copy data to user space\n");
+				len = -EFAULT;
+			    }
+			} else {
+				gossip_err
+				    ("Failed to copy data to user space\n");
+				len = -EIO;
+			}
+		}
+		/* Stage 2: Push the trailer out */
+		else if (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 0) {
+			len = cur_op->upcall.trailer_size;
+			if ((size_t) len <= count) {
+				ret = copy_to_user(buf,
+						   cur_op->upcall.trailer_buf,
+						   len);
+				if (ret) {
+					gossip_err("Failed to copy trailer to user space\n");
+					len = -EFAULT;
+				}
+			} else {
+				gossip_err("Read buffer for trailer is too small (%ld as opposed to %ld)\n",
+					(long)count,
+					(long)len);
+				len = -EIO;
+			}
+		} else {
+			gossip_err("cur_op: %p (op_linger %d), (op_linger_tmp %d), erroneous request list?\n",
+				cur_op,
+				cur_op->op_linger,
+				cur_op->op_linger_tmp);
+			len = 0;
+		}
+	} else if (file->f_flags & O_NONBLOCK) {
+		/*
+		 * if in non-blocking mode, return EAGAIN since no requests are
+		 * ready yet
+		 */
+		len = -EAGAIN;
+	}
+	return len;
+}
+
+/* Function for writev() callers into the device */
+static ssize_t pvfs2_devreq_writev(struct file *file,
+				   const struct iovec *iov,
+				   size_t count,
+				   loff_t *offset)
+{
+	struct pvfs2_kernel_op *op = NULL;
+	void *buffer = NULL;
+	void *ptr = NULL;
+	unsigned long i = 0;
+	static int max_downsize = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
+	int ret = 0, num_remaining = max_downsize;
+	int notrailer_count = 4; /* num elements in iovec without trailer */
+	int payload_size = 0;
+	int32_t magic = 0;
+	int32_t proto_ver = 0;
+	uint64_t tag = 0;
+	ssize_t total_returned_size = 0;
+
+	/* Either there is a trailer or there isn't */
+	if (count != notrailer_count && count != (notrailer_count + 1)) {
+		gossip_err("Error: Number of iov vectors is (%ld) and notrailer count is %d\n",
+			count,
+			notrailer_count);
+		return -EPROTO;
+	}
+	buffer = dev_req_alloc();
+	if (!buffer)
+		return -ENOMEM;
+	ptr = buffer;
+
+	for (i = 0; i < notrailer_count; i++) {
+		if (iov[i].iov_len > num_remaining) {
+			gossip_err
+			    ("writev error: Freeing buffer and returning\n");
+			dev_req_release(buffer);
+			return -EMSGSIZE;
+		}
+		ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
+		if (ret) {
+			gossip_err("Failed to copy data from user space\n");
+			dev_req_release(buffer);
+			return -EIO;
+		}
+		num_remaining -= iov[i].iov_len;
+		ptr += iov[i].iov_len;
+		payload_size += iov[i].iov_len;
+	}
+	total_returned_size = payload_size;
+
+	/* these elements are currently 8 byte aligned (8 bytes for (version +
+	 * magic) 8 bytes for tag).  If you add another element, either
+	 * make it 8 bytes big, or use get_unaligned when assigning.
+	 */
+	ptr = buffer;
+	proto_ver = *((int32_t *) ptr);
+	ptr += sizeof(int32_t);
+
+	magic = *((int32_t *) ptr);
+	ptr += sizeof(int32_t);
+
+	tag = *((uint64_t *) ptr);
+	ptr += sizeof(uint64_t);
+
+	if (magic != PVFS2_DEVREQ_MAGIC) {
+		gossip_err("Error: Device magic number does not match.\n");
+		dev_req_release(buffer);
+		return -EPROTO;
+	}
+	if (proto_ver != PVFS_KERNEL_PROTO_VERSION) {
+		gossip_err("Error: Device protocol version numbers do not match.\n");
+		gossip_err("Please check that your pvfs2 module and pvfs2-client versions are consistent.\n");
+		dev_req_release(buffer);
+		return -EPROTO;
+	}
+
+	op = pvfs2_devreq_remove_op(tag);
+	if (op) {
+		/* Increase ref count! */
+		get_op(op);
+		/* cut off magic and tag from payload size */
+		payload_size -= (2 * sizeof(int32_t) + sizeof(uint64_t));
+		if (payload_size <= sizeof(struct pvfs2_downcall))
+			/* copy the passed in downcall into the op */
+			memcpy(&op->downcall,
+			       ptr,
+			       sizeof(struct pvfs2_downcall));
+		else
+			gossip_debug(GOSSIP_DEV_DEBUG,
+				     "writev: Ignoring %d bytes\n",
+				     payload_size);
+
+		/* Do not allocate needlessly if client-core forgets
+		 * to reset trailer size on op errors.
+		 */
+		if (op->downcall.status == 0 && op->downcall.trailer_size > 0) {
+			gossip_debug(GOSSIP_DEV_DEBUG,
+				     "writev: trailer size %ld\n",
+				     (unsigned long)op->downcall.trailer_size);
+			if (count != (notrailer_count + 1)) {
+				gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%ld)\n", (unsigned long)op->downcall.trailer_size, count);
+				dev_req_release(buffer);
+				put_op(op);
+				return -EPROTO;
+			}
+			if (iov[notrailer_count].iov_len >
+			    op->downcall.trailer_size) {
+				gossip_err("writev error: trailer size (%ld) != iov_len (%ld)\n", (unsigned long)op->downcall.trailer_size, (unsigned long)iov[notrailer_count].iov_len);
+				dev_req_release(buffer);
+				put_op(op);
+				return -EMSGSIZE;
+			}
+			/* Allocate a buffer large enough to hold the
+			 * trailer bytes.
+			 */
+			op->downcall.trailer_buf =
+			    vmalloc(op->downcall.trailer_size);
+			if (op->downcall.trailer_buf != NULL) {
+				gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
+					     op->downcall.trailer_buf);
+				ret = copy_from_user(op->downcall.trailer_buf,
+						     iov[notrailer_count].
+						     iov_base,
+						     iov[notrailer_count].
+						     iov_len);
+				if (ret) {
+					gossip_err("Failed to copy trailer data from user space\n");
+					dev_req_release(buffer);
+					gossip_debug(GOSSIP_DEV_DEBUG,
+						     "vfree: %p\n",
+						     op->downcall.trailer_buf);
+					vfree(op->downcall.trailer_buf);
+					op->downcall.trailer_buf = NULL;
+					put_op(op);
+					return -EIO;
+				}
+			} else {
+				/* Change downcall status */
+				op->downcall.status = -ENOMEM;
+				gossip_err("writev: could not vmalloc for trailer!\n");
+			}
+		}
+
+		/* if this operation is an I/O operation and if it was
+		 * initiated on behalf of a *synchronous* VFS I/O operation,
+		 * only then we need to wait
+		 * for all data to be copied before we can return to avoid
+		 * buffer corruption and races that can pull the buffers
+		 * out from under us.
+		 *
+		 * Essentially we're synchronizing with other parts of the
+		 * vfs implicitly by not allowing the user space
+		 * application reading/writing this device to return until
+		 * the buffers are done being used.
+		 */
+		if ((op->upcall.type == PVFS2_VFS_OP_FILE_IO &&
+		     op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) ||
+		     op->upcall.type == PVFS2_VFS_OP_FILE_IOX) {
+			int timed_out = 0;
+			DECLARE_WAITQUEUE(wait_entry, current);
+
+			/* tell the vfs op waiting on a waitqueue
+			 * that this op is done
+			 */
+			spin_lock(&op->lock);
+			set_op_state_serviced(op);
+			spin_unlock(&op->lock);
+
+			add_wait_queue_exclusive(&op->io_completion_waitq,
+						 &wait_entry);
+			wake_up_interruptible(&op->waitq);
+
+			while (1) {
+				set_current_state(TASK_INTERRUPTIBLE);
+
+				spin_lock(&op->lock);
+				if (op->io_completed) {
+					spin_unlock(&op->lock);
+					break;
+				}
+				spin_unlock(&op->lock);
+
+				if (!signal_pending(current)) {
+					int timeout =
+					    MSECS_TO_JIFFIES(1000 *
+							     op_timeout_secs);
+					if (!schedule_timeout(timeout)) {
+						gossip_debug(GOSSIP_DEV_DEBUG, "*** I/O wait time is up\n");
+						timed_out = 1;
+						break;
+					}
+					continue;
+				}
+
+				gossip_debug(GOSSIP_DEV_DEBUG, "*** signal on I/O wait -- aborting\n");
+				break;
+			}
+
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&op->io_completion_waitq,
+					  &wait_entry);
+
+			/* NOTE: for I/O operations we handle releasing the op
+			 * object except in the case of timeout.  the reason we
+			 * can't free the op in timeout cases is that the op
+			 * service logic in the vfs retries operations using
+			 * the same op ptr, thus it can't be freed.
+			 */
+			if (!timed_out)
+				op_release(op);
+		} else {
+
+			/*
+			 * tell the vfs op waiting on a waitqueue that
+			 * this op is done
+			 */
+			spin_lock(&op->lock);
+			set_op_state_serviced(op);
+			spin_unlock(&op->lock);
+			/*
+			   for every other operation (i.e. non-I/O), we need to
+			   wake up the callers for downcall completion
+			   notification
+			 */
+			wake_up_interruptible(&op->waitq);
+		}
+	} else {
+		/* ignore downcalls that we're not interested in */
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "WARNING: No one's waiting for tag %llu\n",
+			     llu(tag));
+	}
+	dev_req_release(buffer);
+
+	return total_returned_size;
+}
+
+static ssize_t pvfs2_devreq_write_iter(struct kiocb *iocb,
+				      struct iov_iter *iter)
+{
+	return pvfs2_devreq_writev(iocb->ki_filp,
+				   iter->iov,
+				   iter->nr_segs,
+				   &iocb->ki_pos);
+}
+
+/* Mark every superblock mount_pending; returns 1 if there are none, else 0 */
+static int mark_all_pending_mounts(void)
+{
+	struct pvfs2_sb_info_s *sb_info = NULL;
+	int none_mounted = 1;
+
+	spin_lock(&pvfs2_superblocks_lock);
+	list_for_each_entry(sb_info, &pvfs2_superblocks, list) {
+		/* All of these file systems require a remount */
+		none_mounted = 0;
+		sb_info->mount_pending = 1;
+	}
+	spin_unlock(&pvfs2_superblocks_lock);
+	return none_mounted;
+}
+
+/*
+ * Look up the superblock whose fs_id matches @fsid and report its
+ * mount_pending flag.
+ *  Returns -1 if no superblock has that fsid,
+ *          otherwise the stored mount_pending value (1 == needs remount)
+ */
+int fs_mount_pending(int32_t fsid)
+{
+	struct pvfs2_sb_info_s *sb_info = NULL;
+	int pending = -1;
+
+	spin_lock(&pvfs2_superblocks_lock);
+	list_for_each_entry(sb_info, &pvfs2_superblocks, list) {
+		if (sb_info->fs_id != fsid)
+			continue;
+		pending = sb_info->mount_pending;
+		break;
+	}
+	spin_unlock(&pvfs2_superblocks_lock);
+	return pending;
+}
+
+/*
+ * NOTE: gets called when the last reference to this device is dropped.
+ * Using the open_access_count variable, we enforce a reference count
+ * on this file so that it can be opened by only one process at a time.
+ * the devreq_mutex is used to make sure all i/o has completed
+ * before we call pvfs_bufmap_finalize, and similar such tricky
+ * situations
+ */
+static int pvfs2_devreq_release(struct inode *inode, struct file *file)
+{
+	int unmounted = 0;
+
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "%s:pvfs2-client-core: exiting, closing device\n",
+		     __func__);
+
+	mutex_lock(&devreq_mutex);
+	pvfs_bufmap_finalize();
+
+	open_access_count--;
+
+	unmounted = mark_all_pending_mounts();
+	gossip_debug(GOSSIP_DEV_DEBUG, "PVFS2 Device Close: Filesystem(s) %s\n",
+		     (unmounted ? "UNMOUNTED" : "MOUNTED"));
+	mutex_unlock(&devreq_mutex);
+
+	/*
+	 * Walk through the list of ops in the request list, mark them
+	 * as purged and wake them up.
+	 */
+	purge_waiting_ops();
+	/*
+	 * Walk through the hash table of in progress operations; mark
+	 * them as purged and wake them up
+	 */
+	purge_inprogress_ops();
+	gossip_debug(GOSSIP_DEV_DEBUG,
+		     "pvfs2-client-core: device close complete\n");
+	return 0;
+}
+
+/*
+ * Returns 0 if open_access_count == 1 (client-core is attached), else -EIO.
+ */
+int is_daemon_in_service(void)
+{
+	int status = -EIO;
+
+	mutex_lock(&devreq_mutex);
+	if (open_access_count == 1)
+		status = 0;
+	mutex_unlock(&devreq_mutex);
+	return status;
+}
+
+static inline long check_ioctl_command(unsigned int command)
+{
+	/* Check for valid ioctl codes */
+	if (_IOC_TYPE(command) != PVFS_DEV_MAGIC) {
+		gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
+			command,
+			_IOC_TYPE(command),
+			PVFS_DEV_MAGIC);
+		return -EINVAL;
+	}
+	/* and valid ioctl commands */
+	if (_IOC_NR(command) >= PVFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
+		gossip_err("Invalid ioctl command number [%d >= %d]\n",
+			   _IOC_NR(command), PVFS_DEV_MAXNR);
+		return -ENOIOCTLCMD;
+	}
+	return 0;
+}
+
+static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
+{
+	static int32_t magic = PVFS2_DEVREQ_MAGIC;
+	static int32_t max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE;
+	static int32_t max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
+	struct PVFS_dev_map_desc user_desc;
+	int ret = 0;
+	struct dev_mask_info_t mask_info = { 0 };
+	struct list_head *tmp = NULL;
+	struct pvfs2_sb_info_s *pvfs2_sb = NULL;
+
+
+	/* mtmoore: add locking here */
+
+	switch (command) {
+	case PVFS_DEV_GET_MAGIC:
+		return ((put_user(magic, (int32_t __user *) arg) == -EFAULT) ?
+			-EIO :
+			0);
+	case PVFS_DEV_GET_MAX_UPSIZE:
+		return ((put_user(max_up_size,
+				  (int32_t __user *) arg) == -EFAULT) ?
+					-EIO :
+					0);
+	case PVFS_DEV_GET_MAX_DOWNSIZE:
+		return ((put_user(max_down_size,
+				  (int32_t __user *) arg) == -EFAULT) ?
+					-EIO :
+					0);
+	case PVFS_DEV_MAP:
+		ret = copy_from_user(&user_desc,
+				     (struct PVFS_dev_map_desc __user *)
+				     arg,
+				     sizeof(struct PVFS_dev_map_desc));
+		return ret ? -EIO : pvfs_bufmap_initialize(&user_desc);
+	case PVFS_DEV_REMOUNT_ALL:
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "pvfs2_devreq_ioctl: got PVFS_DEV_REMOUNT_ALL\n");
+
+		/*
+		 * remount all mounted pvfs2 volumes to regain the lost
+		 * dynamic mount tables (if any) -- NOTE: this is done
+		 * without keeping the superblock list locked due to the
+		 * upcall/downcall waiting.  also, the request semaphore is
+		 * used to ensure that no operations will be serviced until
+		 * all of the remounts are serviced (to avoid ops between
+		 * mounts to fail)
+		 */
+		ret = mutex_lock_interruptible(&request_mutex);
+		if (ret < 0)
+			return ret;
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "pvfs2_devreq_ioctl: priority remount in progress\n");
+		list_for_each(tmp, &pvfs2_superblocks) {
+			pvfs2_sb =
+				list_entry(tmp, struct pvfs2_sb_info_s, list);
+			if (pvfs2_sb && (pvfs2_sb->sb)) {
+				gossip_debug(GOSSIP_DEV_DEBUG,
+					     "Remounting SB %p\n",
+					     pvfs2_sb);
+
+				ret = pvfs2_remount(pvfs2_sb->sb);
+				if (ret) {
+					gossip_debug(GOSSIP_DEV_DEBUG,
+						     "SB %p remount failed\n",
+						     pvfs2_sb);
+						break;
+				}
+			}
+		}
+		gossip_debug(GOSSIP_DEV_DEBUG,
+			     "pvfs2_devreq_ioctl: priority remount complete\n");
+		mutex_unlock(&request_mutex);
+		return ret;
+	case PVFS_DEV_DEBUG:
+		ret = copy_from_user(&mask_info,
+				     (void __user *)arg,
+				     sizeof(mask_info));
+		if (ret != 0)
+			return -EIO;
+
+		if (mask_info.mask_type == KERNEL_MASK) {
+			if ((mask_info.mask_value == 0)
+			    && (kernel_mask_set_mod_init)) {
+				/*
+				 * the kernel debug mask was set when the
+				 * kernel module was loaded; don't override
+				 * it if the client-core was started without
+				 * a value for PVFS2_KMODMASK.
+				 */
+				return 0;
+			}
+			ret = PVFS_proc_kmod_mask_to_eventlog(
+				mask_info.
+				mask_value,
+				kernel_debug_string);
+			gossip_debug_mask = mask_info.mask_value;
+			pr_info("PVFS: kernel debug mask has been modified to \"%s\" (0x%08llx)\n",
+				kernel_debug_string,
+				llu(gossip_debug_mask));
+		} else if (mask_info.mask_type == CLIENT_MASK) {
+			ret = PVFS_proc_mask_to_eventlog(mask_info.mask_value,
+							 client_debug_string);
+			pr_info("PVFS: client debug mask has been modified to \"%s\" (0x%08llx)\n",
+				client_debug_string,
+				llu(mask_info.mask_value));
+		} else {
+			gossip_lerr("Invalid mask type....\n");
+			return -EINVAL;
+		}
+
+		return ret;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	return -ENOIOCTLCMD;
+}
+
+/* ioctl entry point: validate @command, then hand it to the dispatcher. */
+static long pvfs2_devreq_ioctl(struct file *file,
+			       unsigned int command, unsigned long arg)
+{
+	const long err = check_ioctl_command(command);
+
+	/* improperly constructed commands never reach the dispatcher */
+	if (err < 0)
+		return (int)err;
+
+	return (int)dispatch_ioctl_command(command, arg);
+}
+
+#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
+
+/*  Compat structure for the PVFS_DEV_MAP ioctl */
+struct PVFS_dev_map_desc32 {
+	compat_uptr_t ptr;
+	int32_t total_size;
+	int32_t size;
+	int32_t count;
+};
+
+static unsigned long translate_dev_map26(unsigned long args, long *error)
+{
+	struct PVFS_dev_map_desc32 __user *p32 = (void __user *)args;
+	/*
+	 * Depending on the architecture, allocate some space on the
+	 * user-call-stack based on our expected layout.
+	 */
+	struct PVFS_dev_map_desc __user *p =
+	    compat_alloc_user_space(sizeof(*p));
+	u32 addr;
+
+	*error = 0;
+	/* get the ptr from the 32 bit user-space */
+	if (get_user(addr, &p32->ptr))
+		goto err;
+	/* try to put that into a 64-bit layout */
+	if (put_user(compat_ptr(addr), &p->ptr))
+		goto err;
+	/* copy the remaining fields */
+	if (copy_in_user(&p->total_size, &p32->total_size, sizeof(int32_t)))
+		goto err;
+	if (copy_in_user(&p->size, &p32->size, sizeof(int32_t)))
+		goto err;
+	if (copy_in_user(&p->count, &p32->count, sizeof(int32_t)))
+		goto err;
+	return (unsigned long)p;
+err:
+	*error = -EFAULT;
+	return 0;
+}
+
+/*
+ * 32 bit user-space apps' ioctl handlers when kernel modules
+ * is compiled as a 64 bit one
+ */
+static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
+				      unsigned long args)
+{
+	long ret;
+	unsigned long arg = args;
+
+	/* Check for properly constructed commands */
+	ret = check_ioctl_command(cmd);
+	if (ret < 0)
+		return ret;
+	if (cmd == PVFS_DEV_MAP) {
+		/*
+		 * convert the arguments to what we expect internally
+		 * in kernel space
+		 */
+		arg = translate_dev_map26(args, &ret);
+		if (ret < 0) {
+			gossip_err("Could not translate dev map\n");
+			return ret;
+		}
+	}
+	/* no other ioctl requires translation */
+	return dispatch_ioctl_command(cmd, arg);
+}
+
+#endif /* CONFIG_COMPAT is in .config */
+
+/* Stubs sit outside CONFIG_COMPAT: init/cleanup call them unconditionally */
+static int pvfs2_ioctl32_init(void)
+{
+	return 0;
+}
+
+static void pvfs2_ioctl32_cleanup(void)
+{
+}
+
+/* the assigned character device major number */
+static int pvfs2_dev_major;
+
+/*
+ * Initialize pvfs2 device specific state:
+ * Must be called at module load time only
+ */
+int pvfs2_dev_init(void)
+{
+	int ret;
+
+	/* register the ioctl32 sub-system */
+	ret = pvfs2_ioctl32_init();
+	if (ret < 0)
+		return ret;
+
+	/* register pvfs2-req device  */
+	pvfs2_dev_major = register_chrdev(0,
+					  PVFS2_REQDEVICE_NAME,
+					  &pvfs2_devreq_file_operations);
+	if (pvfs2_dev_major < 0) {
+		gossip_debug(GOSSIP_INIT_DEBUG,
+			     "Failed to register /dev/%s (error %d)\n",
+			     PVFS2_REQDEVICE_NAME, pvfs2_dev_major);
+		pvfs2_ioctl32_cleanup();
+		return pvfs2_dev_major;
+	}
+
+	gossip_debug(GOSSIP_INIT_DEBUG,
+		     "*** /dev/%s character device registered ***\n",
+		     PVFS2_REQDEVICE_NAME);
+	gossip_debug(GOSSIP_INIT_DEBUG, "'mknod /dev/%s c %d 0'.\n",
+		     PVFS2_REQDEVICE_NAME, pvfs2_dev_major);
+	return 0;
+}
+
+/* Unregister the request chrdev, then tear down the ioctl32 sub-system. */
+void pvfs2_dev_cleanup(void)
+{
+	unregister_chrdev(pvfs2_dev_major, PVFS2_REQDEVICE_NAME);
+	gossip_debug(GOSSIP_INIT_DEBUG,
+		     "*** /dev/%s character device unregistered ***\n",
+		     PVFS2_REQDEVICE_NAME);
+	/* unregister the ioctl32 sub-system */
+	pvfs2_ioctl32_cleanup();
+}
+
+/* poll: report POLLIN (not the si_code POLL_IN) while requests are queued */
+static unsigned int pvfs2_devreq_poll(struct file *file,
+				      struct poll_table_struct *poll_table)
+{
+	unsigned int poll_revent_mask = 0;
+
+	if (open_access_count == 1) {
+		poll_wait(file, &pvfs2_request_list_waitq, poll_table);
+		spin_lock(&pvfs2_request_list_lock);
+		if (!list_empty(&pvfs2_request_list))
+			poll_revent_mask |= POLLIN;
+		spin_unlock(&pvfs2_request_list_lock);
+	}
+	return poll_revent_mask;
+}
+
+const struct file_operations pvfs2_devreq_file_operations = {
+	.owner = THIS_MODULE,
+	.read = pvfs2_devreq_read,
+	.write_iter = pvfs2_devreq_write_iter,
+	.open = pvfs2_devreq_open,
+	.release = pvfs2_devreq_release,
+	.unlocked_ioctl = pvfs2_devreq_ioctl,
+
+#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
+	.compat_ioctl = pvfs2_devreq_compat_ioctl,
+#endif
+	.poll = pvfs2_devreq_poll
+};
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
new file mode 100644
index 0000000..c013134
--- /dev/null
+++ b/fs/orangefs/file.c
@@ -0,0 +1,990 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ *  Linux VFS file operations.
+ */
+
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-bufmap.h"
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+#define wake_up_daemon_for_return(op)				\
+do {	/* flag completion under op->lock, then wake the waiter */	\
+	spin_lock(&(op)->lock);					\
+	(op)->io_completed = 1;					\
+	spin_unlock(&(op)->lock);				\
+	wake_up_interruptible(&(op)->io_completion_waitq);	\
+} while (0)
+
+/*
+ * Copy up to total_size bytes from the buffers described by the iovec
+ * into client-core's address space (buffer buffer_index of bufmap).
+ * NOTE: from_user says whether the iovec holds user-space (non-zero)
+ *       or kernel-space (zero) virtual addresses. Returns the copy
+ *       helper's result; negative results are also logged.
+ */
+static int precopy_buffers(struct pvfs2_bufmap *bufmap,
+			   int buffer_index,
+			   const struct iovec *vec,
+			   unsigned long nr_segs,
+			   size_t total_size,
+			   int from_user)
+{
+	int ret = 0;
+
+	/*
+	 * copy data from application/kernel by pulling it out
+	 * of the iovec.
+	 */
+	/* Are we copying from User Virtual Addresses? */
+	if (from_user)
+		ret = pvfs_bufmap_copy_iovec_from_user(
+			bufmap,
+			buffer_index,
+			vec,
+			nr_segs,
+			total_size);
+	/* Are we copying from Kernel Virtual Addresses? */
+	else
+		ret = pvfs_bufmap_copy_iovec_from_kernel(
+			bufmap,
+			buffer_index,
+			vec,
+			nr_segs,
+			total_size);
+	if (ret < 0)
+		gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+			__func__,
+			(long)ret);
+	return ret;
+}
+
+/*
+ * Copy up to total_size bytes from client-core's address space (buffer
+ * buffer_index of bufmap) out to the buffers described by the iovec.
+ * NOTE: to_user says whether the iovec holds user-space (non-zero)
+ *       or kernel-space (zero) virtual addresses. A zero total_size
+ *       copies nothing and returns 0; negative results are logged.
+ */
+static int postcopy_buffers(struct pvfs2_bufmap *bufmap,
+			    int buffer_index,
+			    const struct iovec *vec,
+			    unsigned long nr_segs,
+			    size_t total_size,
+			    int to_user)
+{
+	int ret = 0;
+
+	/*
+	 * copy data to application/kernel by pushing it out to
+	 * the iovec. NOTE; target buffers can be addresses or
+	 * struct page pointers.
+	 */
+	if (total_size) {
+		/* Are we copying to User Virtual Addresses? */
+		if (to_user)
+			ret = pvfs_bufmap_copy_to_user_iovec(
+				bufmap,
+				buffer_index,
+				vec,
+				nr_segs,
+				total_size);
+		/* Are we copying to Kern Virtual Addresses? */
+		else
+			ret = pvfs_bufmap_copy_to_kernel_iovec(
+				bufmap,
+				buffer_index,
+				vec,
+				nr_segs,
+				total_size);
+		if (ret < 0)
+			gossip_err("%s: Failed to copy-out buffers.  Please make sure that the pvfs2-client is running (%ld)\n",
+				__func__,
+				(long)ret);
+	}
+	return ret;
+}
+
+/*
+ * Post the I/O upcall and wait for it; returns bytes completed or -errno.
+ * NOTE(review): handle_io_error() takes no arguments, so it presumably
+ * relies on the local names used here - confirm before renaming any.
+ */
+static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode,
+		loff_t *offset, struct iovec *vec, unsigned long nr_segs,
+		size_t total_size, loff_t readahead_size, int to_user)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle;
+	struct pvfs2_bufmap *bufmap = NULL;
+	struct pvfs2_kernel_op *new_op = NULL;
+	int buffer_index = -1;
+	ssize_t ret;
+
+	new_op = op_alloc(PVFS2_VFS_OP_FILE_IO);
+	if (!new_op) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	/* synchronous I/O */
+	new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO;
+	new_op->upcall.req.io.readahead_size = readahead_size;
+	new_op->upcall.req.io.io_type = type;
+	new_op->upcall.req.io.refn = pvfs2_inode->refn;
+
+populate_shared_memory:
+	/* get a shared buffer index */
+	ret = pvfs_bufmap_get(&bufmap, &buffer_index);
+	if (ret < 0) {
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: pvfs_bufmap_get failure (%ld)\n",
+			     __func__, (long)ret);
+		goto out;
+	}
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): GET op %p -> buffer_index %d\n",
+		     __func__,
+		     handle,
+		     new_op,
+		     buffer_index);
+
+	new_op->uses_shared_memory = 1;
+	new_op->upcall.req.io.buf_index = buffer_index;
+	new_op->upcall.req.io.count = total_size;
+	new_op->upcall.req.io.offset = *offset;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): copy_to_user %d nr_segs %lu, offset: %llu total_size: %zd\n",
+		     __func__,
+		     handle,
+		     to_user,
+		     nr_segs,
+		     llu(*offset),
+		     total_size);
+	/*
+	 * Stage 1: copy the buffers into client-core's address space
+	 * precopy_buffers only pertains to writes.
+	 */
+	if (type == PVFS_IO_WRITE) {
+		ret = precopy_buffers(bufmap,
+				      buffer_index,
+				      vec,
+				      nr_segs,
+				      total_size,
+				      to_user);
+		if (ret < 0)
+			goto out;
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): Calling post_io_request with tag (%llu)\n",
+		     __func__,
+		     handle,
+		     llu(new_op->tag));
+
+	/* Stage 2: Service the I/O operation */
+	ret = service_operation(new_op,
+				type == PVFS_IO_WRITE ? "file_write" : "file_read",
+				get_interruptible_flag(inode));
+
+	/*
+	 * If service_operation() returns -EAGAIN #and# the operation was
+	 * purged from pvfs2_request_list or htable_ops_in_progress, then
+	 * we know that the client was restarted, causing the shared memory
+	 * area to be wiped clean.  A write must re-copy the user's iovec
+	 * into a NEW shared memory location and a read needs a new one too,
+	 * so release this buffer and start over.  buffer_index is reset so
+	 * a failing pvfs_bufmap_get() cannot lead to a second put at "out".
+	 */
+	if (ret == -EAGAIN && op_state_purged(new_op)) {
+		pvfs_bufmap_put(bufmap, buffer_index);
+		buffer_index = -1;
+		gossip_debug(GOSSIP_WAIT_DEBUG,
+			     "%s:going to repopulate_shared_memory.\n",
+			     __func__);
+		goto populate_shared_memory;
+	}
+
+	if (ret < 0) {
+		handle_io_error(); /* defined in pvfs2-kernel.h */
+		/*
+		   don't write an error to syslog on signaled operation
+		   termination unless we've got debugging turned on, as
+		   this can happen regularly (i.e. ctrl-c)
+		 */
+		if (ret == -EINTR)
+			gossip_debug(GOSSIP_FILE_DEBUG,
+				     "%s: returning error %ld\n", __func__,
+				     (long)ret);
+		else
+			gossip_err("%s: error in %s handle %pU, returning %zd\n",
+				__func__,
+				type == PVFS_IO_READ ?
+					"read from" : "write to",
+				handle, ret);
+		goto out;
+	}
+
+	/*
+	 * Stage 3: Post copy buffers from client-core's address space
+	 * postcopy_buffers only pertains to reads.
+	 */
+	if (type == PVFS_IO_READ) {
+		ret = postcopy_buffers(bufmap,
+				       buffer_index,
+				       vec,
+				       nr_segs,
+				       new_op->downcall.resp.io.amt_complete,
+				       to_user);
+		if (ret < 0) {
+			/*
+			 * put error codes in downcall so that handle_io_error()
+			 * preserves it properly
+			 */
+			new_op->downcall.status = ret;
+			handle_io_error();
+			goto out;
+		}
+	}
+	gossip_debug(GOSSIP_FILE_DEBUG,
+	    "%s(%pU): Amount written as returned by the sys-io call:%d\n",
+	    __func__,
+	    handle,
+	    (int)new_op->downcall.resp.io.amt_complete);
+
+	ret = new_op->downcall.resp.io.amt_complete;
+
+	/*
+	   tell the device file owner waiting on I/O that this read has
+	   completed and it can return now.  in this exact case, on
+	   wakeup the daemon will free the op, so we *cannot* touch it
+	   after this.
+	 */
+	wake_up_daemon_for_return(new_op);
+	new_op = NULL;
+
+out:
+	if (buffer_index >= 0) {
+		pvfs_bufmap_put(bufmap, buffer_index);
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): PUT buffer_index %d\n",
+			     __func__, handle, buffer_index);
+	}
+	if (new_op)
+		op_release(new_op);
+	return ret;
+}
+
+/*
+ * The reason we need to do this is to be able to support readv and writev
+ * that are larger than pvfs_bufmap_size_query() (the default is
+ * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB). What that means is that we will
+ * create a new io vec descriptor for those memory addresses that
+ * go beyond the limit. Return value for this routine is negative in case
+ * of errors and 0 in case of success.
+ *
+ * Further, the new_nr_segs pointer is updated to hold the new value
+ * of number of iovecs, the new_vec pointer is updated to hold the pointer
+ * to the new split iovec, and seg_array is an array holding, per chunk,
+ * the number of iovecs that make up one pvfs_bufmap_size_query() chunk.
+ * The max_new_nr_segs value is computed by the caller and passed in; it
+ * sizes both output arrays, which use overflow-checked allocators.
+ */
+static int split_iovecs(unsigned long max_new_nr_segs,		/* IN */
+			unsigned long nr_segs,			/* IN */
+			const struct iovec *original_iovec,	/* IN */
+			unsigned long *new_nr_segs,		/* OUT */
+			struct iovec **new_vec,			/* OUT */
+			unsigned long *seg_count,		/* OUT */
+			unsigned long **seg_array)		/* OUT */
+{
+	unsigned long seg;
+	unsigned long count = 0;
+	unsigned long begin_seg;
+	unsigned long tmpnew_nr_segs = 0;
+	struct iovec *new_iovec = NULL;
+	struct iovec *orig_iovec;
+	unsigned long *sizes = NULL;
+	unsigned long sizes_count = 0;
+
+	if (nr_segs == 0 ||
+	    original_iovec == NULL ||
+	    new_nr_segs == NULL ||
+	    new_vec == NULL ||
+	    seg_count == NULL ||
+	    seg_array == NULL ||
+	    max_new_nr_segs == 0) {
+		gossip_err("Invalid parameters to split_iovecs\n");
+		return -EINVAL;
+	}
+	*new_nr_segs = 0;
+	*new_vec = NULL;
+	*seg_count = 0;
+	*seg_array = NULL;
+	/* copy the passed in iovec descriptor to a temp structure */
+	orig_iovec = kmalloc_array(nr_segs, sizeof(*orig_iovec),
+				   PVFS2_BUFMAP_GFP_FLAGS);
+	if (orig_iovec == NULL) {
+		gossip_err(
+		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
+		    (unsigned long)(nr_segs * sizeof(*orig_iovec)));
+		return -ENOMEM;
+	}
+	new_iovec = kcalloc(max_new_nr_segs, sizeof(*new_iovec),
+			    PVFS2_BUFMAP_GFP_FLAGS);
+	if (new_iovec == NULL) {
+		kfree(orig_iovec);
+		gossip_err(
+		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
+		    (unsigned long)(max_new_nr_segs * sizeof(*new_iovec)));
+		return -ENOMEM;
+	}
+	sizes = kcalloc(max_new_nr_segs, sizeof(*sizes),
+			PVFS2_BUFMAP_GFP_FLAGS);
+	if (sizes == NULL) {
+		kfree(new_iovec);
+		kfree(orig_iovec);
+		gossip_err(
+		    "split_iovecs: Could not allocate memory for %lu bytes!\n",
+		    (unsigned long)(max_new_nr_segs * sizeof(*sizes)));
+		return -ENOMEM;
+	}
+	/* copy the passed in iovec to a temp structure */
+	memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec));
+	begin_seg = 0;
+repeat:
+	for (seg = begin_seg; seg < nr_segs; seg++) {
+		if (tmpnew_nr_segs >= max_new_nr_segs ||
+		    sizes_count >= max_new_nr_segs) {
+			kfree(sizes);
+			kfree(orig_iovec);
+			kfree(new_iovec);
+			gossip_err
+			    ("split_iovecs: exceeded the index limit (%lu)\n",
+			    tmpnew_nr_segs);
+			return -EINVAL;
+		}
+		if (count + orig_iovec[seg].iov_len <
+		    pvfs_bufmap_size_query()) {
+			count += orig_iovec[seg].iov_len;
+			memcpy(&new_iovec[tmpnew_nr_segs],
+			       &orig_iovec[seg],
+			       sizeof(*new_iovec));
+			tmpnew_nr_segs++;
+			sizes[sizes_count]++;
+		} else {
+			new_iovec[tmpnew_nr_segs].iov_base =
+			    orig_iovec[seg].iov_base;
+			new_iovec[tmpnew_nr_segs].iov_len =
+			    (pvfs_bufmap_size_query() - count);
+			tmpnew_nr_segs++;
+			sizes[sizes_count]++;
+			sizes_count++;
+			begin_seg = seg;
+			orig_iovec[seg].iov_base +=
+			    (pvfs_bufmap_size_query() - count);
+			orig_iovec[seg].iov_len -=
+			    (pvfs_bufmap_size_query() - count);
+			count = 0;
+			break;
+		}
+	}
+	if (seg != nr_segs)
+		goto repeat;
+	else
+		sizes_count++;
+
+	*new_nr_segs = tmpnew_nr_segs;
+	/* new_iovec is freed by the caller */
+	*new_vec = new_iovec;
+	*seg_count = sizes_count;
+	/* seg_array is also freed by the caller */
+	*seg_array = sizes;
+	kfree(orig_iovec);
+	return 0;
+}
+
+/* Total iov_len into *total_count; returns max iovecs after a split. */
+static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs,
+			     ssize_t *total_count)
+{
+	long max_nr_iovecs = 0;
+	ssize_t total = 0;
+	ssize_t count = 0;
+	unsigned long i;
+
+	for (i = 0; i < nr_segs; i++) {
+		const struct iovec *iv = &curr[i];
+
+		count += iv->iov_len;
+		/* reject a length or running sum that is negative as ssize_t */
+		if (unlikely((ssize_t) (count | iv->iov_len) < 0))
+			return -EINVAL;
+		if (total + iv->iov_len < pvfs_bufmap_size_query()) {
+			total += iv->iov_len;
+			max_nr_iovecs++;
+		} else {
+			total =
+			    (total + iv->iov_len - pvfs_bufmap_size_query());
+			max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2);
+		}
+	}
+	*total_count = count;
+	return max_nr_iovecs;
+}
+
+/*
+ * Common entry point for read/write/readv/writev.
+ * Splits the request into chunks of at most pvfs_bufmap_size_query()
+ * bytes and pushes each one through wait_for_direct_io(), advancing
+ * *offset as it goes.  Returns the bytes transferred (short if a chunk
+ * came back short) or a negative error.
+ */
+static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
+		loff_t *offset, const struct iovec *iov, unsigned long nr_segs)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle;
+	ssize_t ret;
+	ssize_t total_count;
+	unsigned int to_free;
+	size_t count;
+	unsigned long seg;
+	unsigned long new_nr_segs = 0;
+	long max_new_nr_segs = 0;	/* signed: bound_max_iovecs() may fail */
+	unsigned long seg_count = 0;
+	unsigned long *seg_array = NULL;
+	struct iovec *iovecptr = NULL;
+	struct iovec *ptr = NULL;
+
+	total_count = 0;
+	ret = -EINVAL;
+	count = 0;
+	to_free = 0;
+
+	/* Compute total and max number of segments after split */
+	max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count);
+	if (max_new_nr_segs < 0) {
+		gossip_lerr("%s: could not bound iovec %ld\n",
+			    __func__,
+			    max_new_nr_segs);
+		goto out;
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
+		__func__,
+		handle,
+		(int)count);
+
+	if (type == PVFS_IO_WRITE) {
+		if (file->f_flags & O_APPEND) {
+			/*
+			 * Make sure generic_write_checks sees an uptodate
+			 * inode size.
+			 */
+			ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_SIZE);
+			if (ret != 0)
+				goto out;
+		} else if (file->f_pos > i_size_read(inode)) {
+			pvfs2_i_size_write(inode, file->f_pos);
+		}
+
+		ret = generic_write_checks(file,
+					   offset,
+					   &count,
+					   S_ISBLK(inode->i_mode));
+		if (ret != 0) {
+			gossip_err("%s: failed generic argument checks.\n",
+				   __func__);
+			goto out;
+		}
+
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): proceeding with offset : %llu, "
+			     "size %d\n",
+			     __func__,
+			     handle,
+			     llu(*offset),
+			     (int)count);
+	}
+
+	if (count == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * if the total size of data transfer requested is greater than
+	 * the kernel-set blocksize of PVFS2, then we split the iovecs
+	 * such that no iovec description straddles a block size limit
+	 */
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s: pvfs_bufmap_size:%d\n",
+		     __func__,
+		     pvfs_bufmap_size_query());
+
+	if (count > pvfs_bufmap_size_query()) {
+		/*
+		 * Split up the given iovec description such that
+		 * no iovec descriptor straddles over the block-size limitation.
+		 * This makes us our job easier to stage the I/O.
+		 * In addition, this function will also compute an array
+		 * with seg_count entries that will store the number of
+		 * segments that straddle the block-size boundaries.
+		 */
+		ret = split_iovecs(max_new_nr_segs,	/* IN */
+				   nr_segs,		/* IN */
+				   iov,			/* IN */
+				   &new_nr_segs,	/* OUT */
+				   &iovecptr,		/* OUT */
+				   &seg_count,		/* OUT */
+				   &seg_array);		/* OUT */
+		if (ret < 0) {
+			gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n",
+				__func__,
+				ret);
+			goto out;
+		}
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: Splitting iovecs from %lu to %lu"
+			     " [max_new %ld]\n",
+			     __func__,
+			     nr_segs,
+			     new_nr_segs,
+			     max_new_nr_segs);
+		/* We must free seg_array and iovecptr */
+		to_free = 1;
+	} else {
+		new_nr_segs = nr_segs;
+		/* use the given iovec description */
+		iovecptr = (struct iovec *)iov;
+		/* There is only 1 element in the seg_array */
+		seg_count = 1;
+		/* and its value is the number of segments passed in */
+		seg_array = &nr_segs;
+		/* We dont have to free up anything */
+		to_free = 0;
+	}
+	ptr = iovecptr;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU) %zd@%llu\n",
+		     __func__,
+		     handle,
+		     count,
+		     llu(*offset));
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n",
+		     __func__,
+		     handle,
+		     new_nr_segs, seg_count);
+
+/* PVFS2_KERNEL_DEBUG is a CFLAGS define. */
+#ifdef PVFS2_KERNEL_DEBUG
+	for (seg = 0; seg < new_nr_segs; seg++)
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: %d) %p to %p [%d bytes]\n",
+			     __func__,
+			     (int)seg + 1,
+			     iovecptr[seg].iov_base,
+			     iovecptr[seg].iov_base + iovecptr[seg].iov_len,
+			     (int)iovecptr[seg].iov_len);
+	for (seg = 0; seg < seg_count; seg++)
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: %lu) %lu\n",
+			     __func__,
+			     seg + 1,
+			     seg_array[seg]);
+#endif
+	seg = 0;
+	while (total_count < count) {
+		size_t each_count;
+		size_t amt_complete;
+
+		/* how much to transfer in this loop iteration */
+		each_count =
+		   (((count - total_count) > pvfs_bufmap_size_query()) ?
+			pvfs_bufmap_size_query() :
+			(count - total_count));
+
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): size of each_count(%d)\n",
+			     __func__,
+			     handle,
+			     (int)each_count);
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
+			     __func__,
+			     handle,
+			     (int)*offset);
+
+		ret = wait_for_direct_io(type, inode, offset, ptr,
+				seg_array[seg], each_count, 0, 1);
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): return from wait_for_io:%d\n",
+			     __func__,
+			     handle,
+			     (int)ret);
+
+		if (ret < 0)
+			goto out;
+
+		/* advance the iovec pointer */
+		ptr += seg_array[seg];
+		seg++;
+		*offset += ret;
+		total_count += ret;
+		amt_complete = ret;
+
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
+			     __func__,
+			     handle,
+			     (int)*offset);
+
+		/*
+		 * if we got a short I/O operations,
+		 * fall out and return what we got so far
+		 */
+		if (amt_complete < each_count)
+			break;
+	} /*end while */
+
+	if (total_count > 0)
+		ret = total_count;
+out:
+	if (to_free) {
+		kfree(iovecptr);
+		kfree(seg_array);
+	}
+	if (ret > 0) {
+		if (type == PVFS_IO_READ) {
+			file_accessed(file);
+		} else {
+			SetMtimeFlag(pvfs2_inode);
+			inode->i_mtime = CURRENT_TIME;
+			mark_inode_dirty_sync(inode);
+		}
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): Value(%d) returned.\n",
+		     __func__,
+		     handle,
+		     (int)ret);
+
+	return ret;
+}
+
+/*
+ * Read up to count bytes (at most one bufmap buffer) from *offset of inode
+ * into buf; to_user is passed as 0, i.e. buf is copied as a kernel address.
+ */
+ssize_t pvfs2_inode_read(struct inode *inode,
+			 char __user *buf,
+			 size_t count,
+			 loff_t *offset,
+			 loff_t readahead_size)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	size_t bufmap_size;
+	struct iovec vec;
+	ssize_t ret = -EINVAL;
+
+	g_pvfs2_stats.reads++;
+
+	vec.iov_base = buf;
+	vec.iov_len = count;
+
+	bufmap_size = pvfs_bufmap_size_query();
+	if (count > bufmap_size) {
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "%s: count is too large (%zd/%zd)!\n",
+			     __func__, count, bufmap_size);
+		return -EINVAL;
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU) %zd@%llu\n",
+		     __func__,
+		     &pvfs2_inode->refn.khandle,
+		     count,
+		     llu(*offset));
+
+	ret = wait_for_direct_io(PVFS_IO_READ, inode, offset, &vec, 1,
+			count, readahead_size, 0);
+	if (ret > 0)
+		*offset += ret;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "%s(%pU): Value(%zd) returned.\n",
+		     __func__,
+		     &pvfs2_inode->refn.khandle,
+		     ret);
+
+	return ret;
+}
+
+/* ->read_iter: run the iterator's iovec array through do_readv_writev(). */
+static ssize_t pvfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	loff_t pos = iocb->ki_pos;
+	ssize_t rc;
+
+	BUG_ON(iocb->private);
+
+	gossip_debug(GOSSIP_FILE_DEBUG,"pvfs2_file_read_iter\n");
+
+	g_pvfs2_stats.reads++;
+
+	/* work on a local copy of the position, then publish it back */
+	rc = do_readv_writev(PVFS_IO_READ,
+			     iocb->ki_filp,
+			     &pos,
+			     iter->iov,
+			     iter->nr_segs);
+	iocb->ki_pos = pos;
+
+	return rc;
+}
+
+/* ->write_iter: run the iterator's iovec array through do_readv_writev(). */
+static ssize_t pvfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	loff_t pos = iocb->ki_pos;
+	ssize_t rc;
+
+	BUG_ON(iocb->private);
+
+	gossip_debug(GOSSIP_FILE_DEBUG,"pvfs2_file_write_iter\n");
+
+	g_pvfs2_stats.writes++;
+
+	/* work on a local copy of the position, then publish it back */
+	rc = do_readv_writev(PVFS_IO_WRITE,
+			     iocb->ki_filp,
+			     &pos,
+			     iter->iov,
+			     iter->nr_segs);
+	iocb->ki_pos = pos;
+
+	return rc;
+}
+
+/*
+ * ioctl handler: FS_IOC_GETFLAGS/FS_IOC_SETFLAGS only, else -ENOTTY.
+ */
+long pvfs2_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int ret = -ENOTTY;
+	uint64_t val = 0;
+	unsigned long uval;
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "pvfs2_ioctl: called with cmd %d\n",
+		     cmd);
+
+	/*
+	 * we understand some general ioctls on files, such as the immutable
+	 * and append flags; both are kept in the "user.pvfs2.meta_hint" xattr
+	 */
+	if (cmd == FS_IOC_GETFLAGS) {
+		val = 0;
+		ret = pvfs2_xattr_get_default(file->f_path.dentry,
+					      "user.pvfs2.meta_hint",
+					      &val,
+					      sizeof(val),
+					      0);
+		if (ret < 0 && ret != -ENODATA)
+			return ret;
+		else if (ret == -ENODATA)
+			val = 0;
+		uval = val;
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "pvfs2_ioctl: FS_IOC_GETFLAGS: %llu\n",
+			     (unsigned long long)uval);
+		return put_user(uval, (int __user *)arg);
+	} else if (cmd == FS_IOC_SETFLAGS) {
+		ret = 0;
+		if (get_user(uval, (int __user *)arg))
+			return -EFAULT;
+		/*
+		 * PVFS_MIRROR_FL is set internally when the mirroring mode
+		 * is turned on for a file. The user is not allowed to turn
+		 * on this bit, but the bit is present if the user first gets
+		 * the flags and then updates the flags with some new
+		 * settings. So, we ignore it in the following edit. bligon.
+		 */
+		if ((uval & ~PVFS_MIRROR_FL) &
+		    (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) {
+			gossip_err("pvfs2_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
+			return -EINVAL;
+		}
+		val = uval;
+		gossip_debug(GOSSIP_FILE_DEBUG,
+			     "pvfs2_ioctl: FS_IOC_SETFLAGS: %llu\n",
+			     (unsigned long long)val);
+		ret = pvfs2_xattr_set_default(file->f_path.dentry,
+					      "user.pvfs2.meta_hint",
+					      &val,
+					      sizeof(val),
+					      0,
+					      0);
+	}
+
+	return ret;
+}
+
+/*
+ * Memory map a region of a file via generic_file_mmap().
+ */
+static int pvfs2_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "pvfs2_file_mmap: called on %s\n",
+		     (file ?
+			(char *)file->f_path.dentry->d_name.name :
+			(char *)"Unknown"));
+
+	/* set the sequential readahead hint and clear the random-read one */
+	vma->vm_flags |= VM_SEQ_READ;
+	vma->vm_flags &= ~VM_RAND_READ;
+	return generic_file_mmap(file, vma);
+}
+
+#define mapping_nrpages(idata) ((idata)->nrpages)
+
+/*
+ * Called to notify the module that there are no more references to
+ * this file (i.e. no processes have it open); it is not called on
+ * every close.  Flushes the inode via pvfs2_flush_inode() and drops
+ * any cached pages of the inode's mapping.  Always returns 0.
+ */
+int pvfs2_file_release(struct inode *inode, struct file *file)
+{
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "pvfs2_file_release: called on %s\n",
+		     file->f_path.dentry->d_name.name);
+
+	pvfs2_flush_inode(inode);
+
+	/*
+	   remove all associated inode pages from the page cache and mmap
+	   readahead cache (if any); this forces an expensive refresh of
+	   data for the next caller of mmap (or 'get_block' accesses)
+	 */
+	if (file->f_path.dentry->d_inode &&
+	    file->f_path.dentry->d_inode->i_mapping &&
+	    mapping_nrpages(&file->f_path.dentry->d_inode->i_data))
+		truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+				     0);
+	return 0;
+}
+
+/*
+ * Push all data for a specific file onto permanent storage.
+ * Returns the writeback error, -ENOMEM, or service_operation()'s result.
+ */
+int pvfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	int ret;
+	struct pvfs2_inode_s *pvfs2_inode =
+		PVFS2_I(file->f_path.dentry->d_inode);
+	struct pvfs2_kernel_op *new_op = NULL;
+
+	/* required call; a writeback failure must reach the fsync() caller */
+	ret = filemap_write_and_wait_range(file->f_mapping, start, end);
+	if (ret < 0)
+		return ret;
+
+	new_op = op_alloc(PVFS2_VFS_OP_FSYNC);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.fsync.refn = pvfs2_inode->refn;
+
+	ret = service_operation(new_op,
+			"pvfs2_fsync",
+			get_interruptible_flag(file->f_path.dentry->d_inode));
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "pvfs2_fsync got return value of %d\n",
+		     ret);
+	op_release(new_op);
+	pvfs2_flush_inode(file->f_path.dentry->d_inode);
+	return ret;
+}
+
+/*
+ * Change the file pointer position for an instance of an open file.
+ * For PVFS2_SEEK_END the inode's file size is revalidated first.
+ * \note If .llseek is overridden, we must acquire lock as described in
+ *       Documentation/filesystems/Locking.
+ *
+ * A future upgrade could support SEEK_DATA and SEEK_HOLE, but that
+ * would require many changes to the FS.
+ */
+loff_t pvfs2_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	int ret = -EINVAL;
+	struct inode *inode = file->f_path.dentry->d_inode;
+
+	if (!inode) {
+		gossip_err("pvfs2_file_llseek: invalid inode (NULL)\n");
+		return ret;
+	}
+
+	if (origin == PVFS2_SEEK_END) {
+		/*
+		 * revalidate the inode's file size.
+		 * NOTE: We are only interested in file size here,
+		 * so we set mask accordingly.
+		 */
+		ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_SIZE);
+		if (ret) {
+			gossip_debug(GOSSIP_FILE_DEBUG,
+				     "%s:%s:%d calling make bad inode\n",
+				     __FILE__,
+				     __func__,
+				     __LINE__);
+			pvfs2_make_bad_inode(inode);
+			return ret;
+		}
+	}
+
+	gossip_debug(GOSSIP_FILE_DEBUG,
+		     "pvfs2_file_llseek: offset is %ld | origin is %d | "
+		     "inode size is %lu\n",
+		     (long)offset,
+		     origin,
+		     (unsigned long)file->f_path.dentry->d_inode->i_size);
+
+	return generic_file_llseek(file, offset, origin);
+}
+
+int pvfs2_lock(struct file *f, int flags, struct file_lock *lock)
+{
+	return -ENOSYS;	/* locking is not implemented: always -ENOSYS */
+}
+
+/* PVFS2 implementation of VFS file operations (I/O via the *_iter hooks) */
+const struct file_operations pvfs2_file_operations = {
+	.llseek		= pvfs2_file_llseek,
+	.read		= new_sync_read,
+	.write		= new_sync_write,
+	.read_iter	= pvfs2_file_read_iter,
+	.write_iter	= pvfs2_file_write_iter,
+	.lock		= pvfs2_lock,
+	.unlocked_ioctl	= pvfs2_ioctl,
+	.mmap		= pvfs2_file_mmap,
+	.open		= generic_file_open,
+	.release	= pvfs2_file_release,
+	.fsync		= pvfs2_fsync,
+};
diff --git a/fs/orangefs/pvfs2-utils.c b/fs/orangefs/pvfs2-utils.c
new file mode 100644
index 0000000..42c5f3f
--- /dev/null
+++ b/fs/orangefs/pvfs2-utils.c
@@ -0,0 +1,914 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "pvfs2-kernel.h"
+#include "pvfs2-dev-proto.h"
+#include "pvfs2-bufmap.h"
+
+/*
+ * Return the file system id carried in the upcall request of @op.
+ *
+ * The id is read from the request member that matches op->upcall.type.
+ * PVFS_FS_ID_NULL is returned when @op is NULL or when the upcall type is
+ * not one of the types handled below.
+ */
+int32_t fsid_of_op(struct pvfs2_kernel_op *op)
+{
+	if (!op)
+		return PVFS_FS_ID_NULL;
+
+	switch (op->upcall.type) {
+	case PVFS2_VFS_OP_FILE_IO:
+		return op->upcall.req.io.refn.fs_id;
+	case PVFS2_VFS_OP_LOOKUP:
+		return op->upcall.req.lookup.parent_refn.fs_id;
+	case PVFS2_VFS_OP_CREATE:
+		return op->upcall.req.create.parent_refn.fs_id;
+	case PVFS2_VFS_OP_GETATTR:
+		return op->upcall.req.getattr.refn.fs_id;
+	case PVFS2_VFS_OP_REMOVE:
+		return op->upcall.req.remove.parent_refn.fs_id;
+	case PVFS2_VFS_OP_MKDIR:
+		return op->upcall.req.mkdir.parent_refn.fs_id;
+	case PVFS2_VFS_OP_READDIR:
+		return op->upcall.req.readdir.refn.fs_id;
+	case PVFS2_VFS_OP_SETATTR:
+		return op->upcall.req.setattr.refn.fs_id;
+	case PVFS2_VFS_OP_SYMLINK:
+		return op->upcall.req.sym.parent_refn.fs_id;
+	case PVFS2_VFS_OP_RENAME:
+		return op->upcall.req.rename.old_parent_refn.fs_id;
+	case PVFS2_VFS_OP_STATFS:
+		return op->upcall.req.statfs.fs_id;
+	case PVFS2_VFS_OP_TRUNCATE:
+		return op->upcall.req.truncate.refn.fs_id;
+	case PVFS2_VFS_OP_MMAP_RA_FLUSH:
+		return op->upcall.req.ra_cache_flush.refn.fs_id;
+	case PVFS2_VFS_OP_FS_UMOUNT:
+		return op->upcall.req.fs_umount.fs_id;
+	case PVFS2_VFS_OP_GETXATTR:
+		return op->upcall.req.getxattr.refn.fs_id;
+	case PVFS2_VFS_OP_SETXATTR:
+		return op->upcall.req.setxattr.refn.fs_id;
+	case PVFS2_VFS_OP_LISTXATTR:
+		return op->upcall.req.listxattr.refn.fs_id;
+	case PVFS2_VFS_OP_REMOVEXATTR:
+		return op->upcall.req.removexattr.refn.fs_id;
+	case PVFS2_VFS_OP_FSYNC:
+		return op->upcall.req.fsync.refn.fs_id;
+	default:
+		return PVFS_FS_ID_NULL;
+	}
+}
+
+/*
+ * Mirror the PVFS immutable/append/noatime attribute flags in
+ * inode->i_flags: S_IMMUTABLE, S_APPEND and S_NOATIME are each set when the
+ * matching PVFS_*_FL bit is present in attrs->flags and cleared otherwise.
+ * No other bit of i_flags is modified.
+ */
+static void pvfs2_set_inode_flags(struct inode *inode,
+				  struct PVFS_sys_attr_s *attrs)
+{
+	inode->i_flags = (attrs->flags & PVFS_IMMUTABLE_FL) ?
+			 (inode->i_flags | S_IMMUTABLE) :
+			 (inode->i_flags & ~S_IMMUTABLE);
+
+	inode->i_flags = (attrs->flags & PVFS_APPEND_FL) ?
+			 (inode->i_flags | S_APPEND) :
+			 (inode->i_flags & ~S_APPEND);
+
+	inode->i_flags = (attrs->flags & PVFS_NOATIME_FL) ?
+			 (inode->i_flags | S_NOATIME) :
+			 (inode->i_flags & ~S_NOATIME);
+}
+
+/*
+ * Fill in @inode from the attributes @attrs received from the server.
+ *
+ * Size/block accounting, ownership, timestamps (second resolution only),
+ * permission bits and the file type bits of i_mode are written.  The root
+ * handle is additionally marked sticky.  For symlinks the target @symname
+ * is copied into the pvfs2 private part of the inode.
+ *
+ * NOTE: symname is ignored unless the inode is a sym link
+ *
+ * Returns 0 on success, or -1 when attrs->objtype is not a metafile,
+ * directory or symlink (the remaining inode fields have still been
+ * written in that case).
+ */
+static int copy_attributes_to_inode(struct inode *inode,
+				    struct PVFS_sys_attr_s *attrs,
+				    char *symname)
+{
+	int ret = -1;
+	int perm_mode = 0;
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	loff_t inode_size = 0;
+	loff_t rounded_up_size = 0;
+
+	/*
+	 * arbitrarily set the inode block size; FIXME: we need to
+	 * resolve the difference between the reported inode blocksize
+	 * and the PAGE_CACHE_SIZE, since our block count will always
+	 * be wrong.
+	 *
+	 * For now, we're setting the block count to be the proper
+	 * number assuming the block size is 512 bytes, and the size is
+	 * rounded up to the nearest 4K.  This is apparently required
+	 * to get proper size reports from the 'du' shell utility.
+	 *
+	 * changing the inode->i_blkbits to something other than
+	 * PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that.
+	 */
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "attrs->mask = %x (objtype = %s)\n",
+		     attrs->mask,
+		     attrs->objtype == PVFS_TYPE_METAFILE ? "file" :
+		     attrs->objtype == PVFS_TYPE_DIRECTORY ? "directory" :
+		     attrs->objtype == PVFS_TYPE_SYMLINK ? "symlink" :
+			"invalid/unknown");
+
+	switch (attrs->objtype) {
+	case PVFS_TYPE_METAFILE:
+		pvfs2_set_inode_flags(inode, attrs);
+		if (attrs->mask & PVFS_ATTR_SYS_SIZE) {
+			inode_size = (loff_t) attrs->size;
+			/*
+			 * Round up to the next multiple of 4096.  A size
+			 * that is already a multiple of 4096 (including 0)
+			 * is left unchanged rather than being bumped by a
+			 * whole extra 4K.
+			 */
+			rounded_up_size =
+			    (inode_size + 4095) & ~((loff_t)4095);
+
+			pvfs2_lock_inode(inode);
+			inode->i_bytes = inode_size;
+			inode->i_blocks =
+			    (unsigned long)(rounded_up_size / 512);
+			pvfs2_unlock_inode(inode);
+
+			/*
+			 * NOTE: make sure all the places we're called
+			 * from have the inode->i_sem lock. We're fine
+			 * in 99% of the cases since we're mostly
+			 * called from a lookup.
+			 */
+			inode->i_size = inode_size;
+		}
+		break;
+	case PVFS_TYPE_SYMLINK:
+		if (symname != NULL) {
+			inode->i_size = (loff_t) strlen(symname);
+			break;
+		}
+		/*FALLTHRU*/
+	default:
+		pvfs2_lock_inode(inode);
+		inode->i_bytes = PAGE_CACHE_SIZE;
+		inode->i_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512);
+		pvfs2_unlock_inode(inode);
+
+		inode->i_size = PAGE_CACHE_SIZE;
+		break;
+	}
+
+	inode->i_uid = make_kuid(&init_user_ns, attrs->owner);
+	inode->i_gid = make_kgid(&init_user_ns, attrs->group);
+	inode->i_atime.tv_sec = (time_t) attrs->atime;
+	inode->i_mtime.tv_sec = (time_t) attrs->mtime;
+	inode->i_ctime.tv_sec = (time_t) attrs->ctime;
+	inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_nsec = 0;
+	inode->i_ctime.tv_nsec = 0;
+
+	/* translate the PVFS permission bits into VFS mode bits */
+	if (attrs->perms & PVFS_O_EXECUTE)
+		perm_mode |= S_IXOTH;
+	if (attrs->perms & PVFS_O_WRITE)
+		perm_mode |= S_IWOTH;
+	if (attrs->perms & PVFS_O_READ)
+		perm_mode |= S_IROTH;
+
+	if (attrs->perms & PVFS_G_EXECUTE)
+		perm_mode |= S_IXGRP;
+	if (attrs->perms & PVFS_G_WRITE)
+		perm_mode |= S_IWGRP;
+	if (attrs->perms & PVFS_G_READ)
+		perm_mode |= S_IRGRP;
+
+	if (attrs->perms & PVFS_U_EXECUTE)
+		perm_mode |= S_IXUSR;
+	if (attrs->perms & PVFS_U_WRITE)
+		perm_mode |= S_IWUSR;
+	if (attrs->perms & PVFS_U_READ)
+		perm_mode |= S_IRUSR;
+
+	if (attrs->perms & PVFS_G_SGID)
+		perm_mode |= S_ISGID;
+	if (attrs->perms & PVFS_U_SUID)
+		perm_mode |= S_ISUID;
+
+	inode->i_mode = perm_mode;
+
+	if (is_root_handle(inode)) {
+		/* special case: mark the root inode as sticky */
+		inode->i_mode |= S_ISVTX;
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "Marking inode %pU as sticky\n",
+			     get_khandle_from_ino(inode));
+	}
+
+	switch (attrs->objtype) {
+	case PVFS_TYPE_METAFILE:
+		inode->i_mode |= S_IFREG;
+		ret = 0;
+		break;
+	case PVFS_TYPE_DIRECTORY:
+		inode->i_mode |= S_IFDIR;
+		/* NOTE: we have no good way to keep nlink consistent
+		 * for directories across clients; keep constant at 1.
+		 * Why 1?  If we go with 2, then find(1) gets confused
+		 * and won't work properly without the -noleaf option
+		 */
+		set_nlink(inode, 1);
+		ret = 0;
+		break;
+	case PVFS_TYPE_SYMLINK:
+		inode->i_mode |= S_IFLNK;
+
+		/* copy link target to inode private data */
+		if (pvfs2_inode && symname) {
+			strncpy(pvfs2_inode->link_target,
+				symname,
+				PVFS_NAME_MAX);
+			/*
+			 * strncpy() leaves the destination unterminated
+			 * when symname has PVFS_NAME_MAX or more
+			 * characters; terminate explicitly so the target
+			 * can safely be used as a C string (it is printed
+			 * with %s right below).
+			 */
+			pvfs2_inode->link_target[PVFS_NAME_MAX - 1] = '\0';
+			gossip_debug(GOSSIP_UTILS_DEBUG,
+				     "Copied attr link target %s\n",
+				     pvfs2_inode->link_target);
+		}
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "symlink mode %o\n",
+			     inode->i_mode);
+		ret = 0;
+		break;
+	default:
+		gossip_err("pvfs2: copy_attributes_to_inode: got invalid attribute type %x\n",
+			attrs->objtype);
+		break;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n",
+		     inode->i_mode,
+		     (unsigned long)i_size_read(inode));
+
+	return ret;
+}
+
+/*
+ * Translate the valid fields of @iattr into the PVFS attribute object
+ * @attrs that is sent to the server with a setattr request.
+ *
+ * Only fields flagged in iattr->ia_valid are copied, and attrs->mask is
+ * rebuilt from scratch to describe them:
+ *   - owner/group for ATTR_UID/ATTR_GID,
+ *   - atime/mtime values only when the matching *_SET flag is present
+ *     (otherwise only the mask bit is set),
+ *   - only the mask bit for ATTR_CTIME,
+ *   - permission bits for ATTR_MODE.
+ *
+ * The sticky bit is accepted only on the root handle and is stripped
+ * before the mode is translated; a request to set the setuid bit is
+ * rejected.
+ *
+ * NOTE: in kernel land, we never use the sys_attr->link_target for
+ * anything, so don't bother copying it into the sys_attr object here.
+ *
+ * Returns 0 on success, -EINVAL on a NULL argument or an unsupported
+ * mode bit.
+ */
+static inline int copy_attributes_from_inode(struct inode *inode,
+					     struct PVFS_sys_attr_s *attrs,
+					     struct iattr *iattr)
+{
+	umode_t tmp_mode;
+
+	if (!iattr || !inode || !attrs) {
+		gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
+			   "in copy_attributes_from_inode!\n",
+			   iattr,
+			   inode,
+			   attrs);
+		return -EINVAL;
+	}
+	/*
+	 * We need to be careful to only copy the attributes out of the
+	 * iattr object that we know are valid.
+	 */
+	attrs->mask = 0;
+	/*
+	 * Ids are mapped through init_user_ns, the same namespace
+	 * copy_attributes_to_inode() uses when turning server ids into
+	 * kuid/kgid values, so an id written here reads back unchanged.
+	 */
+	if (iattr->ia_valid & ATTR_UID) {
+		attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
+		attrs->mask |= PVFS_ATTR_SYS_UID;
+		gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
+	}
+	if (iattr->ia_valid & ATTR_GID) {
+		attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
+		attrs->mask |= PVFS_ATTR_SYS_GID;
+		gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
+	}
+
+	if (iattr->ia_valid & ATTR_ATIME) {
+		attrs->mask |= PVFS_ATTR_SYS_ATIME;
+		if (iattr->ia_valid & ATTR_ATIME_SET) {
+			attrs->atime =
+			    pvfs2_convert_time_field((void *)&iattr->ia_atime);
+			attrs->mask |= PVFS_ATTR_SYS_ATIME_SET;
+		}
+	}
+	if (iattr->ia_valid & ATTR_MTIME) {
+		attrs->mask |= PVFS_ATTR_SYS_MTIME;
+		if (iattr->ia_valid & ATTR_MTIME_SET) {
+			attrs->mtime =
+			    pvfs2_convert_time_field((void *)&iattr->ia_mtime);
+			attrs->mask |= PVFS_ATTR_SYS_MTIME_SET;
+		}
+	}
+	if (iattr->ia_valid & ATTR_CTIME)
+		attrs->mask |= PVFS_ATTR_SYS_CTIME;
+
+	/*
+	 * PVFS2 cannot set size with a setattr operation.  Probably not likely
+	 * to be requested through the VFS, but just in case, don't worry about
+	 * ATTR_SIZE
+	 */
+
+	if (iattr->ia_valid & ATTR_MODE) {
+		tmp_mode = iattr->ia_mode;
+		if (tmp_mode & (S_ISVTX)) {
+			if (is_root_handle(inode)) {
+				/*
+				 * allow sticky bit to be set on root (since
+				 * it shows up that way by default anyhow),
+				 * but don't show it to the server
+				 */
+				tmp_mode -= S_ISVTX;
+			} else {
+				gossip_debug(GOSSIP_UTILS_DEBUG,
+					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
+				return -EINVAL;
+			}
+		}
+
+		if (tmp_mode & (S_ISUID)) {
+			gossip_debug(GOSSIP_UTILS_DEBUG,
+				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
+			return -EINVAL;
+		}
+
+		attrs->perms = PVFS_util_translate_mode(tmp_mode);
+		attrs->mask |= PVFS_ATTR_SYS_PERM;
+	}
+
+	return 0;
+}
+
+/*
+ * Issue a pvfs2 getattr request for @inode, asking for the attributes
+ * selected by @getattr_mask, and copy the reply into the inode.
+ *
+ * Returns 0 on success, -ENOMEM if no op could be allocated, the error
+ * from service_operation(), or -ENOENT if the returned attributes could
+ * not be copied into the inode.
+ */
+int pvfs2_inode_getattr(struct inode *inode, uint32_t getattr_mask)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct PVFS_sys_attr_s *reply_attrs;
+	struct pvfs2_kernel_op *new_op;
+	int ret;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "%s: called on inode %pU\n",
+		     __func__,
+		     get_khandle_from_ino(inode));
+
+	new_op = op_alloc(PVFS2_VFS_OP_GETATTR);
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.getattr.refn = pvfs2_inode->refn;
+	new_op->upcall.req.getattr.mask = getattr_mask;
+
+	ret = service_operation(new_op, __func__,
+				get_interruptible_flag(inode));
+	if (ret != 0)
+		goto out;
+
+	reply_attrs = &new_op->downcall.resp.getattr.attributes;
+
+	if (copy_attributes_to_inode(inode, reply_attrs,
+			new_op->downcall.resp.getattr.link_target)) {
+		gossip_err("%s: failed to copy attributes\n", __func__);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/*
+	 * The block size is kept in the pvfs2 specific part of the inode
+	 * and is only meant for stat reporting, so that it cannot perturb
+	 * any inode related code path.  Regular files use the value sent by
+	 * the server; other types mimic generic_fillattr().
+	 */
+	if (reply_attrs->objtype == PVFS_TYPE_METAFILE)
+		pvfs2_inode->blksize = reply_attrs->blksize;
+	else
+		pvfs2_inode->blksize = (1 << inode->i_blkbits);
+
+out:
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Getattr on handle %pU, fsid %d\n  (inode ct = %d) returned %d\n",
+		     &pvfs2_inode->refn.khandle,
+		     pvfs2_inode->refn.fs_id,
+		     (int)atomic_read(&inode->i_count),
+		     ret);
+
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * Issue a pvfs2 setattr request carrying the attributes described by
+ * @iattr so that the new values take effect on the server.
+ *
+ * On success the pending atime, mtime, ctime and mode flags of the pvfs2
+ * inode are cleared.  Returns 0 on success; -errno otherwise.
+ */
+int pvfs2_inode_setattr(struct inode *inode, struct iattr *iattr)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct pvfs2_kernel_op *new_op = op_alloc(PVFS2_VFS_OP_SETATTR);
+	int ret;
+
+	if (!new_op)
+		return -ENOMEM;
+
+	new_op->upcall.req.setattr.refn = pvfs2_inode->refn;
+	ret = copy_attributes_from_inode(inode,
+					 &new_op->upcall.req.setattr.attributes,
+					 iattr);
+	if (ret >= 0) {
+		/* the attributes translated cleanly; hand the op over */
+		ret = service_operation(new_op, __func__,
+					get_interruptible_flag(inode));
+
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "pvfs2_inode_setattr: returning %d\n",
+			     ret);
+	}
+
+	/* the op is released on every path, serviced or not */
+	op_release(new_op);
+
+	if (ret != 0)
+		return ret;
+
+	/* a successful setattr leaves nothing pending for these fields */
+	ClearAtimeFlag(pvfs2_inode);
+	ClearMtimeFlag(pvfs2_inode);
+	ClearCtimeFlag(pvfs2_inode);
+	ClearModeFlag(pvfs2_inode);
+
+	return 0;
+}
+
+/*
+ * Write back the pending attribute changes of a dirty inode.
+ *
+ * The mtime, ctime, atime and mode flags of the pvfs2 inode are sampled
+ * and cleared, and a setattr request is issued for whichever of them were
+ * set.  Right now this is only used for mode, atime, mtime and/or ctime.
+ *
+ * Returns 0 when nothing was pending, otherwise the result of
+ * pvfs2_inode_setattr().
+ */
+int pvfs2_flush_inode(struct inode *inode)
+{
+	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
+	struct iattr wbattr;
+	int dirty_mtime;
+	int dirty_ctime;
+	int dirty_atime;
+	int dirty_mode;
+
+	memset(&wbattr, 0, sizeof(wbattr));
+
+	/*
+	 * Sample each flag and clear it right away, up front.  This will
+	 * prevent multiple processes from all trying to flush the same
+	 * inode if they call close() simultaneously.
+	 */
+	dirty_mtime = MtimeFlag(pvfs2_inode);
+	ClearMtimeFlag(pvfs2_inode);
+	dirty_ctime = CtimeFlag(pvfs2_inode);
+	ClearCtimeFlag(pvfs2_inode);
+	dirty_atime = AtimeFlag(pvfs2_inode);
+	ClearAtimeFlag(pvfs2_inode);
+	dirty_mode = ModeFlag(pvfs2_inode);
+	ClearModeFlag(pvfs2_inode);
+
+	/*
+	 * Lazy atime, mtime and ctime update: all times are dictated by
+	 * the server in the new scheme and not by the clients, so only the
+	 * "valid" bits are raised for them.  A pending mode change carries
+	 * the current i_mode.
+	 */
+	if (dirty_mtime)
+		wbattr.ia_valid |= ATTR_MTIME;
+	if (dirty_ctime)
+		wbattr.ia_valid |= ATTR_CTIME;
+	if (dirty_atime)
+		wbattr.ia_valid |= ATTR_ATIME;
+	if (dirty_mode) {
+		wbattr.ia_mode = inode->i_mode;
+		wbattr.ia_valid |= ATTR_MODE;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "*********** pvfs2_flush_inode: %pU (ia_valid %d)\n",
+		     get_khandle_from_ino(inode),
+		     wbattr.ia_valid);
+
+	if (wbattr.ia_valid == 0) {
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "pvfs2_flush_inode skipping setattr()\n");
+		return 0;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_flush_inode (%pU) writing mode %o\n",
+		     get_khandle_from_ino(inode),
+		     inode->i_mode);
+
+	return pvfs2_inode_setattr(inode, &wbattr);
+}
+
+/*
+ * Send an unmount request for the file system behind @sb.
+ *
+ * The request carries the id, fs_id and device name stored in the pvfs2
+ * private part of the super block.  When the request succeeds,
+ * mount_pending is set to 1 in that private data.
+ *
+ * Returns 0 on success, -ENOMEM if no op could be allocated, or the error
+ * from service_operation().
+ */
+int pvfs2_unmount_sb(struct super_block *sb)
+{
+	int ret = -EINVAL;
+	struct pvfs2_kernel_op *new_op = NULL;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_unmount_sb called on sb %p\n",
+		     sb);
+
+	new_op = op_alloc(PVFS2_VFS_OP_FS_UMOUNT);
+	if (!new_op)
+		return -ENOMEM;
+	new_op->upcall.req.fs_umount.id = PVFS2_SB(sb)->id;
+	new_op->upcall.req.fs_umount.fs_id = PVFS2_SB(sb)->fs_id;
+	strncpy(new_op->upcall.req.fs_umount.pvfs2_config_server,
+		PVFS2_SB(sb)->devname,
+		PVFS_MAX_SERVER_ADDR_LEN);
+	/*
+	 * strncpy() does not terminate the destination when devname has
+	 * PVFS_MAX_SERVER_ADDR_LEN or more characters; terminate it
+	 * explicitly since it is logged with %s right below.
+	 */
+	new_op->upcall.req.fs_umount.pvfs2_config_server[
+		PVFS_MAX_SERVER_ADDR_LEN - 1] = '\0';
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Attempting PVFS2 Unmount via host %s\n",
+		     new_op->upcall.req.fs_umount.pvfs2_config_server);
+
+	ret = service_operation(new_op, "pvfs2_fs_umount", 0);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_unmount: got return value of %d\n", ret);
+
+	/*
+	 * The failure is reported through the return value only; sb is a
+	 * by-value parameter, so overwriting it here would have no effect
+	 * on the caller.
+	 */
+	if (!ret)
+		PVFS2_SB(sb)->mount_pending = 1;
+
+	op_release(new_op);
+	return ret;
+}
+
+/*
+ * Ask for the in-progress operation identified by @tag to be cancelled.
+ *
+ * Returns -ENOMEM if no op could be allocated, otherwise the result of
+ * service_operation() for the cancel request.
+ *
+ * NOTE: on successful cancellation, be sure to return -EINTR, as
+ * that's the return value the caller expects
+ */
+int pvfs2_cancel_op_in_progress(uint64_t tag)
+{
+	struct pvfs2_kernel_op *cancel_op;
+	int status;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_cancel_op_in_progress called on tag %llu\n",
+		     llu(tag));
+
+	cancel_op = op_alloc(PVFS2_VFS_OP_CANCEL);
+	if (!cancel_op)
+		return -ENOMEM;
+
+	cancel_op->upcall.req.cancel.op_tag = tag;
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "Attempting PVFS2 operation cancellation of tag %llu\n",
+		     llu(cancel_op->upcall.req.cancel.op_tag));
+
+	status = service_operation(cancel_op,
+				   "pvfs2_cancel",
+				   PVFS2_OP_CANCELLATION);
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "pvfs2_cancel_op_in_progress: got return value of %d\n",
+		     status);
+
+	op_release(cancel_op);
+	return status;
+}
+
+/*
+ * Reset the bookkeeping fields of @op to their initial values while
+ * holding op->lock: I/O completion flag, upcall/downcall types, downcall
+ * status, op state and tag.  A NULL @op is ignored.
+ */
+void pvfs2_op_initialize(struct pvfs2_kernel_op *op)
+{
+	if (!op)
+		return;
+
+	spin_lock(&op->lock);
+
+	op->io_completed = 0;
+
+	op->upcall.type = PVFS2_VFS_OP_INVALID;
+	op->downcall.type = PVFS2_VFS_OP_INVALID;
+	op->downcall.status = -1;
+
+	op->op_state = OP_VFS_STATE_UNKNOWN;
+	op->tag = 0;
+
+	spin_unlock(&op->lock);
+}
+
+/*
+ * Mark @inode as bad via make_bad_inode(), except for the root handle,
+ * which is only logged and otherwise left untouched.
+ */
+void pvfs2_make_bad_inode(struct inode *inode)
+{
+	if (is_root_handle(inode)) {
+		/*
+		 * if this occurs, the pvfs2-client-core was killed but we
+		 * can't afford to lose the inode operations and such
+		 * associated with the root handle in any case.
+		 */
+		gossip_debug(GOSSIP_UTILS_DEBUG,
+			     "*** NOT making bad root inode %pU\n",
+			     get_khandle_from_ino(inode));
+		return;
+	}
+
+	gossip_debug(GOSSIP_UTILS_DEBUG,
+		     "*** making bad inode %pU\n",
+		     get_khandle_from_ino(inode));
+	make_bad_inode(inode);
+}
+
+/*
+ * Save the current blocked-signal set of the calling task in
+ * @orig_sigset and install a new one built with siginitsetinv().
+ *
+ * The mask handed to siginitsetinv() contains SIGKILL, plus SIGINT and
+ * SIGQUIT when their handlers are SIG_DFL, minus any of those that were
+ * already blocked in the first word of the original set.
+ * NOTE(review): siginitsetinv() is presumably what leaves only those
+ * signals unblocked - confirm against its definition.
+ *
+ * this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigmask
+ */
+void mask_blocked_signals(sigset_t *orig_sigset)
+{
+	struct k_sigaction *action = pvfs2_current_sigaction;
+	unsigned long sigallow = sigmask(SIGKILL);
+	unsigned long irqflags = 0;
+
+	if (action[SIGINT - 1].sa.sa_handler == SIG_DFL)
+		sigallow |= sigmask(SIGINT);
+	if (action[SIGQUIT - 1].sa.sa_handler == SIG_DFL)
+		sigallow |= sigmask(SIGQUIT);
+
+	spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags);
+	*orig_sigset = current->blocked;
+	siginitsetinv(&current->blocked, sigallow & ~orig_sigset->sig[0]);
+	recalc_sigpending();
+	spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags);
+}
+
+/*
+ * Restore the blocked-signal set of the calling task from @orig_sigset,
+ * as saved by mask_blocked_signals(), and recompute the pending state.
+ *
+ * this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigunmask
+ */
+void unmask_blocked_signals(sigset_t *orig_sigset)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pvfs2_current_signal_lock, flags);
+	current->blocked = *orig_sigset;
+	recalc_sigpending();
+	spin_unlock_irqrestore(&pvfs2_current_signal_lock, flags);
+}
+
+/*
+ * Return the seconds part of the struct timespec that @time_ptr points
+ * to, as a 64-bit unsigned value.  The nanoseconds part is not used.
+ */
+uint64_t pvfs2_convert_time_field(void *time_ptr)
+{
+	struct timespec *ts = time_ptr;
+
+	return (uint64_t)((time_t)ts->tv_sec);
+}
+
+/* macro defined in include/pvfs2-types.h */
+DECLARE_ERRNO_MAPPING_AND_FN();
+
+/*
+ * Convert an error code received in pvfs2 format into a negative errno.
+ *
+ * A positive code is reported and assumed to be inverted, so it is
+ * negated first.  Then:
+ *   - a pvfs2 "non errno" error becomes -ETIMEDOUT when it is
+ *     PVFS_ECANCEL and -EINVAL (with a warning) otherwise,
+ *   - any other pvfs2 error is mapped through PVFS_ERROR_TO_ERRNO(),
+ *   - everything else is returned unchanged.
+ */
+int pvfs2_normalize_to_errno(int32_t error_code)
+{
+	if (error_code > 0) {
+		gossip_err("pvfs2: error status receieved.\n");
+		gossip_err("pvfs2: assuming error code is inverted.\n");
+		error_code = -error_code;
+	}
+
+	/* convert any error codes that are in pvfs2 format */
+	if (IS_PVFS_NON_ERRNO_ERROR(-error_code)) {
+		/*
+		 * cancellation error codes generally correspond to
+		 * a timeout from the client's perspective
+		 */
+		if (PVFS_NON_ERRNO_ERROR_CODE(-error_code) == PVFS_ECANCEL)
+			return -ETIMEDOUT;
+
+		/* assume a default error code */
+		gossip_err("pvfs2: warning: got error code without errno equivalent: %d.\n",
+			   error_code);
+		return -EINVAL;
+	}
+
+	if (IS_PVFS_ERROR(-error_code))
+		error_code = -PVFS_ERROR_TO_ERRNO(-error_code);
+
+	return error_code;
+}
+
+/*
+ * Translate the VFS permission bits of @mode into the equivalent PVFS
+ * permission bits.  The other/group/user read, write and execute bits
+ * and the setgid and setuid bits are translated; any other bit of @mode
+ * is ignored.
+ */
+int32_t PVFS_util_translate_mode(int mode)
+{
+	static const struct {
+		int vfs_bit;
+		int pvfs2_bit;
+	} mode_map[] = {
+		{ S_IXOTH, PVFS_O_EXECUTE },
+		{ S_IWOTH, PVFS_O_WRITE },
+		{ S_IROTH, PVFS_O_READ },
+		{ S_IXGRP, PVFS_G_EXECUTE },
+		{ S_IWGRP, PVFS_G_WRITE },
+		{ S_IRGRP, PVFS_G_READ },
+		{ S_IXUSR, PVFS_U_EXECUTE },
+		{ S_IWUSR, PVFS_U_WRITE },
+		{ S_IRUSR, PVFS_U_READ },
+		{ S_ISGID, PVFS_G_SGID },
+		{ S_ISUID, PVFS_U_SUID },
+	};
+	int translated = 0;
+	unsigned int k;
+
+	for (k = 0; k < sizeof(mode_map) / sizeof(mode_map[0]); k++) {
+		if (mode & mode_map[k].vfs_bit)
+			translated |= mode_map[k].pvfs2_bit;
+	}
+
+	return translated;
+}
+
+/*
+ * Minimal strtok()-like tokenizer.
+ *
+ * Pass the string to split in @s on the first call and NULL on the
+ * following calls to continue with the same string.  @toks is the set of
+ * delimiter characters.  The first delimiter found is overwritten with a
+ * NUL and the word in front of it is returned; the word may be empty when
+ * two delimiters are adjacent.  NULL is returned once the end of the
+ * string has been reached, or when no string has been supplied yet.
+ *
+ * The scan position lives in a static variable, so only one string can be
+ * tokenized at a time and the function is not reentrant.
+ */
+static char *pvfs2_strtok(char *s, const char *toks)
+{
+	/* scan position, remembered between calls */
+	static char *in_string_p;
+	/* starting value of in_string_p during this iteration. */
+	char *this_string_p;
+
+	/* when s has a value, we are using a new input string */
+	if (s)
+		in_string_p = s;
+
+	/* continuation requested before any string was supplied */
+	if (!in_string_p)
+		return NULL;
+
+	/* set new starting position */
+	this_string_p = in_string_p;
+
+	/*
+	 * loop through the string until a delimiter or end-of-string(null)
+	 * is found.
+	 */
+	for (; *in_string_p; in_string_p++) {
+		if (strchr(toks, *in_string_p)) {
+			/* delimiter found => end-of-word */
+			*in_string_p = 0;
+			in_string_p++;
+			return this_string_p;
+		}
+	}
+
+	if (*this_string_p == 0)
+		return NULL;
+
+	return this_string_p;
+}
+
+/*
+ * Convert a 64-bit debug mask into a readable, comma separated string of
+ * keywords.
+ *
+ * @mask_map / @num_mask_map: keyword table to translate with.
+ * @mask:         the mask to describe.
+ * @debug_string: output buffer of PVFS2_MAX_DEBUG_STRING_LEN bytes; it is
+ *                zeroed first, so the result is NUL terminated.
+ *
+ * A mask equal to GOSSIP_NO_DEBUG or GOSSIP_MAX_DEBUG is reported with the
+ * single keyword mapped to that value.  Otherwise every keyword whose mask
+ * value is fully contained in @mask is appended.  Translation stops early
+ * when the next keyword would not fit.  Always returns 0.
+ */
+static int proc_mask_to_debug(struct __keyword_mask_t *mask_map,
+			      int num_mask_map,
+			      uint64_t mask,
+			      char *debug_string)
+{
+	unsigned int index = 0;
+	int i;
+
+	memset(debug_string, 0, PVFS2_MAX_DEBUG_STRING_LEN);
+
+	for (i = 0; i < num_mask_map; i++) {
+		const char *keyword = mask_map[i].keyword;
+		size_t len = strlen(keyword);
+
+		if ((index + len) >= PVFS2_MAX_DEBUG_STRING_LEN)
+			return 0;
+
+		switch (mask_map[i].mask_val) {
+		case GOSSIP_NO_DEBUG:
+			if (mask == GOSSIP_NO_DEBUG) {
+				strcpy(debug_string, keyword);
+				return 0;
+			}
+			break;
+		case GOSSIP_MAX_DEBUG:
+			if (mask == GOSSIP_MAX_DEBUG) {
+				strcpy(debug_string, keyword);
+				return 0;
+			}
+			break;
+		default:
+			if ((mask & mask_map[i].mask_val) !=
+			    mask_map[i].mask_val)
+				/*mask does NOT contain the mask value */
+				break;
+
+			if (index != 0) {
+				/*
+				 * The separator needs a byte too: without
+				 * this check ',' + keyword could fill the
+				 * buffer to its very last byte and leave
+				 * the string without a terminating NUL.
+				 */
+				if ((index + 1 + len) >=
+				    PVFS2_MAX_DEBUG_STRING_LEN)
+					return 0;
+
+				/*
+				 * add comma for second and subsequent mask
+				 * keywords
+				 */
+				debug_string[index] = ',';
+				index++;
+			}
+
+			/*add keyword and slide index */
+			memcpy(&debug_string[index], keyword, len);
+			index += len;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Translate a list of debug keywords into a 64-bit debug mask.
+ *
+ * @mask_map / @num_mask_map: keyword table to translate with.
+ * @event_logging: keywords separated by ',' and/or ' '; may be NULL.
+ *
+ * Keywords are processed in order.  A keyword prefixed with '-' clears
+ * its mask value, any other keyword sets it; the prefix applies to that
+ * one keyword only.  Unrecognized keywords are ignored.
+ *
+ * Returns the computed mask.  0 is returned when @event_logging is NULL
+ * or when the working copy of the string cannot be allocated: the return
+ * type is unsigned, so a negative errno would be indistinguishable from a
+ * mask with nearly every bit set.
+ */
+static uint64_t proc_debug_to_mask(struct __keyword_mask_t *mask_map,
+				   int num_mask_map,
+				   const char *event_logging)
+{
+	uint64_t mask = 0;
+	char *s = NULL;
+	char *t = NULL;
+	const char *toks = ", ";
+	int i = 0;
+	int slen = 0;
+
+	if (!event_logging)
+		return 0;
+
+	/* pvfs2_strtok() writes into its input, so work on a private copy */
+	slen = strlen(event_logging);
+	s = kmalloc(slen + 1, GFP_KERNEL);
+	if (!s)
+		return 0;
+	memcpy(s, event_logging, slen);
+	s[slen] = '\0';
+
+	for (t = pvfs2_strtok(s, toks); t; t = pvfs2_strtok(NULL, toks)) {
+		/* decided per keyword, so "-a,b" clears a and sets b */
+		int negate = 0;
+
+		if (*t == '-') {
+			negate = 1;
+			++t;
+		}
+
+		for (i = 0; i < num_mask_map; i++) {
+			if (strcmp(t, mask_map[i].keyword))
+				continue;
+
+			if (negate)
+				mask &= ~mask_map[i].mask_val;
+			else
+				mask |= mask_map[i].mask_val;
+			break;
+		}
+	}
+
+	kfree(s);
+	return mask;
+}
+
+/*
+ * Based on human readable keywords, translate them into
+ * a mask value appropriate for the debugging level desired.
+ * The 'computed' mask is returned; 0 if no keywords are
+ * present or recognized.  Unrecognized keywords are ignored when
+ * mixed with recognized keywords.
+ *
+ * Prefix a keyword with "-" to turn it off.  All keywords
+ * processed in specified order.
+ *
+ * The translation is done by proc_debug_to_mask() using the
+ * s_keyword_mask_map table.
+ */
+uint64_t PVFS_proc_debug_eventlog_to_mask(const char *event_logging)
+{
+	return proc_debug_to_mask(s_keyword_mask_map,
+				  num_keyword_mask_map,
+				  event_logging);
+}
+
+/*
+ * Translate the keyword list @event_logging into a debug mask with
+ * proc_debug_to_mask(), using the s_kmod_keyword_mask_map table.
+ */
+uint64_t PVFS_proc_kmod_eventlog_to_mask(const char *event_logging)
+{
+	return proc_debug_to_mask(s_kmod_keyword_mask_map,
+				  num_kmod_keyword_mask_map,
+				  event_logging);
+}
+
+/*
+ * Write the keywords describing @mask into @debug_string with
+ * proc_mask_to_debug(), using the s_kmod_keyword_mask_map table.
+ * @debug_string must provide PVFS2_MAX_DEBUG_STRING_LEN bytes, which is
+ * the amount proc_mask_to_debug() clears before writing.
+ */
+int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string)
+{
+	return proc_mask_to_debug(s_kmod_keyword_mask_map,
+				  num_kmod_keyword_mask_map,
+				  mask,
+				  debug_string);
+}
+
+/*
+ * Write the keywords describing @mask into @debug_string with
+ * proc_mask_to_debug(), using the s_keyword_mask_map table.
+ * @debug_string must provide PVFS2_MAX_DEBUG_STRING_LEN bytes, which is
+ * the amount proc_mask_to_debug() clears before writing.
+ */
+int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string)
+{
+
+	return proc_mask_to_debug(s_keyword_mask_map,
+				  num_keyword_mask_map,
+				  mask,
+				  debug_string);
+}
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 4/4] Orangefs: wiring and misc
  2014-12-31 20:53 [PATCH 4/4] Orangefs: wiring and misc hubcap
@ 2015-01-01 17:45 ` Randy Dunlap
  2015-01-05 19:23 ` [PATCH] Orangefs: Don't compile orangefs by default Mike Marshall
  1 sibling, 0 replies; 3+ messages in thread
From: Randy Dunlap @ 2015-01-01 17:45 UTC (permalink / raw)
  To: hubcap, viro; +Cc: linux-fsdevel

On 12/31/14 12:53, hubcap wrote:
> diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig
> new file mode 100644
> index 0000000..b600e08
> --- /dev/null
> +++ b/fs/orangefs/Kconfig
> @@ -0,0 +1,7 @@
> +config ORANGEFS_FS
> +	tristate "ORANGEFS (Powered by PVFS) support"
> +	select FS_POSIX_ACL
> +	default y
> +	help
> +	   Orange is a parallel file system designed for use on high end
> +	   computing (HEC) systems.

Hi,

You need to drop the "default y".
That's not acceptable in the mainline kernel.


-- 
~Randy

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] Orangefs: Don't compile orangefs by default.
  2014-12-31 20:53 [PATCH 4/4] Orangefs: wiring and misc hubcap
  2015-01-01 17:45 ` Randy Dunlap
@ 2015-01-05 19:23 ` Mike Marshall
  1 sibling, 0 replies; 3+ messages in thread
From: Mike Marshall @ 2015-01-05 19:23 UTC (permalink / raw)
  To: viro; +Cc: Mike Marshall, linux-fsdevel

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig
index 6a8fb11..1554c02 100644
--- a/fs/orangefs/Kconfig
+++ b/fs/orangefs/Kconfig
@@ -1,7 +1,6 @@
 config ORANGEFS_FS
 	tristate "ORANGEFS (Powered by PVFS) support"
 	select FS_POSIX_ACL
-	default y
 	help
 	   Orange is a parallel file system designed for use on high end
 	   computing (HEC) systems.
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-01-05 19:23 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-31 20:53 [PATCH 4/4] Orangefs: wiring and misc hubcap
2015-01-01 17:45 ` Randy Dunlap
2015-01-05 19:23 ` [PATCH] Orangefs: Don't compile orangefs by default Mike Marshall

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.